mirror of
https://github.com/chromium/crashpad.git
synced 2025-03-09 22:26:06 +00:00
Retry UploadReport on http upload failures.
This implements a per-report retry rate limit (as opposed to per upload rate limit in ShouldRateLimitUpload). When a report upload ends in a retry state, an in-memory only timestamp is stored with the next possible retry time. This timestamp is a backoff from the main thread work interval, doubling on each attemt. Because this is only stored in memory, on restart reports in the retry state will always be tried once, and then fall back into the next backoff. This continues until 5 retry attempts are reached. Change-Id: Ibde8855a8a9f0743f0b0bd4d5e3de8a45c64bcb6 Reviewed-on: https://chromium-review.googlesource.com/c/crashpad/crashpad/+/3087723 Reviewed-by: Mark Mentovai <mark@chromium.org> Reviewed-by: Robert Sesek <rsesek@chromium.org> Commit-Queue: Justin Cohen <justincohen@chromium.org>
This commit is contained in:
parent
70e4327a73
commit
92f6c21242
@ -46,6 +46,21 @@
|
||||
|
||||
namespace crashpad {
|
||||
|
||||
namespace {
|
||||
|
||||
// The number of seconds to wait between checking for pending reports.
|
||||
const int kRetryWorkIntervalSeconds = 15 * 60;
|
||||
|
||||
#if defined(OS_IOS)
|
||||
// The number of times to attempt to upload a pending report, repeated on
|
||||
// failure. Attempts will happen once per launch, once per call to
|
||||
// ReportPending(), and, if Options.watch_pending_reports is true, once every
|
||||
// kRetryWorkIntervalSeconds. Currently iOS only.
|
||||
const int kRetryAttempts = 5;
|
||||
#endif
|
||||
|
||||
} // namespace
|
||||
|
||||
CrashReportUploadThread::CrashReportUploadThread(CrashReportDatabase* database,
|
||||
const std::string& url,
|
||||
const Options& options)
|
||||
@ -55,7 +70,7 @@ CrashReportUploadThread::CrashReportUploadThread(CrashReportDatabase* database,
|
||||
// absence of a signal from the handler thread. This allows for failed
|
||||
// uploads to be retried periodically, and for pending reports written by
|
||||
// other processes to be recognized.
|
||||
thread_(options.watch_pending_reports ? 15 * 60.0
|
||||
thread_(options.watch_pending_reports ? kRetryWorkIntervalSeconds
|
||||
: WorkerThread::kIndefiniteWait,
|
||||
this),
|
||||
known_pending_report_uuids_(),
|
||||
@ -154,45 +169,13 @@ void CrashReportUploadThread::ProcessPendingReport(
|
||||
return;
|
||||
}
|
||||
|
||||
// This currently implements very simplistic rate-limiting, compatible with
|
||||
// the Breakpad client, where the strategy is to permit one upload attempt per
|
||||
// hour, and retire reports that would exceed this limit or for which the
|
||||
// upload fails on the first attempt.
|
||||
//
|
||||
// If upload was requested explicitly (i.e. by user action), we do not
|
||||
// throttle the upload.
|
||||
//
|
||||
// TODO(mark): Provide a proper rate-limiting strategy and allow for failed
|
||||
// upload attempts to be retried.
|
||||
if (!report.upload_explicitly_requested && options_.rate_limit) {
|
||||
time_t last_upload_attempt_time;
|
||||
if (settings->GetLastUploadAttemptTime(&last_upload_attempt_time)) {
|
||||
time_t now = time(nullptr);
|
||||
if (now >= last_upload_attempt_time) {
|
||||
// If the most recent upload attempt occurred within the past hour,
|
||||
// don’t attempt to upload the new report. If it happened longer ago,
|
||||
// attempt to upload the report.
|
||||
constexpr int kUploadAttemptIntervalSeconds = 60 * 60; // 1 hour
|
||||
if (now - last_upload_attempt_time < kUploadAttemptIntervalSeconds) {
|
||||
database_->SkipReportUpload(
|
||||
report.uuid, Metrics::CrashSkippedReason::kUploadThrottled);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// The most recent upload attempt purportedly occurred in the future. If
|
||||
// it “happened” at least one day in the future, assume that the last
|
||||
// upload attempt time is bogus, and attempt to upload the report. If
|
||||
// the most recent upload time is in the future but within one day,
|
||||
// accept it and don’t attempt to upload the report.
|
||||
constexpr int kBackwardsClockTolerance = 60 * 60 * 24; // 1 day
|
||||
if (last_upload_attempt_time - now < kBackwardsClockTolerance) {
|
||||
database_->SkipReportUpload(
|
||||
report.uuid, Metrics::CrashSkippedReason::kUnexpectedTime);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ShouldRateLimitUpload(report))
|
||||
return;
|
||||
|
||||
#if defined(OS_IOS)
|
||||
if (ShouldRateLimitRetry(report))
|
||||
return;
|
||||
#endif
|
||||
|
||||
std::unique_ptr<const CrashReportDatabase::UploadReport> upload_report;
|
||||
CrashReportDatabase::OperationStatus status =
|
||||
@ -234,6 +217,19 @@ void CrashReportUploadThread::ProcessPendingReport(
|
||||
report.uuid, Metrics::CrashSkippedReason::kPrepareForUploadFailed);
|
||||
break;
|
||||
case UploadResult::kRetry:
|
||||
#if defined(OS_IOS)
|
||||
if (upload_report->upload_attempts > kRetryAttempts) {
|
||||
upload_report.reset();
|
||||
database_->SkipReportUpload(report.uuid,
|
||||
Metrics::CrashSkippedReason::kUploadFailed);
|
||||
} else {
|
||||
Metrics::CrashUploadSkipped(
|
||||
Metrics::CrashSkippedReason::kUploadFailedButCanRetry);
|
||||
retry_uuid_time_map_[report.uuid] =
|
||||
time(nullptr) +
|
||||
(1 << upload_report->upload_attempts) * kRetryWorkIntervalSeconds;
|
||||
}
|
||||
#else
|
||||
upload_report.reset();
|
||||
|
||||
// TODO(mark): Deal with retries properly: don’t call SkipReportUplaod()
|
||||
@ -241,6 +237,7 @@ void CrashReportUploadThread::ProcessPendingReport(
|
||||
// too many times.
|
||||
database_->SkipReportUpload(report.uuid,
|
||||
Metrics::CrashSkippedReason::kUploadFailed);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -344,4 +341,55 @@ void CrashReportUploadThread::DoWork(const WorkerThread* thread) {
|
||||
ProcessPendingReports();
|
||||
}
|
||||
|
||||
bool CrashReportUploadThread::ShouldRateLimitUpload(
|
||||
const CrashReportDatabase::Report& report) {
|
||||
if (report.upload_explicitly_requested || !options_.rate_limit)
|
||||
return false;
|
||||
|
||||
Settings* const settings = database_->GetSettings();
|
||||
time_t last_upload_attempt_time;
|
||||
if (settings->GetLastUploadAttemptTime(&last_upload_attempt_time)) {
|
||||
time_t now = time(nullptr);
|
||||
if (now >= last_upload_attempt_time) {
|
||||
// If the most recent upload attempt occurred within the past hour,
|
||||
// don’t attempt to upload the new report. If it happened longer ago,
|
||||
// attempt to upload the report.
|
||||
constexpr int kUploadAttemptIntervalSeconds = 60 * 60; // 1 hour
|
||||
if (now - last_upload_attempt_time < kUploadAttemptIntervalSeconds) {
|
||||
database_->SkipReportUpload(
|
||||
report.uuid, Metrics::CrashSkippedReason::kUploadThrottled);
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
// The most recent upload attempt purportedly occurred in the future. If
|
||||
// it “happened” at least one day in the future, assume that the last
|
||||
// upload attempt time is bogus, and attempt to upload the report. If
|
||||
// the most recent upload time is in the future but within one day,
|
||||
// accept it and don’t attempt to upload the report.
|
||||
constexpr int kBackwardsClockTolerance = 60 * 60 * 24; // 1 day
|
||||
if (last_upload_attempt_time - now < kBackwardsClockTolerance) {
|
||||
database_->SkipReportUpload(
|
||||
report.uuid, Metrics::CrashSkippedReason::kUnexpectedTime);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(OS_IOS)
|
||||
bool CrashReportUploadThread::ShouldRateLimitRetry(
|
||||
const CrashReportDatabase::Report& report) {
|
||||
if (retry_uuid_time_map_.find(report.uuid) != retry_uuid_time_map_.end()) {
|
||||
time_t now = time(nullptr);
|
||||
if (now < retry_uuid_time_map_[report.uuid]) {
|
||||
return true;
|
||||
} else {
|
||||
retry_uuid_time_map_.erase(report.uuid);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace crashpad
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "client/crash_report_database.h"
|
||||
#include "util/misc/uuid.h"
|
||||
@ -170,10 +171,45 @@ class CrashReportUploadThread : public WorkerThread::Delegate,
|
||||
//! been called on any thread, as well as periodically on a timer.
|
||||
void DoWork(const WorkerThread* thread) override;
|
||||
|
||||
//! \brief Rate-limit uploads.
|
||||
//!
|
||||
//! \param[in] report The crash report to process.
|
||||
//!
|
||||
//! This currently implements very simplistic rate-limiting, compatible with
|
||||
//! the Breakpad client, where the strategy is to permit one upload attempt
|
||||
//! per hour, and retire reports that would exceed this limit or for which the
|
||||
//! upload fails on the first attempt.
|
||||
//! If upload was requested explicitly (i.e. by user action), do not throttle
|
||||
//! the upload.
|
||||
//!
|
||||
//! TODO(mark): Provide a proper rate-limiting strategy and allow for failed
|
||||
//! upload attempts to be retried.
|
||||
bool ShouldRateLimitUpload(const CrashReportDatabase::Report& report);
|
||||
|
||||
#if defined(OS_IOS)
|
||||
//! \brief Rate-limit report retries.
|
||||
//!
|
||||
//! \param[in] report The crash report to process.
|
||||
//!
|
||||
//! This implements a per-report retry rate limit (as opposed to per upload
|
||||
//! rate limit in ShouldRateLimitUpload). When a report upload ends in a retry
|
||||
//! state, an in-memory only timestamp is stored in |retry_uuid_time_map_|
|
||||
//! with the next possible retry time. This timestamp is a backoff from the
|
||||
//! main thread work interval, doubling on each attemt. Because this is only
|
||||
//! stored in memory, on restart reports in the retry state will always be
|
||||
//! tried once, and then fall back into the next backoff. This continues until
|
||||
//! kRetryAttempts is reached.
|
||||
bool ShouldRateLimitRetry(const CrashReportDatabase::Report& report);
|
||||
#endif
|
||||
|
||||
const Options options_;
|
||||
const std::string url_;
|
||||
WorkerThread thread_;
|
||||
ThreadSafeVector<UUID> known_pending_report_uuids_;
|
||||
#if defined(OS_IOS)
|
||||
// This is not thread-safe, and only used by the worker thread.
|
||||
std::map<UUID, time_t> retry_uuid_time_map_;
|
||||
#endif
|
||||
CrashReportDatabase* database_; // weak
|
||||
};
|
||||
|
||||
|
@ -89,6 +89,10 @@ class Metrics {
|
||||
//! and uploading it to the crash server.
|
||||
kPrepareForUploadFailed = 5,
|
||||
|
||||
//! \brief The upload of the crash failed during communication with the
|
||||
//! server, but the upload can be retried later.
|
||||
kUploadFailedButCanRetry = 6,
|
||||
|
||||
//! \brief The number of values in this enumeration; not a valid value.
|
||||
kMaxValue
|
||||
};
|
||||
|
@ -43,6 +43,10 @@ bool UUID::operator==(const UUID& that) const {
|
||||
return memcmp(this, &that, sizeof(*this)) == 0;
|
||||
}
|
||||
|
||||
bool UUID::operator<(const UUID& that) const {
|
||||
return memcmp(this, &that, sizeof(*this)) < 0;
|
||||
}
|
||||
|
||||
void UUID::InitializeToZero() {
|
||||
memset(this, 0, sizeof(*this));
|
||||
}
|
||||
|
@ -39,6 +39,7 @@ namespace crashpad {
|
||||
struct UUID {
|
||||
bool operator==(const UUID& that) const;
|
||||
bool operator!=(const UUID& that) const { return !operator==(that); }
|
||||
bool operator<(const UUID& that) const;
|
||||
|
||||
//! \brief Initializes the %UUID to zero.
|
||||
void InitializeToZero();
|
||||
|
@ -85,24 +85,40 @@ TEST(UUID, UUID) {
|
||||
EXPECT_EQ(uuid_2, uuid);
|
||||
EXPECT_FALSE(uuid != uuid_2);
|
||||
|
||||
// Test operator< operator
|
||||
UUID uuid_3{};
|
||||
UUID uuid_4;
|
||||
uuid_4.InitializeFromString("11111111-1111-1111-1111-111111111111");
|
||||
UUID uuid_5;
|
||||
uuid_5.InitializeFromString("22222222-2222-2222-2222-222222222222");
|
||||
|
||||
EXPECT_LT(uuid_3, uuid_4);
|
||||
EXPECT_LT(uuid_3, uuid_5);
|
||||
EXPECT_LT(uuid_4, uuid_5);
|
||||
|
||||
// Make sure that operator== and operator!= check the entire UUID.
|
||||
++uuid.data_1;
|
||||
EXPECT_NE(uuid, uuid_2);
|
||||
EXPECT_LT(uuid_2, uuid);
|
||||
--uuid.data_1;
|
||||
++uuid.data_2;
|
||||
EXPECT_NE(uuid, uuid_2);
|
||||
EXPECT_LT(uuid_2, uuid);
|
||||
--uuid.data_2;
|
||||
++uuid.data_3;
|
||||
EXPECT_NE(uuid, uuid_2);
|
||||
EXPECT_LT(uuid_2, uuid);
|
||||
--uuid.data_3;
|
||||
for (size_t index = 0; index < base::size(uuid.data_4); ++index) {
|
||||
++uuid.data_4[index];
|
||||
EXPECT_NE(uuid, uuid_2);
|
||||
EXPECT_LT(uuid_2, uuid);
|
||||
--uuid.data_4[index];
|
||||
}
|
||||
for (size_t index = 0; index < base::size(uuid.data_5); ++index) {
|
||||
++uuid.data_5[index];
|
||||
EXPECT_NE(uuid, uuid_2);
|
||||
EXPECT_LT(uuid_2, uuid);
|
||||
--uuid.data_5[index];
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user