Merge pull request #3125 from github/cklin/overlay-restore-timeout

Overlay: use restoreCache() timeout
Chuan-kai Lin
2025-09-19 10:25:21 -07:00
committed by GitHub
3 changed files with 70 additions and 5 deletions
+1 -1
@@ -90872,7 +90872,7 @@ function computeChangedFiles(baseFileOids, overlayFileOids) {
 }
 var CACHE_VERSION = 1;
 var CACHE_PREFIX = "codeql-overlay-base-database";
-var MAX_CACHE_OPERATION_MS = 12e4;
+var MAX_CACHE_OPERATION_MS = 6e5;
 function checkOverlayBaseDatabase(config, logger, warningPrefix) {
   const baseDatabaseOidsFilePath = getBaseDatabaseOidsFilePath(config);
   if (!fs6.existsSync(baseDatabaseOidsFilePath)) {
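Both generated bundles (this file and the next) carry the same one-line change, evidently compiled from the TypeScript source in the third file. The exponent literals can be hard to read at a glance; as a quick units check, in illustrative TypeScript (names are not from the diff):

const OLD_LIMIT_MS = 12e4; // 120,000 ms = two minutes
const NEW_LIMIT_MS = 6e5;  // 600,000 ms = ten minutes
console.log(OLD_LIMIT_MS / 60_000, NEW_LIMIT_MS / 60_000); // -> 2 10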
+32 -2
@@ -86478,7 +86478,7 @@ function computeChangedFiles(baseFileOids, overlayFileOids) {
 }
 var CACHE_VERSION = 1;
 var CACHE_PREFIX = "codeql-overlay-base-database";
-var MAX_CACHE_OPERATION_MS = 12e4;
+var MAX_CACHE_OPERATION_MS = 6e5;
 function checkOverlayBaseDatabase(config, logger, warningPrefix) {
   const baseDatabaseOidsFilePath = getBaseDatabaseOidsFilePath(config);
   if (!fs6.existsSync(baseDatabaseOidsFilePath)) {
@@ -86522,8 +86522,38 @@ async function downloadOverlayBaseDatabaseFromCache(codeql, config, logger) {
   try {
     const databaseDownloadStart = performance.now();
     const foundKey = await waitForResultWithTimeLimit(
+      // This ten-minute limit for the cache restore operation is mainly to
+      // guard against the possibility that the cache service is unresponsive
+      // and hangs outside the data download.
+      //
+      // Data download (which is normally the most time-consuming part of the
+      // restore operation) should not run long enough to hit this limit. Even
+      // for an extremely large 10GB database, at a download speed of 40MB/s
+      // (see below), the download should complete within five minutes. If we
+      // do hit this limit, there are likely more serious problems other than
+      // mere slow download speed.
+      //
+      // This is important because we don't want any ongoing file operations
+      // on the database directory when we do hit this limit. Hitting this
+      // time limit takes us to a fallback path where we re-initialize the
+      // database from scratch at dbLocation, and having the cache restore
+      // operation continue to write into dbLocation in the background would
+      // really mess things up. We want to hit this limit only in the case
+      // of a hung cache service, not just slow download speed.
       MAX_CACHE_OPERATION_MS,
-      actionsCache.restoreCache([dbLocation], cacheRestoreKeyPrefix),
+      actionsCache.restoreCache(
+        [dbLocation],
+        cacheRestoreKeyPrefix,
+        void 0,
+        {
+          // Azure SDK download (which is the default) uses 128MB segments; see
+          // https://github.com/actions/toolkit/blob/main/packages/cache/README.md.
+          // Setting segmentTimeoutInMs to 3000 translates to segment download
+          // speed of about 40 MB/s, which should be achievable unless the
+          // download is unreliable (in which case we do want to abort).
+          segmentTimeoutInMs: 3e3
+        }
+      ),
       () => {
         logger.info("Timed out downloading overlay-base database from cache");
       }
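The long comment in this hunk leans on waitForResultWithTimeLimit(), whose implementation is not part of this diff. As a rough mental model, such a helper can be built on Promise.race; the body below is an illustrative sketch under that assumption, not the action's actual code:

// Sketch only, assuming the helper races the operation against a timer:
// if the time limit elapses first, the callback runs and the function
// resolves to undefined. Losing the race does NOT cancel the underlying
// promise, so the restore operation must also be able to abort itself.
async function waitForResultWithTimeLimit<T>(
  timeLimitMs: number,
  operation: Promise<T>,
  onTimeout: () => void,
): Promise<T | undefined> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<undefined>((resolve) => {
    timer = setTimeout(() => {
      onTimeout();
      resolve(undefined);
    }, timeLimitMs);
  });
  try {
    return await Promise.race([operation, timeout]);
  } finally {
    if (timer !== undefined) {
      clearTimeout(timer);
    }
  }
}

This is the caveat the comment is circling around: when the race is lost, the restoreCache() promise keeps running in the background, so the outer ten-minute ceiling alone cannot stop it from writing into dbLocation. That is why the inner segmentTimeoutInMs abort matters.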
+37 -2
@@ -158,7 +158,12 @@ function computeChangedFiles(
 // Constants for database caching
 const CACHE_VERSION = 1;
 const CACHE_PREFIX = "codeql-overlay-base-database";
-const MAX_CACHE_OPERATION_MS = 120_000; // Two minutes
+// The purpose of this ten-minute limit is to guard against the possibility
+// that the cache service is unresponsive, which would otherwise cause the
+// entire action to hang. Normally we expect cache operations to complete
+// within two minutes.
+const MAX_CACHE_OPERATION_MS = 600_000;
 /**
  * Checks that the overlay-base database is valid by checking for the
@@ -351,8 +356,38 @@ export async function downloadOverlayBaseDatabaseFromCache(
   try {
     const databaseDownloadStart = performance.now();
     const foundKey = await waitForResultWithTimeLimit(
+      // This ten-minute limit for the cache restore operation is mainly to
+      // guard against the possibility that the cache service is unresponsive
+      // and hangs outside the data download.
+      //
+      // Data download (which is normally the most time-consuming part of the
+      // restore operation) should not run long enough to hit this limit. Even
+      // for an extremely large 10GB database, at a download speed of 40MB/s
+      // (see below), the download should complete within five minutes. If we
+      // do hit this limit, there are likely more serious problems other than
+      // mere slow download speed.
+      //
+      // This is important because we don't want any ongoing file operations
+      // on the database directory when we do hit this limit. Hitting this
+      // time limit takes us to a fallback path where we re-initialize the
+      // database from scratch at dbLocation, and having the cache restore
+      // operation continue to write into dbLocation in the background would
+      // really mess things up. We want to hit this limit only in the case
+      // of a hung cache service, not just slow download speed.
       MAX_CACHE_OPERATION_MS,
-      actionsCache.restoreCache([dbLocation], cacheRestoreKeyPrefix),
+      actionsCache.restoreCache(
+        [dbLocation],
+        cacheRestoreKeyPrefix,
+        undefined,
+        {
+          // Azure SDK download (which is the default) uses 128MB segments; see
+          // https://github.com/actions/toolkit/blob/main/packages/cache/README.md.
+          // Setting segmentTimeoutInMs to 3000 translates to segment download
+          // speed of about 40 MB/s, which should be achievable unless the
+          // download is unreliable (in which case we do want to abort).
+          segmentTimeoutInMs: 3000,
+        },
+      ),
       () => {
         logger.info("Timed out downloading overlay-base database from cache");
       },
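The back-of-the-envelope numbers in the comments check out: with 128MB download segments, a 3,000ms segmentTimeoutInMs implies a minimum sustained throughput of 128 / 3 ≈ 42.7 MB/s (the "about 40 MB/s" in the comment), and at 40MB/s even a 10GB database needs only about 256 seconds, comfortably inside both the five-minute estimate and the ten-minute MAX_CACHE_OPERATION_MS ceiling. In illustrative TypeScript, not from the diff:

// Back-of-the-envelope numbers behind the two timeouts (illustrative only).
const SEGMENT_SIZE_MB = 128;  // Azure SDK download segment size
const SEGMENT_TIMEOUT_S = 3;  // segmentTimeoutInMs: 3000
console.log((SEGMENT_SIZE_MB / SEGMENT_TIMEOUT_S).toFixed(1)); // 42.7 MB/s

const DATABASE_MB = 10 * 1024;            // "extremely large 10GB database"
const downloadSeconds = DATABASE_MB / 40; // at 40 MB/s
console.log(downloadSeconds);             // 256 s, about 4.3 minutes
console.log(downloadSeconds < 5 * 60);    // true: within five minutes

One further detail: @actions/cache declares restoreCache(paths, primaryKey, restoreKeys?, options?), so the explicit undefined (void 0 in the bundles) fills the restoreKeys slot and places the object carrying segmentTimeoutInMs in the options position.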