diff --git a/app/lib/task/backend.dart b/app/lib/task/backend.dart index 3344bf738..178b8d90c 100644 --- a/app/lib/task/backend.dart +++ b/app/lib/task/backend.dart @@ -695,17 +695,9 @@ class TaskBackend { zone = versionState.zone!; instance = versionState.instance!; - - // Remove instanceName, zone, secretToken, and set attempts = 0 - state.versions![version] = PackageVersionStateInfo( - scheduled: versionState.scheduled, + state.versions![version] = versionState.complete( docs: hasDocIndexHtml, pana: summary != null, - finished: true, - attempts: 0, - instance: null, // version is no-longer running on this instance - secretToken: null, // TODO: Consider retaining this for idempotency - zone: null, ); // Determine if something else was running on the instance @@ -1008,13 +1000,12 @@ class TaskBackend { await for (final state in _db.tasks.listAllForCurrentRuntime()) { final zone = taskWorkerCloudCompute.zones.first; // ignore: invalid_use_of_visible_for_testing_member - final updated = await updatePackageStateWithPendingVersions( + final payload = await updatePackageStateWithPendingVersions( _db, state.package, zone, taskWorkerCloudCompute.generateInstanceName(), ); - final payload = updated?.$1; if (payload == null) continue; await processPayload(payload); } @@ -1424,7 +1415,6 @@ final class _TaskDataAccess { Future restorePreviousVersionsState( String packageName, String instanceName, - Map previousVersionsMap, ) async { await withRetryTransaction(_db, (tx) async { final s = await tx.tasks.lookupOrNull(packageName); @@ -1435,7 +1425,7 @@ final class _TaskDataAccess { s.versions!.addEntries( s.versions!.entries .where((e) => e.value.instance == instanceName) - .map((e) => MapEntry(e.key, previousVersionsMap[e.key]!)), + .map((e) => MapEntry(e.key, e.value.resetAfterFailedAttempt())), ); s.pendingAt = derivePendingAt( versions: s.versions!, diff --git a/app/lib/task/models.dart b/app/lib/task/models.dart index 1ccd3bba4..b13f67931 100644 --- a/app/lib/task/models.dart +++ b/app/lib/task/models.dart @@ -3,6 +3,7 @@ // BSD-style license that can be found in the LICENSE file. import 'dart:convert' show json; +import 'dart:math'; import 'package:clock/clock.dart'; import 'package:json_annotation/json_annotation.dart'; @@ -249,7 +250,7 @@ List derivePendingVersions({ } /// State of a given `version` within a [PackageState]. -@JsonSerializable() +@JsonSerializable(includeIfNull: false) class PackageVersionStateInfo { PackageVersionStatus get status { if (attempts == 0 && scheduled == initialTimestamp) { @@ -364,6 +365,71 @@ class PackageVersionStateInfo { 'secretToken: $secretToken', ].join(', ') + ')'; + + /// Remove instanceName, zone, secretToken, and set attempts = 0 + PackageVersionStateInfo complete({required bool pana, required bool docs}) { + return PackageVersionStateInfo( + scheduled: scheduled, + attempts: 0, + docs: docs, + pana: pana, + finished: true, + zone: null, + instance: null, // version is no-longer running on this instance + secretToken: null, // TODO: Consider retaining this for idempotency + ); + } + + /// Create updated [PackageVersionStateInfo] for when a new instance have been + /// scheduled. + /// + /// You must supply: + /// * [scheduled] when the instance was scheduled. + /// * [zone] within which the instance was scheduled. + /// * [instanceName] as name of the of the isntance scheduled. + /// * [secretToken] passed to the instance for authentication when + /// the instance wants to callback. + PackageVersionStateInfo scheduleNew({ + required DateTime scheduled, + required String zone, + required String instanceName, + required String secretToken, + }) { + return PackageVersionStateInfo( + scheduled: scheduled, + attempts: attempts + 1, + zone: zone, + instance: instanceName, + secretToken: secretToken, + finished: finished, + docs: docs, + pana: pana, + ); + } + + /// Reverts the status of the last scheduling attempt, when it is known that scheduling failed. + /// + /// > [!WARNING] + /// > This state transition **may only** be used if it's + /// > **known with certainty** that scheduling failed. + /// > + /// > If an instance _may_ have been scheduled, but we suspect + /// > scheduling failed, we have to wait for a retry. + /// > As we otherwise risk leaving an instance unable to call back, + /// > which will leave the instance logging errors that indicate + /// > internal errors in our system. + PackageVersionStateInfo resetAfterFailedAttempt() { + return PackageVersionStateInfo( + scheduled: initialTimestamp, + attempts: max(0, attempts - 1), + zone: null, + instance: null, + secretToken: null, + finished: finished, + docs: docs, + pana: pana, + ); + } } /// A [db.Property] encoding a Map from version to [PackageVersionStateInfo] as JSON. diff --git a/app/lib/task/models.g.dart b/app/lib/task/models.g.dart index d1b3d8816..121f23e3c 100644 --- a/app/lib/task/models.g.dart +++ b/app/lib/task/models.g.dart @@ -27,9 +27,9 @@ Map _$PackageVersionStateInfoToJson( 'finished': instance.finished, 'scheduled': instance.scheduled.toIso8601String(), 'attempts': instance.attempts, - 'zone': instance.zone, - 'instance': instance.instance, - 'secretToken': instance.secretToken, + 'zone': ?instance.zone, + 'instance': ?instance.instance, + 'secretToken': ?instance.secretToken, }; PackageStateInfo _$PackageStateInfoFromJson(Map json) => diff --git a/app/lib/task/scheduler.dart b/app/lib/task/scheduler.dart index 4ec22c2e4..5c5773014 100644 --- a/app/lib/task/scheduler.dart +++ b/app/lib/task/scheduler.dart @@ -101,13 +101,12 @@ Future<(CreateInstancesState, Duration)> runOneCreateInstancesCycle( final instanceName = compute.generateInstanceName(); final zone = pickZone(); - final updated = await updatePackageStateWithPendingVersions( + final payload = await updatePackageStateWithPendingVersions( db, selected.package, zone, instanceName, ); - final payload = updated?.$1; if (payload == null) { return; } @@ -174,15 +173,13 @@ Future<(CreateInstancesState, Duration)> runOneCreateInstancesCycle( banZone(zone, minutes: 15); } if (rollbackPackageState) { - final oldVersionsMap = updated?.$2 ?? const {}; - // Restore the state of the PackageState for versions that were + // Restire the state of the PackageState for versions that were // suppose to run on the instance we just failed to create. // If this doesn't work, we'll eventually retry. Hence, correctness // does not hinge on this transaction being successful. await db.tasks.restorePreviousVersionsState( selected.package, instanceName, - oldVersionsMap, ); } } @@ -221,11 +218,8 @@ Future<(CreateInstancesState, Duration)> runOneCreateInstancesCycle( /// Updates the package state with versions that are already pending or /// will be pending soon. -/// -/// Returns the payload and the old status of the state info version map @visibleForTesting -Future<(Payload, Map)?> -updatePackageStateWithPendingVersions( +Future updatePackageStateWithPendingVersions( DatastoreDB db, String package, String zone, @@ -237,7 +231,6 @@ updatePackageStateWithPendingVersions( // presumably the package was deleted. return null; } - final oldVersionsMap = {...?s.versions}; final now = clock.now(); final pendingVersions = derivePendingVersions( @@ -253,13 +246,11 @@ updatePackageStateWithPendingVersions( // Update PackageState s.versions!.addAll({ for (final v in pendingVersions.map((v) => v.toString())) - v: PackageVersionStateInfo( + v: s.versions![v]!.scheduleNew( scheduled: now, - attempts: s.versions![v]!.attempts + 1, zone: zone, - instance: instanceName, + instanceName: instanceName, secretToken: createUuid(), - finished: s.versions![v]!.finished, ), }); s.pendingAt = derivePendingAt( @@ -279,6 +270,6 @@ updatePackageStateWithPendingVersions( ), ), ); - return (payload, oldVersionsMap); + return payload; }); }