fix: require real progress evidence artifacts
This commit is contained in:
@@ -96,13 +96,36 @@ async function runLongTaskWrapper(workspaceDir: string, ctx: any): Promise<any |
|
|||||||
}
|
}
|
||||||
|
|
||||||
function buildProgressEvidence(wrapperResult: any): Record<string, unknown> | null {
|
function buildProgressEvidence(wrapperResult: any): Record<string, unknown> | null {
|
||||||
|
const candidate = wrapperResult?.progressEvidence;
|
||||||
|
if (!candidate || typeof candidate !== "object" || Array.isArray(candidate)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
const progressEvidence: Record<string, unknown> = {};
|
const progressEvidence: Record<string, unknown> = {};
|
||||||
|
|
||||||
const taskName = typeof wrapperResult?.taskRecord?.task_name === "string"
|
const sessionKey = typeof candidate.sessionKey === "string"
|
||||||
? wrapperResult.taskRecord.task_name.trim()
|
? candidate.sessionKey.trim()
|
||||||
: "";
|
: "";
|
||||||
if (wrapperResult?.silentLaunchOk === true && taskName) {
|
if (sessionKey) {
|
||||||
progressEvidence.sessionKey = taskName;
|
progressEvidence.sessionKey = sessionKey;
|
||||||
|
}
|
||||||
|
|
||||||
|
const runId = typeof candidate.runId === "string"
|
||||||
|
? candidate.runId.trim()
|
||||||
|
: "";
|
||||||
|
if (runId) {
|
||||||
|
progressEvidence.runId = runId;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Array.isArray(candidate.modified_files) && candidate.modified_files.length > 0) {
|
||||||
|
progressEvidence.modified_files = candidate.modified_files;
|
||||||
|
}
|
||||||
|
|
||||||
|
const verificationResult = typeof candidate.verificationResult === "string"
|
||||||
|
? candidate.verificationResult.trim()
|
||||||
|
: "";
|
||||||
|
if (verificationResult) {
|
||||||
|
progressEvidence.verificationResult = verificationResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
return Object.keys(progressEvidence).length > 0 ? progressEvidence : null;
|
return Object.keys(progressEvidence).length > 0 ? progressEvidence : null;
|
||||||
@@ -174,9 +197,8 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
|
|||||||
const progressEvidenceReason = claimedProgression && !progressEvidence
|
const progressEvidenceReason = claimedProgression && !progressEvidence
|
||||||
? "progression claim requires concrete evidence such as sessionKey, runId, modified_files, or verification result"
|
? "progression claim requires concrete evidence such as sessionKey, runId, modified_files, or verification result"
|
||||||
: "";
|
: "";
|
||||||
const hasExternalizedCheckpointEvidence = wrapperResult.silentLaunchOk === true
|
const hasExternalizedCheckpointEvidence = typeof wrapperResult.externalizedCheckpointPath === "string"
|
||||||
&& typeof wrapperResult.taskRecord?.task_name === "string"
|
&& wrapperResult.externalizedCheckpointPath.trim().length > 0;
|
||||||
&& wrapperResult.taskRecord.task_name.trim().length > 0;
|
|
||||||
const hasButtonPathClosureEvidence = needsOwnerDecision && wrapperResult.silentLaunchOk === true;
|
const hasButtonPathClosureEvidence = needsOwnerDecision && wrapperResult.silentLaunchOk === true;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -206,8 +228,8 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
|
|||||||
dispatchEvidence: "",
|
dispatchEvidence: "",
|
||||||
fileChangeEvidence: "",
|
fileChangeEvidence: "",
|
||||||
verificationEvidence: "",
|
verificationEvidence: "",
|
||||||
checkpointArtifactEvidence: hasExternalizedCheckpointEvidence ? wrapperResult.taskRecord.task_name.trim() : "",
|
checkpointArtifactEvidence: hasExternalizedCheckpointEvidence ? wrapperResult.externalizedCheckpointPath.trim() : "",
|
||||||
externalizedCheckpointPath: hasExternalizedCheckpointEvidence ? wrapperResult.taskRecord.task_name.trim() : "",
|
externalizedCheckpointPath: hasExternalizedCheckpointEvidence ? wrapperResult.externalizedCheckpointPath.trim() : "",
|
||||||
externalizedTrigger: hasExternalizedCheckpointEvidence ? "hook-preflight-checkpoint" : "",
|
externalizedTrigger: hasExternalizedCheckpointEvidence ? "hook-preflight-checkpoint" : "",
|
||||||
handoffMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply",
|
handoffMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply",
|
||||||
replyClosureMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply",
|
replyClosureMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply",
|
||||||
|
|||||||
@@ -209,7 +209,8 @@ async function main() {
|
|||||||
dispatched: true,
|
dispatched: true,
|
||||||
event: 'dispatch',
|
event: 'dispatch',
|
||||||
},
|
},
|
||||||
taskRecord: { task_name: 'task-123' },
|
progressEvidence: { sessionKey: 'task-123' },
|
||||||
|
externalizedCheckpointPath: 'checkpoints/task-123.json',
|
||||||
handoff: { mode: 'direct_reply' },
|
handoff: { mode: 'direct_reply' },
|
||||||
}), async () => runScenario(forceRecall, requestText));
|
}), async () => runScenario(forceRecall, requestText));
|
||||||
assert.match(passInjected, /gateStatus=pass/, 'hook pass-path should pass when wrapper provides concrete progressEvidence');
|
assert.match(passInjected, /gateStatus=pass/, 'hook pass-path should pass when wrapper provides concrete progressEvidence');
|
||||||
@@ -285,7 +286,8 @@ async function main() {
|
|||||||
dispatched: true,
|
dispatched: true,
|
||||||
event: 'dispatch',
|
event: 'dispatch',
|
||||||
},
|
},
|
||||||
taskRecord: { task_name: 'task-spec-review-missing-evidence' },
|
progressEvidence: { sessionKey: 'task-spec-review-missing-evidence' },
|
||||||
|
externalizedCheckpointPath: 'checkpoints/task-spec-review-missing-evidence.json',
|
||||||
handoff: { mode: 'direct_reply' },
|
handoff: { mode: 'direct_reply' },
|
||||||
}), async () => runScenario(forceRecall, plannerOnlyRequestText));
|
}), async () => runScenario(forceRecall, plannerOnlyRequestText));
|
||||||
assert.match(specReviewWithoutEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook spec-review missing-evidence path should emit auto-chain plan block');
|
assert.match(specReviewWithoutEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook spec-review missing-evidence path should emit auto-chain plan block');
|
||||||
@@ -310,7 +312,8 @@ async function main() {
|
|||||||
dispatched: true,
|
dispatched: true,
|
||||||
event: 'dispatch',
|
event: 'dispatch',
|
||||||
},
|
},
|
||||||
taskRecord: { task_name: 'task-fix-slice-missing-evidence' },
|
progressEvidence: { sessionKey: 'task-fix-slice-missing-evidence' },
|
||||||
|
externalizedCheckpointPath: 'checkpoints/task-fix-slice-missing-evidence.json',
|
||||||
handoff: { mode: 'direct_reply' },
|
handoff: { mode: 'direct_reply' },
|
||||||
}), async () => runScenario(forceRecall, plannerOnlyRequestText));
|
}), async () => runScenario(forceRecall, plannerOnlyRequestText));
|
||||||
assert.match(fixSliceWithoutEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook fix-slice missing-evidence path should emit auto-chain plan block');
|
assert.match(fixSliceWithoutEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook fix-slice missing-evidence path should emit auto-chain plan block');
|
||||||
@@ -334,7 +337,8 @@ async function main() {
|
|||||||
dispatched: true,
|
dispatched: true,
|
||||||
event: 'dispatch',
|
event: 'dispatch',
|
||||||
},
|
},
|
||||||
taskRecord: { task_name: 'task-implementation-missing-evidence' },
|
progressEvidence: { sessionKey: 'task-implementation-missing-evidence' },
|
||||||
|
externalizedCheckpointPath: 'checkpoints/task-implementation-missing-evidence.json',
|
||||||
handoff: { mode: 'direct_reply' },
|
handoff: { mode: 'direct_reply' },
|
||||||
}), async () => runScenario(forceRecall, plannerOnlyRequestText));
|
}), async () => runScenario(forceRecall, plannerOnlyRequestText));
|
||||||
assert.match(specReviewWithoutImplementationEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook implementation missing-evidence path should emit auto-chain plan block');
|
assert.match(specReviewWithoutImplementationEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook implementation missing-evidence path should emit auto-chain plan block');
|
||||||
|
|||||||
Reference in New Issue
Block a user