feat: require concrete evidence for progress claims
This commit is contained in:
@@ -124,6 +124,9 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
|
|||||||
concreteNextAction: requiredNextAction,
|
concreteNextAction: requiredNextAction,
|
||||||
}
|
}
|
||||||
: null;
|
: null;
|
||||||
|
const progressEvidenceReason = progressEvidence
|
||||||
|
? ""
|
||||||
|
: "progression claim requires concrete evidence such as sessionKey, runId, modified_files, or verification result";
|
||||||
const hasExternalizedCheckpointEvidence = wrapperResult.silentLaunchOk === true
|
const hasExternalizedCheckpointEvidence = wrapperResult.silentLaunchOk === true
|
||||||
&& typeof wrapperResult.taskRecord?.task_name === "string"
|
&& typeof wrapperResult.taskRecord?.task_name === "string"
|
||||||
&& wrapperResult.taskRecord.task_name.trim().length > 0;
|
&& wrapperResult.taskRecord.task_name.trim().length > 0;
|
||||||
@@ -145,6 +148,7 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
|
|||||||
statusSummary: claimedProgression,
|
statusSummary: claimedProgression,
|
||||||
executionEvidence,
|
executionEvidence,
|
||||||
progressEvidence,
|
progressEvidence,
|
||||||
|
progressEvidenceReason,
|
||||||
sessionKey: typeof progressEvidence?.sessionKey === "string" ? progressEvidence.sessionKey : "",
|
sessionKey: typeof progressEvidence?.sessionKey === "string" ? progressEvidence.sessionKey : "",
|
||||||
runId: typeof progressEvidence?.runId === "string" ? progressEvidence.runId : "",
|
runId: typeof progressEvidence?.runId === "string" ? progressEvidence.runId : "",
|
||||||
modified_files: Array.isArray(progressEvidence?.modified_files) ? progressEvidence.modified_files : [],
|
modified_files: Array.isArray(progressEvidence?.modified_files) ? progressEvidence.modified_files : [],
|
||||||
@@ -221,6 +225,7 @@ function buildGateLockBlock(gateLockResult: GateLockResult | null): string {
|
|||||||
"gateRequired=unknown",
|
"gateRequired=unknown",
|
||||||
"- ENFORCEMENT: Gate-lock evaluator unavailable; keep existing long-task safeguards in force.",
|
"- ENFORCEMENT: Gate-lock evaluator unavailable; keep existing long-task safeguards in force.",
|
||||||
"- ENFORCEMENT: Do not claim you have progressed into the next task or are already pushing the next step unless you have concrete progress evidence such as a sessionKey, runId, modified_files record, verification result, actual dispatch, tool calls, file changes, or a persisted checkpoint artifact.",
|
"- ENFORCEMENT: Do not claim you have progressed into the next task or are already pushing the next step unless you have concrete progress evidence such as a sessionKey, runId, modified_files record, verification result, actual dispatch, tool calls, file changes, or a persisted checkpoint artifact.",
|
||||||
|
"- ENFORCEMENT: Hook inputs for any progression claim should carry progressEvidence (or equivalent concrete fields) so the gate can verify the claim.",
|
||||||
"- HARD_GATE: Evaluator unavailable is not permission to claim silent continuation or next-task progression without verifiable progress evidence.",
|
"- HARD_GATE: Evaluator unavailable is not permission to claim silent continuation or next-task progression without verifiable progress evidence.",
|
||||||
"- HARD_GATE: Fall back to a non-silent, evidence-preserving follow-up if you cannot prove checkpoint state or concrete execution.",
|
"- HARD_GATE: Fall back to a non-silent, evidence-preserving follow-up if you cannot prove checkpoint state or concrete execution.",
|
||||||
"[/LONG_TASK_GATE_LOCK]",
|
"[/LONG_TASK_GATE_LOCK]",
|
||||||
@@ -239,6 +244,7 @@ function buildGateLockBlock(gateLockResult: GateLockResult | null): string {
|
|||||||
})),
|
})),
|
||||||
...((gateLockResult.allowedResponseModes ?? []).map((mode) => `allowedResponseMode=${mode}`)),
|
...((gateLockResult.allowedResponseModes ?? []).map((mode) => `allowedResponseMode=${mode}`)),
|
||||||
"- ENFORCEMENT: Do not claim you have progressed into the next task or are already pushing the next step unless you have concrete progress evidence such as a sessionKey, runId, modified_files record, verification result, actual dispatch, tool calls, file changes, or a persisted checkpoint artifact.",
|
"- ENFORCEMENT: Do not claim you have progressed into the next task or are already pushing the next step unless you have concrete progress evidence such as a sessionKey, runId, modified_files record, verification result, actual dispatch, tool calls, file changes, or a persisted checkpoint artifact.",
|
||||||
|
"- ENFORCEMENT: Hook input should include progressEvidence (or equivalent concrete fields) whenever a progression claim is present.",
|
||||||
"- ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.",
|
"- ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.",
|
||||||
"- ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.",
|
"- ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.",
|
||||||
];
|
];
|
||||||
@@ -246,6 +252,7 @@ function buildGateLockBlock(gateLockResult: GateLockResult | null): string {
|
|||||||
if (gateLockResult.gateStatus === "fail") {
|
if (gateLockResult.gateStatus === "fail") {
|
||||||
lines.push("- HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.");
|
lines.push("- HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.");
|
||||||
lines.push("- HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete progress evidence.");
|
lines.push("- HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete progress evidence.");
|
||||||
|
lines.push("- HARD_GATE: If a progression claim exists, the hook input must supply progressEvidence (or equivalent concrete fields) before the claim can pass gate.");
|
||||||
lines.push("- HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to a sessionKey, runId, modified_files record, verification result, actual tool execution, file changes, emitted messages, or checkpoint records.");
|
lines.push("- HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to a sessionKey, runId, modified_files record, verification result, actual tool execution, file changes, emitted messages, or checkpoint records.");
|
||||||
lines.push("- HARD_GATE: If required evidence is missing, ask for/produce the checkpoint or downgrade to a non-silent, evidence-preserving follow-up.");
|
lines.push("- HARD_GATE: If required evidence is missing, ask for/produce the checkpoint or downgrade to a non-silent, evidence-preserving follow-up.");
|
||||||
lines.push("- HARD_GATE: If owner decision is involved, do not replace button-path closure with plain-text handoff.");
|
lines.push("- HARD_GATE: If owner decision is involved, do not replace button-path closure with plain-text handoff.");
|
||||||
|
|||||||
@@ -197,7 +197,7 @@ function claimsProgressionWithoutEvidence(input) {
|
|||||||
|
|
||||||
if (!hasNonEmptyString(progressionClaim)) return false;
|
if (!hasNonEmptyString(progressionClaim)) return false;
|
||||||
|
|
||||||
return !(hasProgressEvidence(input) || hasExecutionEvidence(input));
|
return !hasProgressEvidence(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
function evaluateGate(input) {
|
function evaluateGate(input) {
|
||||||
|
|||||||
@@ -69,8 +69,10 @@ async function main() {
|
|||||||
'reason=claimed progression without concrete progress evidence is forbidden',
|
'reason=claimed progression without concrete progress evidence is forbidden',
|
||||||
'requiredEvidence=progressEvidence',
|
'requiredEvidence=progressEvidence',
|
||||||
'requiredValue=sessionKey, runId, modified_files, verification result, or equivalent concrete progress evidence',
|
'requiredValue=sessionKey, runId, modified_files, verification result, or equivalent concrete progress evidence',
|
||||||
|
'ENFORCEMENT: Hook input should include progressEvidence (or equivalent concrete fields) whenever a progression claim is present.',
|
||||||
'HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.',
|
'HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.',
|
||||||
'HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete progress evidence.',
|
'HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete progress evidence.',
|
||||||
|
'HARD_GATE: If a progression claim exists, the hook input must supply progressEvidence (or equivalent concrete fields) before the claim can pass gate.',
|
||||||
'HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to a sessionKey, runId, modified_files record, verification result, actual tool execution, file changes, emitted messages, or checkpoint records.',
|
'HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to a sessionKey, runId, modified_files record, verification result, actual tool execution, file changes, emitted messages, or checkpoint records.',
|
||||||
'ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.',
|
'ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.',
|
||||||
'ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.',
|
'ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.',
|
||||||
|
|||||||
Reference in New Issue
Block a user