open-design/apps/daemon/tests/critique-orchestrator.test.ts
marco 5dd70b5016
Some checks failed
ci / Validate workspace (push) Successful in 12m32s
landing-page-ci / Validate landing page (push) Successful in 9m41s
landing-page-deploy / Deploy landing page (push) Failing after 5m23s
github-metrics / Generate repository metrics SVG (push) Failing after 2m3s
refresh-contributors-wall / Refresh contributors wall cache bust (push) Failing after 11s
Initial import: open-design source for helix-mind.ai distribution
This repository contains the open-design daemon CLI source code, built
and packaged at https://helix-mind.ai/cli/open-design/latest.tgz for use
by the HelixMind /design slash command.

Licenses: Apache-2.0 (root) + MIT (skills/*)
2026-05-06 20:50:24 +02:00

778 lines
29 KiB
TypeScript

import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { mkdtempSync, existsSync, readFileSync } from 'node:fs';
import { rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import Database from 'better-sqlite3';
import { migrateCritique, getCritiqueRun } from '../src/critique/persistence.js';
import { runOrchestrator, type CritiqueSseBus, type OrchestratorParams } from '../src/critique/orchestrator.js';
import type { CritiqueSseEvent } from '@open-design/contracts/critique';
import { defaultCritiqueConfig, type CritiqueConfig } from '@open-design/contracts/critique';
// ---------------------------------------------------------------------------
// DB fixture
// ---------------------------------------------------------------------------
function freshDb(): Database.Database {
const db = new Database(':memory:');
db.pragma('journal_mode = WAL');
db.pragma('foreign_keys = ON');
db.exec(`
CREATE TABLE projects (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL
);
CREATE TABLE conversations (
id TEXT PRIMARY KEY,
project_id TEXT NOT NULL,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL,
FOREIGN KEY(project_id) REFERENCES projects(id) ON DELETE CASCADE
);
INSERT INTO projects (id, name, created_at, updated_at) VALUES ('p1', 'p1', 0, 0);
INSERT INTO conversations (id, project_id, created_at, updated_at) VALUES ('c1', 'p1', 0, 0);
`);
migrateCritique(db);
return db;
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
function makeBus(): { bus: CritiqueSseBus; events: CritiqueSseEvent[] } {
const events: CritiqueSseEvent[] = [];
const bus: CritiqueSseBus = { emit: (e) => { events.push(e); } };
return { bus, events };
}
/**
* Builds a minimal 3-round happy-path wire protocol stream. Uses a threshold
* low enough (1.0) so every round passes, meaning SHIP with status=shipped.
*/
function happyStream3Rounds(): string {
return `<CRITIQUE_RUN version="1" maxRounds="3" threshold="8.0" scale="10">
<ROUND n="1">
<PANELIST role="designer">
<NOTES>Design intent v1.</NOTES>
<ARTIFACT mime="text/html"><![CDATA[<html></html>]]></ARTIFACT>
</PANELIST>
<PANELIST role="critic" score="9.0">
<DIM name="hierarchy" score="9">Good layout.</DIM>
</PANELIST>
<PANELIST role="brand" score="9.0">
<DIM name="voice" score="9">Strong brand.</DIM>
</PANELIST>
<PANELIST role="a11y" score="9.0">
<DIM name="contrast" score="9">Passes AA.</DIM>
</PANELIST>
<PANELIST role="copy" score="9.0">
<DIM name="clarity" score="9">Clear copy.</DIM>
</PANELIST>
<ROUND_END n="1" composite="9.0" must_fix="0" decision="continue">
<REASON>Composite 9.0 but continuing per test.</REASON>
</ROUND_END>
</ROUND>
<ROUND n="2">
<PANELIST role="designer">
<NOTES>Design intent v2.</NOTES>
</PANELIST>
<PANELIST role="critic" score="9.2">
<DIM name="hierarchy" score="9">Better.</DIM>
</PANELIST>
<PANELIST role="brand" score="9.1">
<DIM name="voice" score="9">Consistent.</DIM>
</PANELIST>
<PANELIST role="a11y" score="9.3">
<DIM name="contrast" score="9">Still passes.</DIM>
</PANELIST>
<PANELIST role="copy" score="9.0">
<DIM name="clarity" score="9">Still clear.</DIM>
</PANELIST>
<ROUND_END n="2" composite="9.15" must_fix="0" decision="continue">
<REASON>Continuing to round 3.</REASON>
</ROUND_END>
</ROUND>
<ROUND n="3">
<PANELIST role="designer">
<NOTES>Design intent v3.</NOTES>
</PANELIST>
<PANELIST role="critic" score="9.5">
<DIM name="hierarchy" score="9">Excellent.</DIM>
</PANELIST>
<PANELIST role="brand" score="9.4">
<DIM name="voice" score="9">Perfect.</DIM>
</PANELIST>
<PANELIST role="a11y" score="9.6">
<DIM name="contrast" score="9">Excellent.</DIM>
</PANELIST>
<PANELIST role="copy" score="9.3">
<DIM name="clarity" score="9">Great.</DIM>
</PANELIST>
<ROUND_END n="3" composite="9.45" must_fix="0" decision="ship">
<REASON>Threshold met.</REASON>
</ROUND_END>
</ROUND>
<SHIP round="3" composite="9.45" status="shipped">
<ARTIFACT mime="text/html"><![CDATA[<html><body>final</body></html>]]></ARTIFACT>
<SUMMARY>Design converged in 3 rounds.</SUMMARY>
</SHIP>
</CRITIQUE_RUN>`;
}
async function* streamOf(text: string, chunkSize = 64): AsyncIterable<string> {
for (let i = 0; i < text.length; i += chunkSize) {
yield text.slice(i, i + chunkSize);
}
}
// ---------------------------------------------------------------------------
// Setup / Teardown
// ---------------------------------------------------------------------------
let tmpDir: string;
let db: Database.Database;
beforeEach(() => {
tmpDir = mkdtempSync(join(tmpdir(), 'od-orch-test-'));
db = freshDb();
});
afterEach(async () => {
db.close();
await rm(tmpDir, { recursive: true, force: true });
});
// ---------------------------------------------------------------------------
// Happy path
// ---------------------------------------------------------------------------
describe('runOrchestrator - happy path', () => {
it('3-round shipped run: row reflects shipped + composite + rounds + transcript path', async () => {
const { bus, events } = makeBus();
const artifactDir = join(tmpDir, 'run1');
const cfg = defaultCritiqueConfig();
const result = await runOrchestrator({
runId: 'r1',
projectId: 'p1',
conversationId: 'c1',
artifactId: 'a1',
artifactDir,
adapter: 'claude',
cfg,
db,
bus,
stdout: streamOf(happyStream3Rounds()),
});
expect(result.status).toBe('shipped');
expect(result.composite).toBeCloseTo(9.45, 1);
expect(result.rounds).toHaveLength(3);
expect(result.transcriptPath).toBeTruthy();
const row = getCritiqueRun(db, 'r1');
expect(row?.status).toBe('shipped');
expect(row?.rounds).toHaveLength(3);
expect(row?.transcriptPath).toBeTruthy();
// Transcript file exists on disk.
const transcriptFile = join(artifactDir, result.transcriptPath!);
expect(existsSync(transcriptFile)).toBe(true);
// SSE events emitted: should include run_started and ship.
const eventNames = events.map((e) => e.event);
expect(eventNames).toContain('critique.run_started');
expect(eventNames).toContain('critique.ship');
});
it('SSE events are emitted in source order', async () => {
const { bus, events } = makeBus();
const artifactDir = join(tmpDir, 'run-order');
await runOrchestrator({
runId: 'r-order',
projectId: 'p1',
conversationId: null,
artifactId: 'a1',
artifactDir,
adapter: 'claude',
cfg: defaultCritiqueConfig(),
db,
bus,
stdout: streamOf(happyStream3Rounds()),
});
const names = events.map((e) => e.event);
const runStartedIdx = names.indexOf('critique.run_started');
const shipIdx = names.lastIndexOf('critique.ship');
expect(runStartedIdx).toBe(0);
expect(shipIdx).toBeGreaterThan(runStartedIdx);
});
});
// ---------------------------------------------------------------------------
// Malformed / degraded
// ---------------------------------------------------------------------------
describe('runOrchestrator - degraded', () => {
it('malformed input: row is degraded, critique.degraded emitted, no transcript path in row (transcript may still be written)', async () => {
const { bus, events } = makeBus();
const artifactDir = join(tmpDir, 'run-malformed');
// Malformed: ROUND before CRITIQUE_RUN.
const malformedText = `<ROUND n="1"><PANELIST role="critic" score="9"></PANELIST></ROUND>`;
const result = await runOrchestrator({
runId: 'r-malformed',
projectId: 'p1',
conversationId: null,
artifactId: 'a1',
artifactDir,
adapter: 'claude',
cfg: defaultCritiqueConfig(),
db,
bus,
stdout: streamOf(malformedText),
});
expect(result.status).toBe('degraded');
const row = getCritiqueRun(db, 'r-malformed');
expect(row?.status).toBe('degraded');
const degradedEvents = events.filter((e) => e.event === 'critique.degraded');
expect(degradedEvents).toHaveLength(1);
});
});
// ---------------------------------------------------------------------------
// Fallback policy
// ---------------------------------------------------------------------------
describe('runOrchestrator - fallback policy', () => {
it('below threshold: stream ends without SHIP, ship_best selects highest composite', async () => {
const { bus, events } = makeBus();
const artifactDir = join(tmpDir, 'run-below');
// 2 rounds but no SHIP - scores below threshold (default 8.0).
const noShipText = `<CRITIQUE_RUN version="1" maxRounds="3" threshold="8.0" scale="10">
<ROUND n="1">
<PANELIST role="designer">
<NOTES>v1</NOTES>
<ARTIFACT mime="text/html"><![CDATA[<html></html>]]></ARTIFACT>
</PANELIST>
<PANELIST role="critic" score="6.0">
<DIM name="h" score="6">needs work</DIM>
<MUST_FIX>Fix hierarchy</MUST_FIX>
</PANELIST>
<PANELIST role="brand" score="6.0"><DIM name="v" score="6">ok</DIM></PANELIST>
<PANELIST role="a11y" score="6.0"><DIM name="c" score="6">ok</DIM></PANELIST>
<PANELIST role="copy" score="6.0"><DIM name="cl" score="6">ok</DIM></PANELIST>
<ROUND_END n="1" composite="6.0" must_fix="1" decision="continue">
<REASON>Below threshold.</REASON>
</ROUND_END>
</ROUND>
<ROUND n="2">
<PANELIST role="designer"><NOTES>v2</NOTES></PANELIST>
<PANELIST role="critic" score="7.0"><DIM name="h" score="7">better</DIM></PANELIST>
<PANELIST role="brand" score="7.0"><DIM name="v" score="7">ok</DIM></PANELIST>
<PANELIST role="a11y" score="7.0"><DIM name="c" score="7">ok</DIM></PANELIST>
<PANELIST role="copy" score="7.0"><DIM name="cl" score="7">ok</DIM></PANELIST>
<ROUND_END n="2" composite="7.0" must_fix="0" decision="continue">
<REASON>Still below threshold.</REASON>
</ROUND_END>
</ROUND>
</CRITIQUE_RUN>`;
const cfg: CritiqueConfig = { ...defaultCritiqueConfig(), fallbackPolicy: 'ship_best' };
const result = await runOrchestrator({
runId: 'r-below',
projectId: 'p1',
conversationId: null,
artifactId: 'a1',
artifactDir,
adapter: 'claude',
cfg,
db,
bus,
stdout: streamOf(noShipText),
});
expect(result.status).toBe('below_threshold');
// ship_best should select round 2 (composite 7.0 > 6.0).
expect(result.composite).toBeGreaterThan(6.0);
const row = getCritiqueRun(db, 'r-below');
expect(row?.status).toBe('below_threshold');
const shipEvents = events.filter((e) => e.event === 'critique.ship');
expect(shipEvents).toHaveLength(1);
});
it('fallback policy fail: row is failed, no synthetic ship event', async () => {
const { bus, events } = makeBus();
const artifactDir = join(tmpDir, 'run-failpolicy');
const noShipText = `<CRITIQUE_RUN version="1" maxRounds="3" threshold="8.0" scale="10">
<ROUND n="1">
<PANELIST role="designer">
<NOTES>v1</NOTES>
<ARTIFACT mime="text/html"><![CDATA[<html></html>]]></ARTIFACT>
</PANELIST>
<PANELIST role="critic" score="6.0"><DIM name="h" score="6">ok</DIM></PANELIST>
<PANELIST role="brand" score="6.0"><DIM name="v" score="6">ok</DIM></PANELIST>
<PANELIST role="a11y" score="6.0"><DIM name="c" score="6">ok</DIM></PANELIST>
<PANELIST role="copy" score="6.0"><DIM name="cl" score="6">ok</DIM></PANELIST>
<ROUND_END n="1" composite="6.0" must_fix="0" decision="continue">
<REASON>Below threshold.</REASON>
</ROUND_END>
</ROUND>
</CRITIQUE_RUN>`;
const cfg: CritiqueConfig = { ...defaultCritiqueConfig(), fallbackPolicy: 'fail' };
const result = await runOrchestrator({
runId: 'r-failpolicy',
projectId: 'p1',
conversationId: null,
artifactId: 'a1',
artifactDir,
adapter: 'claude',
cfg,
db,
bus,
stdout: streamOf(noShipText),
});
expect(result.status).toBe('failed');
const row = getCritiqueRun(db, 'r-failpolicy');
expect(row?.status).toBe('failed');
const shipEvents = events.filter((e) => e.event === 'critique.ship');
expect(shipEvents).toHaveLength(0);
});
});
// ---------------------------------------------------------------------------
// Timeout
// ---------------------------------------------------------------------------
describe('runOrchestrator - timeouts', () => {
it('per-round timeout: stalled stream causes timed_out row', async () => {
const { bus } = makeBus();
const artifactDir = join(tmpDir, 'run-round-timeout');
// Source that yields initial data then stalls past the per-round timeout.
async function* stallingSource(): AsyncIterable<string> {
yield '<CRITIQUE_RUN version="1" maxRounds="3" threshold="8.0" scale="10">\n';
yield ' <ROUND n="1">\n';
yield ' <PANELIST role="designer">\n';
yield ' <NOTES>v1</NOTES>\n';
yield ' <ARTIFACT mime="text/html"><![CDATA[<html></html>]]></ARTIFACT>\n';
yield ' </PANELIST>\n';
// Stall: never send ROUND_END, timeout will fire.
await new Promise<void>((_, reject) => setTimeout(() => reject(new Error('stall')), 200));
}
const cfg: CritiqueConfig = {
...defaultCritiqueConfig(),
perRoundTimeoutMs: 50,
totalTimeoutMs: 60_000,
};
const result = await runOrchestrator({
runId: 'r-round-timeout',
projectId: 'p1',
conversationId: null,
artifactId: 'a1',
artifactDir,
adapter: 'claude',
cfg,
db,
bus,
stdout: stallingSource(),
});
expect(result.status).toBe('timed_out');
const row = getCritiqueRun(db, 'r-round-timeout');
expect(row?.status).toBe('timed_out');
}, 5000);
it('total timeout: wall-clock deadline exceeded causes timed_out row', async () => {
const { bus } = makeBus();
const artifactDir = join(tmpDir, 'run-total-timeout');
async function* slowSource(): AsyncIterable<string> {
yield '<CRITIQUE_RUN version="1" maxRounds="3" threshold="8.0" scale="10">\n';
await new Promise<void>((_, reject) => setTimeout(() => reject(new Error('total stall')), 200));
}
const cfg: CritiqueConfig = {
...defaultCritiqueConfig(),
perRoundTimeoutMs: 60_000,
totalTimeoutMs: 50,
};
const result = await runOrchestrator({
runId: 'r-total-timeout',
projectId: 'p1',
conversationId: null,
artifactId: 'a1',
artifactDir,
adapter: 'claude',
cfg,
db,
bus,
stdout: slowSource(),
});
expect(result.status).toBe('timed_out');
const row = getCritiqueRun(db, 'r-total-timeout');
expect(row?.status).toBe('timed_out');
}, 5000);
});
// ---------------------------------------------------------------------------
// Abort signal
// ---------------------------------------------------------------------------
describe('runOrchestrator - abort signal', () => {
it('abort mid-run: row is interrupted, transcript captures events seen so far', async () => {
const { bus, events } = makeBus();
const artifactDir = join(tmpDir, 'run-abort');
const controller = new AbortController();
async function* abortingSource(): AsyncIterable<string> {
yield '<CRITIQUE_RUN version="1" maxRounds="3" threshold="8.0" scale="10">\n';
yield ' <ROUND n="1">\n';
yield ' <PANELIST role="designer">\n';
yield ' <NOTES>v1</NOTES>\n';
yield ' <ARTIFACT mime="text/html"><![CDATA[<html></html>]]></ARTIFACT>\n';
yield ' </PANELIST>\n';
// Abort mid-stream.
controller.abort();
yield ' <PANELIST role="critic" score="9">\n';
}
const result = await runOrchestrator({
runId: 'r-abort',
projectId: 'p1',
conversationId: null,
artifactId: 'a1',
artifactDir,
adapter: 'claude',
cfg: defaultCritiqueConfig(),
db,
bus,
stdout: abortingSource(),
signal: controller.signal,
});
expect(result.status).toBe('interrupted');
const row = getCritiqueRun(db, 'r-abort');
expect(row?.status).toBe('interrupted');
// Transcript should exist with partial events.
if (result.transcriptPath) {
expect(existsSync(join(artifactDir, result.transcriptPath))).toBe(true);
}
const interruptedEvents = events.filter((e) => e.event === 'critique.interrupted');
expect(interruptedEvents).toHaveLength(1);
});
});
// ---------------------------------------------------------------------------
// Defensive entry validation
// ---------------------------------------------------------------------------
describe('runOrchestrator - defensive entry', () => {
it('throws RangeError on invalid cfg (negative scoreThreshold) before any side effects', async () => {
const { bus } = makeBus();
const cfg: CritiqueConfig = { ...defaultCritiqueConfig(), scoreThreshold: -1 };
await expect(
runOrchestrator({
runId: 'r-invalid',
projectId: 'p1',
conversationId: null,
artifactId: 'a1',
artifactDir: join(tmpDir, 'run-invalid'),
adapter: 'claude',
cfg,
db,
bus,
stdout: streamOf(''),
}),
).rejects.toThrow(RangeError);
// No row should have been inserted.
expect(getCritiqueRun(db, 'r-invalid')).toBeNull();
});
it('throws RangeError on invalid cfg (zero perRoundTimeoutMs)', async () => {
const { bus } = makeBus();
const cfg: CritiqueConfig = { ...defaultCritiqueConfig(), perRoundTimeoutMs: 0 };
await expect(
runOrchestrator({
runId: 'r-invalid2',
projectId: 'p1',
conversationId: null,
artifactId: 'a1',
artifactDir: join(tmpDir, 'run-invalid2'),
adapter: 'claude',
cfg,
db,
bus,
stdout: streamOf(''),
}),
).rejects.toThrow(RangeError);
expect(getCritiqueRun(db, 'r-invalid2')).toBeNull();
});
});
// ---------------------------------------------------------------------------
// Child exit races (Defect 4)
// ---------------------------------------------------------------------------
describe('runOrchestrator - child exit race (Defect 4)', () => {
it('child exits non-zero mid-stream: result is failed with cli_exit_nonzero', async () => {
const { bus, events } = makeBus();
const artifactDir = join(tmpDir, 'run-child-exit');
// Stub child that exits with code 1 immediately.
let killCalled = false;
const stubChild = { kill: (_sig?: number | NodeJS.Signals) => { killCalled = true; return true as boolean; } };
// childExitPromise resolves with code=1 after a short delay.
const childExitPromise = new Promise<{ code: number | null; signal: string | null }>(
(resolve) => setTimeout(() => resolve({ code: 1, signal: null }), 20),
);
// Stdout that emits the run header then stalls.
// Uses a short delay (longer than childExitPromise's 20ms) so the child
// exit race wins before the stall promise resolves, but the generator
// itself does eventually resolve so iter.return() cleanup doesn't hang.
async function* stallingStdout(): AsyncIterable<string> {
yield '<CRITIQUE_RUN version="1" maxRounds="3" threshold="8.0" scale="10">\n';
yield ' <ROUND n="1">\n';
// Stall for longer than the child exit delay (20ms) but eventually resolve
// so the generator can be cleaned up by iter.return() in applyTimeouts.
await new Promise<void>((resolve) => setTimeout(resolve, 5000));
}
const cfg: CritiqueConfig = {
...defaultCritiqueConfig(),
// Long timeouts so only the child exit race wins; we don't want the
// per-round or total timer to fire before childExitPromise resolves.
perRoundTimeoutMs: 30_000,
totalTimeoutMs: 30_000,
};
const result = await runOrchestrator({
runId: 'r-child-exit',
projectId: 'p1',
conversationId: null,
artifactId: 'a1',
artifactDir,
adapter: 'claude',
cfg,
db,
bus,
stdout: stallingStdout(),
child: stubChild,
childExitPromise,
});
expect(result.status).toBe('failed');
const row = getCritiqueRun(db, 'r-child-exit');
expect(row?.status).toBe('failed');
expect(killCalled).toBe(true);
const failedEvents = events.filter((e) => e.event === 'critique.failed');
expect(failedEvents).toHaveLength(1);
}, 10000);
it('child exits zero before parser completes: parser continues until stream ends', async () => {
const { bus } = makeBus();
const artifactDir = join(tmpDir, 'run-child-exit-zero');
// Child exits with code 0 (zero is not an error).
const childExitPromise = new Promise<{ code: number | null; signal: string | null }>(
(resolve) => setTimeout(() => resolve({ code: 0, signal: null }), 10),
);
// A complete valid 1-round stream that finishes after the child exit.
async function* delayedStream(): AsyncIterable<string> {
await new Promise<void>((r) => setTimeout(r, 30));
yield '<CRITIQUE_RUN version="1" maxRounds="1" threshold="8.0" scale="10">\n';
yield ' <ROUND n="1">\n';
yield ' <PANELIST role="designer"><NOTES>v1</NOTES><ARTIFACT mime="text/html"><![CDATA[<p>v1</p>]]></ARTIFACT></PANELIST>\n';
yield ' <PANELIST role="critic" score="9.0"><DIM name="h" score="9">ok</DIM></PANELIST>\n';
yield ' <PANELIST role="brand" score="9.0"><DIM name="v" score="9">ok</DIM></PANELIST>\n';
yield ' <PANELIST role="a11y" score="9.0"><DIM name="c" score="9">ok</DIM></PANELIST>\n';
yield ' <PANELIST role="copy" score="9.0"><DIM name="cl" score="9">ok</DIM></PANELIST>\n';
yield ' <ROUND_END n="1" composite="9.0" must_fix="0" decision="ship"><REASON>ok</REASON></ROUND_END>\n';
yield ' </ROUND>\n';
yield ' <SHIP round="1" composite="9.0" status="shipped">\n';
yield ' <ARTIFACT mime="text/html"><![CDATA[<p>final</p>]]></ARTIFACT>\n';
yield ' <SUMMARY>done</SUMMARY>\n';
yield ' </SHIP>\n';
yield '</CRITIQUE_RUN>\n';
}
const result = await runOrchestrator({
runId: 'r-child-exit-zero',
projectId: 'p1',
conversationId: null,
artifactId: 'a1',
artifactDir,
adapter: 'claude',
cfg: defaultCritiqueConfig(),
db,
bus,
stdout: delayedStream(),
childExitPromise,
});
// Zero exit does not disrupt the parser; it should complete as shipped.
expect(result.status).toBe('shipped');
}, 10000);
});
// ---------------------------------------------------------------------------
// Timeout / abort best-so-far fallback (Defect 7)
// ---------------------------------------------------------------------------
describe('runOrchestrator - fallback on timeout/abort (Defect 7)', () => {
it('timeout after 2 completed rounds: status=timed_out, score=max(composite)', async () => {
const { bus, events } = makeBus();
const artifactDir = join(tmpDir, 'run-timeout-fallback');
// 2 complete rounds then stall.
// Two complete rounds. After emitting both ROUND_END events, stall so the
// total-timeout fires and the orchestrator elects a fallback round.
const twoRounds = `<CRITIQUE_RUN version="1" maxRounds="3" threshold="9.0" scale="10">
<ROUND n="1">
<PANELIST role="designer"><NOTES>v1</NOTES><ARTIFACT mime="text/html"><![CDATA[<p>v1</p>]]></ARTIFACT></PANELIST>
<PANELIST role="critic" score="6.0"><DIM name="h" score="6">ok</DIM></PANELIST>
<PANELIST role="brand" score="6.0"><DIM name="v" score="6">ok</DIM></PANELIST>
<PANELIST role="a11y" score="6.0"><DIM name="c" score="6">ok</DIM></PANELIST>
<PANELIST role="copy" score="6.0"><DIM name="cl" score="6">ok</DIM></PANELIST>
<ROUND_END n="1" composite="6.0" must_fix="0" decision="continue"><REASON>continue</REASON></ROUND_END>
</ROUND>
<ROUND n="2">
<PANELIST role="designer"><NOTES>v2</NOTES></PANELIST>
<PANELIST role="critic" score="7.5"><DIM name="h" score="7">better</DIM></PANELIST>
<PANELIST role="brand" score="7.5"><DIM name="v" score="7">ok</DIM></PANELIST>
<PANELIST role="a11y" score="7.5"><DIM name="c" score="7">ok</DIM></PANELIST>
<PANELIST role="copy" score="7.5"><DIM name="cl" score="7">ok</DIM></PANELIST>
<ROUND_END n="2" composite="7.5" must_fix="0" decision="continue"><REASON>continue</REASON></ROUND_END>
</ROUND>`;
async function* stallingAfterTwoRounds(): AsyncIterable<string> {
yield* streamOf(twoRounds, 64);
// After both rounds land, stall so the total-timeout fires.
await new Promise<void>((resolve) => setTimeout(resolve, 10_000));
}
const cfg: CritiqueConfig = {
...defaultCritiqueConfig(),
// Per-round timeout starts when the first panelist_open of a new round
// fires. Set it to 200ms so it fires quickly once the stall begins.
// Total timeout is long so only the per-round timer fires.
// But we yield the stall after ROUND_END so no panelist_open is active.
// Use total timeout of 300ms which fires after the stall begins.
perRoundTimeoutMs: 60_000,
totalTimeoutMs: 300,
fallbackPolicy: 'ship_best',
};
const result = await runOrchestrator({
runId: 'r-timeout-fallback',
projectId: 'p1',
conversationId: null,
artifactId: 'a1',
artifactDir,
adapter: 'claude',
cfg,
db,
bus,
stdout: stallingAfterTwoRounds(),
});
expect(result.status).toBe('timed_out');
// Best round is 2 with composite 7.5.
expect(result.composite).toBeCloseTo(7.5, 1);
const row = getCritiqueRun(db, 'r-timeout-fallback');
expect(row?.status).toBe('timed_out');
expect(row?.score).toBeCloseTo(7.5, 1);
// A synthetic ship event should have been emitted.
const shipEvents = events.filter((e) => e.event === 'critique.ship');
expect(shipEvents).toHaveLength(1);
}, 15000);
it('abort after 1 completed round: status=interrupted, score matches that round', async () => {
const { bus, events } = makeBus();
const artifactDir = join(tmpDir, 'run-abort-fallback');
const controller = new AbortController();
const oneRound = `<CRITIQUE_RUN version="1" maxRounds="3" threshold="9.0" scale="10">
<ROUND n="1">
<PANELIST role="designer"><NOTES>v1</NOTES><ARTIFACT mime="text/html"><![CDATA[<p>v1</p>]]></ARTIFACT></PANELIST>
<PANELIST role="critic" score="8.0"><DIM name="h" score="8">ok</DIM></PANELIST>
<PANELIST role="brand" score="8.0"><DIM name="v" score="8">ok</DIM></PANELIST>
<PANELIST role="a11y" score="8.0"><DIM name="c" score="8">ok</DIM></PANELIST>
<PANELIST role="copy" score="8.0"><DIM name="cl" score="8">ok</DIM></PANELIST>
<ROUND_END n="1" composite="8.0" must_fix="0" decision="continue"><REASON>continue</REASON></ROUND_END>
</ROUND>`;
async function* abortAfterRound(): AsyncIterable<string> {
yield* streamOf(oneRound, 64);
controller.abort();
// One more yield after abort to ensure the abort is caught.
yield ' <ROUND n="2">\n';
}
const cfg: CritiqueConfig = {
...defaultCritiqueConfig(),
fallbackPolicy: 'ship_best',
};
const result = await runOrchestrator({
runId: 'r-abort-fallback',
projectId: 'p1',
conversationId: null,
artifactId: 'a1',
artifactDir,
adapter: 'claude',
cfg,
db,
bus,
stdout: abortAfterRound(),
signal: controller.signal,
});
expect(result.status).toBe('interrupted');
expect(result.composite).toBeCloseTo(8.0, 1);
const row = getCritiqueRun(db, 'r-abort-fallback');
expect(row?.status).toBe('interrupted');
expect(row?.score).toBeCloseTo(8.0, 1);
const shipEvents = events.filter((e) => e.event === 'critique.ship');
expect(shipEvents).toHaveLength(1);
});
});