Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .server-changes/clickhouse-output-string.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
area: webapp
type: fix
---

Store task run output as serialized JSON text in ClickHouse instead of the native JSON column. Deeply nested output could exceed ClickHouse 26.2's `input_format_binary_max_type_complexity` limit, causing some runs to fail replication and appear stuck.
24 changes: 23 additions & 1 deletion apps/webapp/app/services/runsReplicationService.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1061,6 +1061,11 @@ export class RunsReplicationService {
_version: bigint
): Promise<TaskRunInsertArray> {
const output = await this.#prepareJson(run.output, run.outputType);
// The serialized output is written to the String `output_raw` column rather than the
// native JSON `output` column. A String has constant binary type complexity, so deeply
// nested payloads can no longer breach the type-complexity ceiling and silently drop the
// terminal row on insert. The JSON `output` column is left empty to keep its type trivial.
const outputRaw = serializeJsonRaw(output.data);
const errorData = { data: run.error };

// Calculate error fingerprint for failed runs
Expand Down Expand Up @@ -1100,7 +1105,7 @@ export class RunsReplicationService {
run.usageDurationMs ?? 0, // usage_duration_ms
run.costInCents ?? 0, // cost_in_cents
run.baseCostInCents ?? 0, // base_cost_in_cents
output, // output
{ data: undefined }, // output (left empty; serialized value lives in output_raw)
errorData, // error
errorFingerprint, // error_fingerprint
run.runTags ?? [], // tags
Expand Down Expand Up @@ -1130,6 +1135,7 @@ export class RunsReplicationService {
annotations?.rootTriggerSource ?? "", // root_trigger_source
annotations?.taskKind ?? "", // task_kind
run.isWarmStart ?? null, // is_warm_start
outputRaw, // output_raw
];
}

Expand Down Expand Up @@ -1369,3 +1375,19 @@ function lsnToUInt64(lsn: string): bigint {
const [seg, off] = lsn.split("/");
return (BigInt("0x" + seg) << 32n) | BigInt("0x" + off);
}

/**
* Serialize an already-parsed JSON value to the text stored in a `*_raw` String column.
* Returns an empty string when there is no value, which the query layer treats as "no data".
*/
function serializeJsonRaw(data: unknown): string {
if (data === undefined) {
return "";
}

try {
return JSON.stringify(data) ?? "";
} catch {
return "";
}
}
8 changes: 5 additions & 3 deletions apps/webapp/app/v3/querySchemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -353,9 +353,11 @@ export const runsSchema: TableSchema = {
description: "The data you returned from the task.",
example: '{"result": "success"}',
}),
nullValue: "'{}'", // Transform NULL checks to compare against empty object
textColumn: "output_text", // Use output_text for full JSON value queries
dataPrefix: "data", // Internal data is wrapped in {"data": ...}
// Stored as serialized JSON text in output_raw (a String) rather than the native JSON
// column, so reads/writes can't hit the JSON binary type-complexity ceiling.
nullValue: "''", // Empty raw string means "no output"
textColumn: "output_raw", // Full-value reads/search use the raw String column
rawColumn: "output_raw", // Path access (output.foo) compiles to a JSONExtract bridge over output_raw
Comment thread
matt-aitken marked this conversation as resolved.
},
error: {
name: "error",
Expand Down
8 changes: 6 additions & 2 deletions apps/webapp/test/runsReplicationService.part3.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -168,13 +168,17 @@ describe("RunsReplicationService (part 3/7)", () => {
status: "COMPLETED_SUCCESSFULLY",
})
);
expect(found?.output).toBeDefined();
// Output is stored as serialized text in output_raw; the native JSON column stays empty
expect(found?.output).toStrictEqual({});
expect(found?.output_raw).toBe(`{"foo":"bar"}`);
}

// Check the run with the bad JSON
// The run with the bad JSON lands with its output blanked (output_raw empty) rather than
// being dropped, so its terminal status is still recorded.
const foundBad = result?.find((r: any) => r.span_id === "bulk-10");
expect(foundBad).toBeDefined();
expect(foundBad?.output).toStrictEqual({});
expect(foundBad?.output_raw).toBe("");

await runsReplicationService.stop();
}
Expand Down
5 changes: 3 additions & 2 deletions apps/webapp/test/runsReplicationService.part6.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -473,8 +473,9 @@ describe("RunsReplicationService (part 6/7)", () => {
expect(parseClickhouseTimestamp(clickhouseRun.queued_at)).toBe(queuedAt.getTime());
expect(parseClickhouseTimestamp(clickhouseRun.expired_at)).toBeNull();

// Output (parsed JSON)
expect(clickhouseRun.output).toEqual({ data: { result: "test-output" } });
// Output is stored as serialized JSON text in output_raw; the native JSON column is empty
expect(clickhouseRun.output).toEqual({});
expect(clickhouseRun.output_raw).toEqual(JSON.stringify({ result: "test-output" }));

// Error
expect(clickhouseRun.error).toEqual({
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- +goose Up
-- Store task output as a serialized JSON String alongside the native JSON `output` column.
-- A String has constant binary type complexity regardless of payload depth/width, so writes
-- and reads can never hit input_format_binary_max_type_complexity the way the JSON type can.
ALTER TABLE trigger_dev.task_runs_v2
ADD COLUMN IF NOT EXISTS output_raw String DEFAULT '';
Comment thread
matt-aitken marked this conversation as resolved.
Comment thread
matt-aitken marked this conversation as resolved.

-- Keep full-text search on output fast now that reads come from output_raw instead of output_text.
ALTER TABLE trigger_dev.task_runs_v2
ADD INDEX IF NOT EXISTS idx_output_raw output_raw TYPE ngrambf_v1 (3, 131072, 3, 0) GRANULARITY 4;

-- +goose Down
ALTER TABLE trigger_dev.task_runs_v2
DROP INDEX IF EXISTS idx_output_raw;

ALTER TABLE trigger_dev.task_runs_v2
DROP COLUMN IF EXISTS output_raw;
8 changes: 8 additions & 0 deletions internal-packages/clickhouse/src/taskRuns.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ describe("Task Runs V2", () => {
"", // root_trigger_source
"", // task_kind
null, // is_warm_start
"", // output_raw
];

const [insertError, insertResult] = await insert([taskRunData]);
Expand Down Expand Up @@ -224,6 +225,7 @@ describe("Task Runs V2", () => {
"", // root_trigger_source
"", // task_kind
null, // is_warm_start
"", // output_raw
];

const run2: TaskRunInsertArray = [
Expand Down Expand Up @@ -281,6 +283,7 @@ describe("Task Runs V2", () => {
"", // root_trigger_source
"", // task_kind
null, // is_warm_start
"", // output_raw
];

const [insertError, insertResult] = await insert([run1, run2]);
Expand Down Expand Up @@ -385,6 +388,7 @@ describe("Task Runs V2", () => {
"", // root_trigger_source
"", // task_kind
null, // is_warm_start
"", // output_raw
];

const [insertError, insertResult] = await insert([taskRun]);
Expand Down Expand Up @@ -497,6 +501,7 @@ describe("Task Runs V2", () => {
"", // root_trigger_source
"", // task_kind
null, // is_warm_start
"", // output_raw
];

const childA_v1: TaskRunInsertArray = [
Expand Down Expand Up @@ -554,6 +559,7 @@ describe("Task Runs V2", () => {
"",
"",
null,
"", // output_raw
];

const childA_v2: TaskRunInsertArray = [...childA_v1];
Expand Down Expand Up @@ -615,6 +621,7 @@ describe("Task Runs V2", () => {
"",
"",
null,
"", // output_raw
];

const childDeleted_v1: TaskRunInsertArray = [
Expand Down Expand Up @@ -672,6 +679,7 @@ describe("Task Runs V2", () => {
"",
"",
null,
"", // output_raw
];

const childDeleted_v2: TaskRunInsertArray = [...childDeleted_v1];
Expand Down
4 changes: 4 additions & 0 deletions internal-packages/clickhouse/src/taskRuns.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export const TaskRunV2 = z.object({
cost_in_cents: z.number().default(0),
base_cost_in_cents: z.number().default(0),
output: z.unknown(),
output_raw: z.string().default(""),
error: z.unknown(),
error_fingerprint: z.string().default(""),
tags: z.array(z.string()).default([]),
Expand Down Expand Up @@ -117,6 +118,7 @@ export const TASK_RUN_COLUMNS = [
"root_trigger_source",
"task_kind",
"is_warm_start",
"output_raw",
] as const;

export type TaskRunColumnName = (typeof TASK_RUN_COLUMNS)[number];
Expand Down Expand Up @@ -186,6 +188,7 @@ export type TaskRunFieldTypes = {
root_trigger_source: string;
task_kind: string;
is_warm_start: boolean | null;
output_raw: string;
};

/**
Expand Down Expand Up @@ -326,6 +329,7 @@ export type TaskRunInsertArray = [
root_trigger_source: string,
task_kind: string,
is_warm_start: boolean | null,
output_raw: string,
];

/**
Expand Down
Loading
Loading