PEL/TRACE-DAG: wire exec_result_ref + node failure diagnostics

Persist pre-trace ExecutionResult to embed exec_result_ref in traces
Capture node-level runtime diagnostics and clone into trace artifacts
Clarify trace spec for pre-trace result linkage
Add tests for exec_result_ref and node-failure diagnostics
This commit is contained in:
Carl Niklas Rydberg 2025-12-22 11:16:23 +01:00
parent 85b37e1701
commit a4932b1217
6 changed files with 398 additions and 4 deletions

View file

@ -36,7 +36,7 @@ Status legend: ✅ completed, ⬜ pending.
8. ✅ `tier1/pel-program-dag-desc-1.md`
9. ✅ `tier1/enc-pel-program-dag-1.md`
10. ✅ `tier1/enc-pel1-result-1.md`
11. ⬜ `tier1/pel-trace-dag-1.md`
11. ✅ `tier1/pel-trace-dag-1.md`
12. ⬜ `tier1/enc-pel-trace-dag-1.md`
13. ⬜ `tier1/tgk-1-core.md`
14. ⬜ `tier1/enc-tgk1-edge-1.md`
@ -155,5 +155,21 @@ Status legend: ✅ completed, ⬜ pending.
`store_failure` ↔ status coupling; decoder accepted out-of-range status/kind.
- Resolution: enforced invariants in encoder; added strict status/kind checks
in decoder; added invariant regression tests.
- Tests: not run (new runtime-diagnostics test added after prior user-reported
pass of 14 tests).
## 2025-12-22 — PEL/TRACE-DAG/1 (`tier1/pel-trace-dag-1.md`)
- Scope: trace artifact construction, node-level trace semantics, and surface
wiring for Exec_DAG runs.
- Findings: trace artifacts never include `exec_result_ref` even when a surface
`ExecutionResult` Artifact is persisted; node-level diagnostics are always
empty (including `NODE_FAILED`, which SHOULD carry at least one deterministic
diagnostic entry).
- Resolution: wired `exec_result_ref` into trace construction by persisting an
initial surface result (pre-trace) to obtain `exec_result_ref`, then encoding
the trace with that reference, then persisting the final surface result with
`trace_ref` (note: this produces two result Artifacts for a successful run);
per-node runtime diagnostics are now captured and copied into trace entries
for failed nodes.
- Tests: command not provided — pass (user reported “100% tests passed, 0 tests
failed out of 14”).

View file

@ -67,6 +67,8 @@ typedef struct {
uint32_t status_code;
amduat_artifact_t *outputs;
size_t outputs_len;
amduat_pel_diagnostic_entry_t *diagnostics;
size_t diagnostics_len;
} amduat_pel_program_dag_node_result_t;
typedef struct {

View file

@ -138,6 +138,106 @@ static bool amduat_diag_setf(
return true;
}
/*
 * Release all diagnostics attached to a node result.
 *
 * Frees every entry's message, then the entry array itself, and resets the
 * node result to "no diagnostics". Does nothing when `entry` is NULL or when
 * it has no diagnostics array.
 */
static void amduat_node_diag_free(
    amduat_pel_program_dag_node_result_t *entry) {
  size_t pos = 0;

  if (entry == NULL) {
    return;
  }
  if (entry->diagnostics == NULL) {
    return;
  }

  /* Each entry owns its message bytes; release them before the array. */
  while (pos < entry->diagnostics_len) {
    amduat_octets_free(&entry->diagnostics[pos].message);
    pos++;
  }

  free(entry->diagnostics);
  entry->diagnostics_len = 0;
  entry->diagnostics = NULL;
}
/*
 * Replace the diagnostics of a node result with a single entry.
 *
 * The message bytes are copied, so the caller keeps ownership of `message`.
 * The new entry is fully built before the previous diagnostics are released,
 * so on failure `entry` is left exactly as it was.
 *
 * Returns false if `entry` is NULL, if `message` is NULL while `message_len`
 * is non-zero, or if an allocation fails; returns true otherwise.
 */
static bool amduat_node_diag_set(
    amduat_pel_program_dag_node_result_t *entry,
    uint32_t code,
    const uint8_t *message,
    size_t message_len) {
  amduat_pel_diagnostic_entry_t *entries;
  uint8_t *message_copy = NULL;

  if (entry == NULL) {
    return false;
  }
  /* There is nothing to copy from a NULL source; refusing here keeps
   * uninitialized heap bytes out of the stored diagnostic message. */
  if (message == NULL && message_len != 0) {
    return false;
  }

  if (message_len != 0) {
    message_copy = (uint8_t *)malloc(message_len);
    if (message_copy == NULL) {
      return false;
    }
    memcpy(message_copy, message, message_len);
  }

  entries = (amduat_pel_diagnostic_entry_t *)calloc(1, sizeof(*entries));
  if (entries == NULL) {
    free(message_copy);
    return false;
  }
  entries[0].code = code;
  entries[0].message = amduat_octets(message_copy, message_len);

  /* Drop the old diagnostics only once the replacement is complete. */
  amduat_node_diag_free(entry);
  entry->diagnostics = entries;
  entry->diagnostics_len = 1;
  return true;
}
/*
 * printf-style front end for amduat_node_diag_set().
 *
 * Formats `fmt` with the variadic arguments into a temporary heap buffer and
 * stores the resulting text (without the trailing NUL) as the single
 * diagnostic of `entry`. The temporary buffer is always released here;
 * amduat_node_diag_set() keeps its own copy.
 *
 * Returns false if `entry` or `fmt` is NULL, if formatting fails, if the
 * buffer cannot be allocated, or if amduat_node_diag_set() fails.
 */
static bool amduat_node_diag_setf(
    amduat_pel_program_dag_node_result_t *entry,
    uint32_t code,
    const char *fmt,
    ...) {
  va_list args;
  va_list measure_args;
  uint8_t *text = NULL;
  size_t text_len;
  int measured;
  bool formatted = true;
  bool stored;

  if (fmt == NULL || entry == NULL) {
    return false;
  }

  va_start(args, fmt);

  /* First pass on a copy of the argument list: measure only. */
  va_copy(measure_args, args);
  measured = vsnprintf(NULL, 0, fmt, measure_args);
  va_end(measure_args);
  if (measured < 0) {
    va_end(args);
    return false;
  }
  text_len = (size_t)measured;

  /* Second pass: render into a buffer with room for the terminator. An empty
   * result needs no buffer at all. */
  if (text_len != 0) {
    text = (uint8_t *)malloc(text_len + 1);
    formatted = text != NULL &&
                vsnprintf((char *)text, text_len + 1, fmt, args) >= 0;
  }
  va_end(args);

  if (!formatted) {
    free(text);
    return false;
  }

  stored = amduat_node_diag_set(entry, code, text, text_len);
  free(text);
  return stored;
}
enum {
AMDUAT_PEL_DAG_DIAG_STRUCTURAL_INVALID = 0x00010001u,
AMDUAT_PEL_DAG_DIAG_UNKNOWN_OP = 0x00010002u,
@ -150,7 +250,8 @@ enum {
AMDUAT_PEL_DAG_DIAG_ROOT_OUTPUT_INDEX = 0x00010009u,
AMDUAT_PEL_DAG_DIAG_INVALID_INPUT_INDEX = 0x00020001u,
AMDUAT_PEL_DAG_DIAG_OUTPUT_INDEX = 0x0001000au,
AMDUAT_PEL_DAG_DIAG_RUNTIME_FAILED = 0x00030001u
AMDUAT_PEL_DAG_DIAG_RUNTIME_FAILED = 0x00030001u,
AMDUAT_PEL_DAG_DIAG_NODE_RUNTIME_FAILED = 0x00030002u
};
typedef enum {
@ -431,6 +532,7 @@ static void amduat_node_results_free(
free(entry->outputs);
entry->outputs = NULL;
entry->outputs_len = 0;
amduat_node_diag_free(entry);
}
free(node_results);
}
@ -987,6 +1089,15 @@ static bool amduat_pel_program_dag_exec_internal(
}
node_results[node_index].status = AMDUAT_PEL_NODE_TRACE_FAILED;
node_results[node_index].status_code = status_code;
(void)amduat_node_diag_setf(
&node_results[node_index],
AMDUAT_PEL_DAG_DIAG_NODE_RUNTIME_FAILED,
"runtime failed: node %u (%.*s@%u) status_code %u",
(unsigned int)node->id,
(int)node->op.name.len,
(const char *)node->op.name.data,
(unsigned int)node->op.version,
(unsigned int)status_code);
any_node_executed = true;
amduat_set_result(out_result, AMDUAT_PEL_EXEC_STATUS_RUNTIME_FAILED,
AMDUAT_PEL_EXEC_ERROR_RUNTIME, status_code);

View file

@ -204,6 +204,15 @@ static void amduat_trace_nodes_free(amduat_pel_node_trace_dag_t *nodes,
node->output_refs = NULL;
node->output_refs_len = 0;
}
if (node->diagnostics != NULL) {
size_t j;
for (j = 0; j < node->diagnostics_len; ++j) {
amduat_octets_free(&node->diagnostics[j].message);
}
free(node->diagnostics);
node->diagnostics = NULL;
node->diagnostics_len = 0;
}
}
free(nodes);
}
@ -216,6 +225,8 @@ static bool amduat_store_trace(
size_t input_refs_len,
bool has_params_ref,
amduat_reference_t params_ref,
bool has_exec_result_ref,
amduat_reference_t exec_result_ref,
const amduat_pel_execution_result_value_t *core_result,
const amduat_pel_program_t *program,
const amduat_pel_program_dag_trace_t *trace_eval,
@ -259,6 +270,25 @@ static bool amduat_store_trace(
trace_node->diagnostics = NULL;
trace_node->diagnostics_len = 0;
if (state->diagnostics_len != 0) {
size_t j;
trace_node->diagnostics = (amduat_pel_diagnostic_entry_t *)calloc(
state->diagnostics_len, sizeof(*trace_node->diagnostics));
if (trace_node->diagnostics == NULL) {
amduat_trace_nodes_free(node_traces, node_count);
return false;
}
trace_node->diagnostics_len = state->diagnostics_len;
for (j = 0; j < state->diagnostics_len; ++j) {
trace_node->diagnostics[j].code = state->diagnostics[j].code;
if (!amduat_octets_clone(state->diagnostics[j].message,
&trace_node->diagnostics[j].message)) {
amduat_trace_nodes_free(node_traces, node_count);
return false;
}
}
}
if (state->status == AMDUAT_PEL_NODE_TRACE_OK &&
state->outputs_len != 0) {
size_t j;
@ -287,7 +317,10 @@ static bool amduat_store_trace(
trace.program_ref = program_ref;
trace.status = core_result->status;
trace.summary = core_result->summary;
trace.has_exec_result_ref = false;
trace.has_exec_result_ref = has_exec_result_ref;
if (has_exec_result_ref) {
trace.exec_result_ref = exec_result_ref;
}
trace.input_refs = (amduat_reference_t *)input_refs;
trace.input_refs_len = input_refs_len;
trace.has_params_ref = has_params_ref;
@ -335,11 +368,13 @@ bool amduat_pel_surf_run(amduat_asl_store_t *store,
amduat_reference_t *output_refs;
size_t output_refs_len;
amduat_reference_t trace_ref;
amduat_reference_t exec_result_ref;
size_t i;
bool program_decoded = false;
bool has_params_artifact = false;
bool exec_invoked = false;
bool trace_ok = false;
bool has_exec_result_ref = false;
if (store == NULL || out_output_refs == NULL ||
out_output_refs_len == NULL || out_result_ref == NULL) {
@ -355,6 +390,8 @@ bool amduat_pel_surf_run(amduat_asl_store_t *store,
out_result_ref->digest = amduat_octets(NULL, 0);
trace_ref.hash_id = 0;
trace_ref.digest = amduat_octets(NULL, 0);
exec_result_ref.hash_id = 0;
exec_result_ref.digest = amduat_octets(NULL, 0);
if (!amduat_reference_eq(scheme_ref,
amduat_pel_program_dag_scheme_ref())) {
@ -558,9 +595,17 @@ bool amduat_pel_surf_run(amduat_asl_store_t *store,
trace_ok = false;
if (exec_invoked) {
if (!amduat_store_surface_result(
store, &core_result, scheme_ref, program_ref, input_refs,
input_refs_len, output_refs, output_refs_len, has_params_ref,
params_ref, false, NULL, false, trace_ref, &exec_result_ref)) {
goto cleanup;
}
has_exec_result_ref = true;
trace_ok = amduat_store_trace(
store, scheme_ref, program_ref, input_refs, input_refs_len,
has_params_ref, params_ref, &core_result,
has_params_ref, params_ref, has_exec_result_ref, exec_result_ref,
&core_result,
program_decoded ? &program : NULL,
program_decoded ? &trace_eval : NULL, &trace_ref);
if (!trace_ok) {
@ -591,6 +636,7 @@ bool amduat_pel_surf_run(amduat_asl_store_t *store,
}
amduat_pel_program_dag_free_outputs(outputs, outputs_len);
amduat_pel_execution_result_free(&core_result);
amduat_pel_surf_free_ref(&exec_result_ref);
return true;
cleanup:
@ -608,5 +654,6 @@ cleanup:
}
amduat_pel_program_dag_free_outputs(outputs, outputs_len);
amduat_pel_execution_result_free(&core_result);
amduat_pel_surf_free_ref(&exec_result_ref);
return false;
}

View file

@ -5,6 +5,7 @@
#include "amduat/enc/pel_program_dag.h"
#include "amduat/enc/pel_trace_dag.h"
#include "amduat/hash/asl1.h"
#include "amduat/pel/opreg_kernel.h"
#include "amduat/pel/program_dag.h"
#include "amduat/pel/program_dag_desc.h"
#include "amduat/pel/surf.h"
@ -345,6 +346,44 @@ static bool build_concat_program_artifact(amduat_artifact_t *out_artifact) {
return true;
}
/*
 * Build an encoded program DAG made of one "pel.bytes.concat" node (id 1,
 * version 1, empty params) that reads external inputs 0 and 1, with output 0
 * of that node as the only root.
 *
 * On success the encoded bytes are wrapped in an artifact tagged
 * AMDUAT_PEL_TYPE_TAG_PROGRAM_DAG_1, written to *out_artifact, and true is
 * returned. Returns false without writing *out_artifact if encoding fails.
 */
static bool build_concat2_program_artifact(amduat_artifact_t *out_artifact) {
  const char op_concat[] = "pel.bytes.concat";
  amduat_pel_dag_input_t concat_inputs[2];
  amduat_pel_node_t concat_node[1];
  amduat_pel_root_ref_t root_refs[1];
  amduat_pel_program_t dag;
  amduat_octets_t program_bytes;

  /* Both operands come straight from the caller-supplied input list. */
  concat_inputs[0].kind = AMDUAT_PEL_DAG_INPUT_EXTERNAL;
  concat_inputs[1].kind = AMDUAT_PEL_DAG_INPUT_EXTERNAL;
  concat_inputs[0].value.external.input_index = 0;
  concat_inputs[1].value.external.input_index = 1;

  concat_node[0].id = 1;
  concat_node[0].op.version = 1;
  concat_node[0].op.name = amduat_octets(op_concat, strlen(op_concat));
  concat_node[0].params = amduat_octets(NULL, 0);
  concat_node[0].inputs_len = 2;
  concat_node[0].inputs = concat_inputs;

  root_refs[0].output_index = 0;
  root_refs[0].node_id = 1;

  dag.roots_len = 1;
  dag.roots = root_refs;
  dag.nodes_len = 1;
  dag.nodes = concat_node;

  program_bytes = amduat_octets(NULL, 0);
  if (amduat_enc_pel_program_dag_encode_v1(&dag, &program_bytes)) {
    *out_artifact = amduat_artifact_with_type(
        program_bytes, amduat_type_tag(AMDUAT_PEL_TYPE_TAG_PROGRAM_DAG_1));
    return true;
  }
  return false;
}
static bool build_params_program_artifact(amduat_artifact_t *out_artifact) {
amduat_pel_node_t nodes[1];
amduat_pel_root_ref_t roots[1];
@ -401,6 +440,8 @@ static int test_surf_success(void) {
amduat_pel_surface_execution_result_t decoded;
amduat_artifact_t trace_artifact;
amduat_pel_trace_dag_value_t trace;
amduat_artifact_t exec_result_artifact;
amduat_pel_surface_execution_result_t exec_result_decoded;
const char op_const[] = "pel.bytes.const";
int exit_code = 1;
@ -492,6 +533,34 @@ static int test_surf_success(void) {
}
artifact_free(&trace_artifact);
if (!trace.has_exec_result_ref) {
fprintf(stderr, "missing trace exec_result_ref\n");
amduat_enc_pel_trace_dag_free(&trace);
goto cleanup_decoded;
}
if (stub_store_get(&stub, trace.exec_result_ref, &exec_result_artifact) !=
AMDUAT_ASL_STORE_OK) {
fprintf(stderr, "exec result get failed\n");
amduat_enc_pel_trace_dag_free(&trace);
goto cleanup_decoded;
}
if (!exec_result_artifact.has_type_tag ||
exec_result_artifact.type_tag.tag_id != AMDUAT_TYPE_TAG_PEL1_RESULT_1) {
fprintf(stderr, "exec result type tag mismatch\n");
artifact_free(&exec_result_artifact);
amduat_enc_pel_trace_dag_free(&trace);
goto cleanup_decoded;
}
if (!amduat_enc_pel1_result_decode_v1(exec_result_artifact.bytes,
&exec_result_decoded)) {
fprintf(stderr, "exec result decode failed\n");
artifact_free(&exec_result_artifact);
amduat_enc_pel_trace_dag_free(&trace);
goto cleanup_decoded;
}
artifact_free(&exec_result_artifact);
amduat_enc_pel1_result_free(&exec_result_decoded);
if (trace.node_traces_len != 1 ||
trace.node_traces[0].status != AMDUAT_PEL_NODE_TRACE_OK ||
trace.node_traces[0].output_refs_len != 1 ||
@ -686,6 +755,145 @@ cleanup_store:
return exit_code;
}
/*
 * Checks that a node-level runtime failure produces a trace whose failed node
 * carries a diagnostic entry.
 *
 * A two-input "pel.bytes.concat" program is run over one untyped input and one
 * input tagged AMDUAT_TYPE_TAG_PEL1_RESULT_1. The test then verifies that:
 *   - the run yields no output refs,
 *   - the surface result reports RUNTIME_FAILED / ERROR_RUNTIME with a
 *     non-zero status code and a trace_ref,
 *   - the trace has exactly one node trace, marked FAILED, whose first
 *     diagnostic has code 0x00030002 and the expected message built from
 *     AMDUAT_PEL_KERNEL_STATUS_CONCAT_TYPE_TAG_MISMATCH.
 *
 * Every reference obtained from the store is released on every exit path.
 * Returns 0 on success and 1 on any failure.
 */
static int test_surf_runtime_failure_trace_diag(void) {
  stub_store_t stub;
  amduat_asl_store_t store;
  amduat_asl_store_ops_t ops;
  amduat_asl_store_config_t cfg;
  amduat_artifact_t program_artifact;
  amduat_reference_t program_ref;
  amduat_reference_t input_refs[2];
  amduat_reference_t *output_refs = NULL;
  size_t output_refs_len = 0;
  amduat_reference_t result_ref;
  amduat_artifact_t result_artifact;
  amduat_pel_surface_execution_result_t decoded;
  amduat_artifact_t trace_artifact;
  amduat_pel_trace_dag_value_t trace;
  char expected_msg[128];
  int expected_len;
  int exit_code = 1;
  uint8_t payload_a[] = {'a'};
  uint8_t payload_b[] = {'b'};
  amduat_artifact_t input_a;
  amduat_artifact_t input_b;
  /* Diagnostic code the executor is expected to attach to the failed node. */
  const uint32_t diag_code = 0x00030002u;

  cfg.encoding_profile_id = AMDUAT_ENC_ASL1_CORE_V1;
  cfg.hash_id = AMDUAT_HASH_ASL1_ID_SHA256;

  stub_store_init(&stub);
  stub.config = cfg;
  amduat_asl_store_ops_init(&ops);
  ops.put = stub_store_put;
  ops.get = stub_store_get;
  amduat_asl_store_init(&store, cfg, ops, &stub);

  if (!build_concat2_program_artifact(&program_artifact)) {
    fprintf(stderr, "build program failed\n");
    goto cleanup_store;
  }
  if (stub_store_put(&stub, program_artifact, &program_ref) !=
      AMDUAT_ASL_STORE_OK) {
    fprintf(stderr, "program put failed\n");
    free((void *)program_artifact.bytes.data);
    goto cleanup_store;
  }
  free((void *)program_artifact.bytes.data);

  /* The second input carries a type tag while the first does not. */
  input_a = amduat_artifact(amduat_octets(payload_a, sizeof(payload_a)));
  input_b = amduat_artifact_with_type(
      amduat_octets(payload_b, sizeof(payload_b)),
      amduat_type_tag(AMDUAT_TYPE_TAG_PEL1_RESULT_1));
  if (stub_store_put(&stub, input_a, &input_refs[0]) !=
      AMDUAT_ASL_STORE_OK) {
    fprintf(stderr, "input a put failed\n");
    goto cleanup_program;
  }
  if (stub_store_put(&stub, input_b, &input_refs[1]) !=
      AMDUAT_ASL_STORE_OK) {
    fprintf(stderr, "input b put failed\n");
    goto cleanup_input_a;
  }

  if (!amduat_pel_surf_run(&store, amduat_pel_program_dag_scheme_ref(),
                           program_ref, input_refs, 2, false,
                           amduat_reference(0, amduat_octets(NULL, 0)),
                           &output_refs, &output_refs_len, &result_ref)) {
    fprintf(stderr, "surf run failed\n");
    goto cleanup_refs;
  }
  if (output_refs_len != 0) {
    fprintf(stderr, "unexpected output refs\n");
    goto cleanup_refs;
  }

  if (stub_store_get(&stub, result_ref, &result_artifact) !=
      AMDUAT_ASL_STORE_OK) {
    fprintf(stderr, "result get failed\n");
    goto cleanup_refs;
  }
  if (!amduat_enc_pel1_result_decode_v1(result_artifact.bytes, &decoded)) {
    artifact_free(&result_artifact);
    fprintf(stderr, "result decode failed\n");
    goto cleanup_refs;
  }
  artifact_free(&result_artifact);

  if (decoded.core_result.status != AMDUAT_PEL_EXEC_STATUS_RUNTIME_FAILED ||
      decoded.core_result.summary.kind != AMDUAT_PEL_EXEC_ERROR_RUNTIME ||
      decoded.core_result.summary.status_code == 0 ||
      !decoded.has_trace_ref) {
    fprintf(stderr, "runtime result mismatch\n");
    goto cleanup_decoded;
  }

  if (stub_store_get(&stub, decoded.trace_ref, &trace_artifact) !=
      AMDUAT_ASL_STORE_OK) {
    fprintf(stderr, "trace get failed\n");
    goto cleanup_decoded;
  }
  if (!amduat_enc_pel_trace_dag_decode_v1(trace_artifact.bytes, &trace)) {
    fprintf(stderr, "trace decode failed\n");
    artifact_free(&trace_artifact);
    goto cleanup_decoded;
  }
  artifact_free(&trace_artifact);

  expected_len = snprintf(expected_msg, sizeof(expected_msg),
                          "runtime failed: node 1 (pel.bytes.concat@1) "
                          "status_code %u",
                          (unsigned int)
                              AMDUAT_PEL_KERNEL_STATUS_CONCAT_TYPE_TAG_MISMATCH);
  /* A negative or truncated snprintf result would make the comparison below
   * meaningless, so treat both as a mismatch. */
  if (expected_len < 0 ||
      (size_t)expected_len >= sizeof(expected_msg) ||
      trace.node_traces_len != 1 ||
      trace.node_traces[0].status != AMDUAT_PEL_NODE_TRACE_FAILED ||
      trace.node_traces[0].diagnostics_len < 1 ||
      trace.node_traces[0].diagnostics[0].code != diag_code ||
      trace.node_traces[0].diagnostics[0].message.len !=
          (size_t)expected_len ||
      memcmp(trace.node_traces[0].diagnostics[0].message.data,
             expected_msg, (size_t)expected_len) != 0) {
    fprintf(stderr, "trace runtime diagnostics mismatch\n");
    amduat_enc_pel_trace_dag_free(&trace);
    goto cleanup_decoded;
  }

  amduat_enc_pel_trace_dag_free(&trace);
  exit_code = 0;

  /* Labels are ordered so that each one releases only what has been acquired
   * by the time control can reach it. */
cleanup_decoded:
  amduat_enc_pel1_result_free(&decoded);
cleanup_refs:
  amduat_pel_surf_free_refs(output_refs, output_refs_len);
  amduat_pel_surf_free_ref(&result_ref);
  amduat_pel_surf_free_ref(&input_refs[1]);
cleanup_input_a:
  amduat_pel_surf_free_ref(&input_refs[0]);
cleanup_program:
  amduat_pel_surf_free_ref(&program_ref);
cleanup_store:
  stub_store_free(&stub);
  return exit_code;
}
static int test_surf_store_io_failure(void) {
stub_store_t stub;
amduat_asl_store_t store;
@ -868,6 +1076,9 @@ int main(void) {
if (test_surf_missing_input() != 0) {
return 1;
}
if (test_surf_runtime_failure_trace_diag() != 0) {
return 1;
}
if (test_surf_store_io_failure() != 0) {
return 1;
}

View file

@ -379,6 +379,13 @@ Constraints:
* If the run produced a surface-level `ExecutionResult` Artifact (as in `PEL/1-SURF`), this SHOULD be its `Reference`.
* If no such Artifact exists, or it exists but is not persisted, `exec_result_ref` MUST be absent.
* If a surface persists an `ExecutionResult` Artifact that includes
`trace_ref`, it MAY still set `exec_result_ref` to a distinct
pre-trace `ExecutionResult` Artifact for the same run to avoid a
circular dependency between Artifacts. In that case, the surface
`ExecutionResult` Artifact that carries `trace_ref` is the canonical
surface result for that run, while `exec_result_ref` exists solely to
link the trace back to an execution result.
* `input_refs`: