commit b47b9142244e12a34a450d845b01be201b526fdf Author: Carl Niklas Rydberg Date: Fri Dec 19 19:22:40 2025 +0100 Scaffold C layout and ASL registry model diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..5691f06 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,130 @@ +cmake_minimum_required(VERSION 3.20) +project(amduat LANGUAGES C) + +set(CMAKE_C_STANDARD 11) +set(CMAKE_C_STANDARD_REQUIRED ON) +set(CMAKE_C_EXTENSIONS OFF) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +set(AMDUAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) +set(AMDUAT_INCLUDE_DIR ${AMDUAT_ROOT}/include) +set(AMDUAT_INTERNAL_DIR ${AMDUAT_ROOT}/src/internal) + +function(amduat_add_lib name) + set(options) + set(oneValueArgs) + set(multiValueArgs SRCS) + cmake_parse_arguments(AMDUAT "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if ("${AMDUAT_SRCS}" STREQUAL "") + message(FATAL_ERROR "amduat_add_lib(${name}) requires SRCS") + endif() + + add_library(amduat_${name}_obj OBJECT ${AMDUAT_SRCS}) + target_include_directories(amduat_${name}_obj + PRIVATE ${AMDUAT_INTERNAL_DIR} + PUBLIC ${AMDUAT_INCLUDE_DIR} + ) + + add_library(amduat_${name} STATIC $<TARGET_OBJECTS:amduat_${name}_obj>) + add_library(amduat_${name}_shared SHARED $<TARGET_OBJECTS:amduat_${name}_obj>) + + set_target_properties(amduat_${name} PROPERTIES OUTPUT_NAME amduat_${name}) + set_target_properties(amduat_${name}_shared PROPERTIES OUTPUT_NAME amduat_${name}) + + target_include_directories(amduat_${name} + PRIVATE ${AMDUAT_INTERNAL_DIR} + PUBLIC ${AMDUAT_INCLUDE_DIR} + ) + target_include_directories(amduat_${name}_shared + PRIVATE ${AMDUAT_INTERNAL_DIR} + PUBLIC ${AMDUAT_INCLUDE_DIR} + ) +endfunction() + +function(amduat_link name) + if (ARGC LESS 2) + return() + endif() + + target_link_libraries(amduat_${name} PUBLIC ${ARGN}) + target_link_libraries(amduat_${name}_shared PUBLIC ${ARGN}) +endfunction() + +set(AMDUAT_UTIL_SRCS + src/internal/buf.c + src/internal/arena.c + src/internal/varint.c + src/internal/endian.c +) + +set(AMDUAT_ASL_SRCS + src/kernel/asl/core.c 
src/near_core/asl/store.c + src/near_core/asl/registry.c +) + +set(AMDUAT_HASH_ASL1_SRCS + src/near_core/hash/asl1.c +) + +set(AMDUAT_ENC_SRCS + src/near_core/enc/asl1_core.c + src/near_core/enc/pel_program_dag.c + src/near_core/enc/pel_trace_dag.c + src/near_core/enc/tgk1_edge.c +) + +set(AMDUAT_PEL_SRCS + src/kernel/pel/core.c + src/pel_stack/surf/surf.c + src/pel_stack/program_dag/program_dag.c + src/pel_stack/program_dag/program_dag_desc.c + src/pel_stack/trace_dag/trace_dag.c + src/pel_stack/opreg/kernel.c + src/pel_stack/opreg/kernel_params.c +) + +set(AMDUAT_TGK_SRCS + src/kernel/tgk/core.c + src/tgk_stack/store/store.c + src/tgk_stack/prov/prov.c +) + +set(AMDUAT_ASL_STORE_FS_SRCS + src/adapters/asl_store_fs/asl_store_fs.c +) + +set(AMDUAT_TGK_STORE_MEM_SRCS + src/adapters/tgk_store_mem/tgk_store_mem.c +) + +set(AMDUAT_TGK_STORE_FS_SRCS + src/adapters/tgk_store_fs/tgk_store_fs.c +) + +amduat_add_lib(util SRCS ${AMDUAT_UTIL_SRCS}) + +amduat_add_lib(asl SRCS ${AMDUAT_ASL_SRCS}) +amduat_link(asl amduat_util) + +amduat_add_lib(hash_asl1 SRCS ${AMDUAT_HASH_ASL1_SRCS}) +amduat_link(hash_asl1 amduat_asl amduat_util) + +amduat_add_lib(enc SRCS ${AMDUAT_ENC_SRCS}) +amduat_link(enc amduat_asl amduat_hash_asl1 amduat_util) + +amduat_add_lib(pel SRCS ${AMDUAT_PEL_SRCS}) +amduat_link(pel amduat_asl amduat_enc amduat_hash_asl1 amduat_util) + +amduat_add_lib(tgk SRCS ${AMDUAT_TGK_SRCS}) +amduat_link(tgk amduat_asl amduat_enc amduat_hash_asl1 amduat_util) + +amduat_add_lib(asl_store_fs SRCS ${AMDUAT_ASL_STORE_FS_SRCS}) +amduat_link(asl_store_fs amduat_asl amduat_enc amduat_hash_asl1 amduat_util) + +amduat_add_lib(tgk_store_mem SRCS ${AMDUAT_TGK_STORE_MEM_SRCS}) +amduat_link(tgk_store_mem amduat_tgk amduat_asl amduat_enc amduat_hash_asl1 amduat_util) + +amduat_add_lib(tgk_store_fs SRCS ${AMDUAT_TGK_STORE_FS_SRCS}) +amduat_link(tgk_store_fs amduat_tgk amduat_asl_store_fs amduat_asl amduat_enc amduat_hash_asl1 amduat_util) diff --git a/include/amduat/asl/asl_store_fs.h 
b/include/amduat/asl/asl_store_fs.h new file mode 100644 index 0000000..64a0d56 --- /dev/null +++ b/include/amduat/asl/asl_store_fs.h @@ -0,0 +1,6 @@ +#ifndef AMDUAT_ASL_STORE_FS_H +#define AMDUAT_ASL_STORE_FS_H + +/* Filesystem-backed ASL store adapter public API. */ + +#endif /* AMDUAT_ASL_STORE_FS_H */ diff --git a/include/amduat/asl/core.h b/include/amduat/asl/core.h new file mode 100644 index 0000000..f71cd1b --- /dev/null +++ b/include/amduat/asl/core.h @@ -0,0 +1,53 @@ +#ifndef AMDUAT_ASL_CORE_H +#define AMDUAT_ASL_CORE_H + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* Octet views are borrowed; callers own the backing memory. */ +typedef struct { + const uint8_t *data; + size_t len; +} amduat_octets_t; + +typedef uint16_t amduat_hash_id_t; + +typedef struct { + uint32_t tag_id; +} amduat_type_tag_t; + +typedef struct { + amduat_hash_id_t hash_id; + amduat_octets_t digest; +} amduat_reference_t; + +typedef struct { + amduat_octets_t bytes; + bool has_type_tag; + amduat_type_tag_t type_tag; +} amduat_artifact_t; + +amduat_octets_t amduat_octets(const void *data, size_t len); +bool amduat_octets_eq(amduat_octets_t a, amduat_octets_t b); +bool amduat_octets_is_empty(amduat_octets_t v); + +amduat_type_tag_t amduat_type_tag(uint32_t tag_id); +bool amduat_type_tag_eq(amduat_type_tag_t a, amduat_type_tag_t b); + +amduat_reference_t amduat_reference(amduat_hash_id_t hash_id, amduat_octets_t digest); +bool amduat_reference_eq(amduat_reference_t a, amduat_reference_t b); + +amduat_artifact_t amduat_artifact(amduat_octets_t bytes); +amduat_artifact_t amduat_artifact_with_type(amduat_octets_t bytes, amduat_type_tag_t type_tag); +bool amduat_artifact_eq(amduat_artifact_t a, amduat_artifact_t b); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* AMDUAT_ASL_CORE_H */ diff --git a/include/amduat/asl/registry.h b/include/amduat/asl/registry.h new file mode 100644 index 0000000..1b375c4 --- /dev/null +++ 
b/include/amduat/asl/registry.h @@ -0,0 +1,82 @@ +#ifndef AMDUAT_ASL_REGISTRY_H +#define AMDUAT_ASL_REGISTRY_H + +#include "amduat/asl/core.h" +#include "amduat/asl/store.h" + +#include <stdbool.h> +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* Registry values are immutable artifacts with entries ordered by key bytes. */ +typedef struct { + amduat_octets_t key; + amduat_reference_t value_ref; +} amduat_asl_registry_entry_t; + +typedef struct { + amduat_asl_registry_entry_t *entries; + size_t len; + size_t cap; +} amduat_asl_registry_value_t; + +void amduat_asl_registry_value_init(amduat_asl_registry_value_t *value, + amduat_asl_registry_entry_t *entries, + size_t cap); + +int amduat_asl_registry_key_cmp(amduat_octets_t a, amduat_octets_t b); + +bool amduat_asl_registry_value_insert(amduat_asl_registry_value_t *value, + amduat_asl_registry_entry_t entry); + +const amduat_asl_registry_entry_t *amduat_asl_registry_value_lookup( + const amduat_asl_registry_value_t *value, + amduat_octets_t key); + +typedef struct { + bool (*encode)(void *ctx, + const amduat_asl_registry_value_t *value, + amduat_octets_t *out_bytes); + bool (*decode)(void *ctx, + amduat_octets_t bytes, + amduat_asl_registry_value_t *out_value); + void *ctx; + bool has_type_tag; + amduat_type_tag_t type_tag; +} amduat_asl_registry_codec_t; + +typedef enum { + AMDUAT_ASL_REGISTRY_OK = 0, + AMDUAT_ASL_REGISTRY_ERR_CODEC = 1, + AMDUAT_ASL_REGISTRY_ERR_STORE = 2 +} amduat_asl_registry_error_t; + +typedef struct { + amduat_asl_store_t *store; + amduat_asl_registry_codec_t codec; +} amduat_asl_registry_store_t; + +void amduat_asl_registry_store_init(amduat_asl_registry_store_t *reg, + amduat_asl_store_t *store, + amduat_asl_registry_codec_t codec); + +amduat_asl_registry_error_t amduat_asl_registry_store_put( + amduat_asl_registry_store_t *reg, + const amduat_asl_registry_value_t *value, + amduat_reference_t *out_ref, + amduat_asl_store_error_t *out_store_err); + +amduat_asl_registry_error_t 
amduat_asl_registry_store_get( + amduat_asl_registry_store_t *reg, + amduat_reference_t ref, + amduat_asl_registry_value_t *out_value, + amduat_asl_store_error_t *out_store_err); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* AMDUAT_ASL_REGISTRY_H */ diff --git a/include/amduat/asl/store.h b/include/amduat/asl/store.h new file mode 100644 index 0000000..fed8b23 --- /dev/null +++ b/include/amduat/asl/store.h @@ -0,0 +1,60 @@ +#ifndef AMDUAT_ASL_STORE_H +#define AMDUAT_ASL_STORE_H + +#include "amduat/asl/core.h" + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef uint16_t amduat_asl_encoding_profile_id_t; + +typedef struct { + amduat_asl_encoding_profile_id_t encoding_profile_id; + amduat_hash_id_t hash_id; +} amduat_asl_store_config_t; + +typedef enum { + AMDUAT_ASL_STORE_OK = 0, + AMDUAT_ASL_STORE_ERR_INTEGRITY = 1, + AMDUAT_ASL_STORE_ERR_UNSUPPORTED = 2, + AMDUAT_ASL_STORE_ERR_NOT_FOUND = 3 +} amduat_asl_store_error_t; + +typedef struct { + amduat_asl_store_error_t (*put)(void *ctx, + amduat_artifact_t artifact, + amduat_reference_t *out_ref); + amduat_asl_store_error_t (*get)(void *ctx, + amduat_reference_t ref, + amduat_artifact_t *out_artifact); +} amduat_asl_store_ops_t; + +typedef struct { + amduat_asl_store_config_t config; + amduat_asl_store_ops_t ops; + void *ctx; +} amduat_asl_store_t; + +void amduat_asl_store_init(amduat_asl_store_t *store, + amduat_asl_store_config_t config, + amduat_asl_store_ops_t ops, + void *ctx); + +amduat_asl_store_error_t amduat_asl_store_put(amduat_asl_store_t *store, + amduat_artifact_t artifact, + amduat_reference_t *out_ref); + +amduat_asl_store_error_t amduat_asl_store_get(amduat_asl_store_t *store, + amduat_reference_t ref, + amduat_artifact_t *out_artifact); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* AMDUAT_ASL_STORE_H */ diff --git a/include/amduat/enc/asl1_core.h b/include/amduat/enc/asl1_core.h new file mode 100644 index 0000000..93c1dfe --- 
/dev/null +++ b/include/amduat/enc/asl1_core.h @@ -0,0 +1,28 @@ +#ifndef AMDUAT_ENC_ASL1_CORE_H +#define AMDUAT_ENC_ASL1_CORE_H + +#include "amduat/asl/store.h" + +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + amduat_asl_encoding_profile_id_t profile_id; + const char *name; + uint16_t version; + void *impl; +} amduat_enc_asl1_core_profile_desc_t; + +amduat_octets_t amduat_enc_asl1_core_key( + amduat_asl_encoding_profile_id_t profile_id, + uint8_t out[2]); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* AMDUAT_ENC_ASL1_CORE_H */ diff --git a/include/amduat/enc/pel_program_dag.h b/include/amduat/enc/pel_program_dag.h new file mode 100644 index 0000000..e69de29 diff --git a/include/amduat/enc/pel_trace_dag.h b/include/amduat/enc/pel_trace_dag.h new file mode 100644 index 0000000..e69de29 diff --git a/include/amduat/enc/tgk1_edge.h b/include/amduat/enc/tgk1_edge.h new file mode 100644 index 0000000..e69de29 diff --git a/include/amduat/hash/asl1.h b/include/amduat/hash/asl1.h new file mode 100644 index 0000000..a1b441b --- /dev/null +++ b/include/amduat/hash/asl1.h @@ -0,0 +1,26 @@ +#ifndef AMDUAT_HASH_ASL1_H +#define AMDUAT_HASH_ASL1_H + +#include "amduat/asl/core.h" + +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + amduat_hash_id_t hash_id; + const char *name; + size_t digest_len; + void *impl; +} amduat_hash_asl1_desc_t; + +amduat_octets_t amduat_hash_asl1_key(amduat_hash_id_t hash_id, uint8_t out[2]); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* AMDUAT_HASH_ASL1_H */ diff --git a/include/amduat/pel/core.h b/include/amduat/pel/core.h new file mode 100644 index 0000000..e69de29 diff --git a/include/amduat/pel/opreg_kernel.h b/include/amduat/pel/opreg_kernel.h new file mode 100644 index 0000000..e69de29 diff --git a/include/amduat/pel/opreg_kernel_params.h b/include/amduat/pel/opreg_kernel_params.h new file mode 100644 index 0000000..e69de29 diff --git 
a/include/amduat/pel/program_dag.h b/include/amduat/pel/program_dag.h new file mode 100644 index 0000000..e69de29 diff --git a/include/amduat/pel/program_dag_desc.h b/include/amduat/pel/program_dag_desc.h new file mode 100644 index 0000000..e69de29 diff --git a/include/amduat/pel/surf.h b/include/amduat/pel/surf.h new file mode 100644 index 0000000..e69de29 diff --git a/include/amduat/pel/trace_dag.h b/include/amduat/pel/trace_dag.h new file mode 100644 index 0000000..e69de29 diff --git a/include/amduat/tgk/core.h b/include/amduat/tgk/core.h new file mode 100644 index 0000000..e69de29 diff --git a/include/amduat/tgk/prov.h b/include/amduat/tgk/prov.h new file mode 100644 index 0000000..e69de29 diff --git a/include/amduat/tgk/store.h b/include/amduat/tgk/store.h new file mode 100644 index 0000000..e69de29 diff --git a/include/amduat/tgk/tgk_store_fs.h b/include/amduat/tgk/tgk_store_fs.h new file mode 100644 index 0000000..387b020 --- /dev/null +++ b/include/amduat/tgk/tgk_store_fs.h @@ -0,0 +1,6 @@ +#ifndef AMDUAT_TGK_TGK_STORE_FS_H +#define AMDUAT_TGK_TGK_STORE_FS_H + +/* Filesystem-backed TGK store adapter public API. */ + +#endif /* AMDUAT_TGK_TGK_STORE_FS_H */ diff --git a/include/amduat/tgk/tgk_store_mem.h b/include/amduat/tgk/tgk_store_mem.h new file mode 100644 index 0000000..fc8bd38 --- /dev/null +++ b/include/amduat/tgk/tgk_store_mem.h @@ -0,0 +1,6 @@ +#ifndef AMDUAT_TGK_TGK_STORE_MEM_H +#define AMDUAT_TGK_TGK_STORE_MEM_H + +/* In-memory TGK store adapter public API. 
*/ + +#endif /* AMDUAT_TGK_TGK_STORE_MEM_H */ diff --git a/include/amduat/util/arena.h b/include/amduat/util/arena.h new file mode 100644 index 0000000..e69de29 diff --git a/include/amduat/util/buf.h b/include/amduat/util/buf.h new file mode 100644 index 0000000..e69de29 diff --git a/src/adapters/asl_store_fs/asl_store_fs.c b/src/adapters/asl_store_fs/asl_store_fs.c new file mode 100644 index 0000000..5aeadb7 --- /dev/null +++ b/src/adapters/asl_store_fs/asl_store_fs.c @@ -0,0 +1,3 @@ +#include "amduat/asl/asl_store_fs.h" + +/* TODO: implement filesystem-backed ASL store adapter. */ diff --git a/src/adapters/tgk_store_fs/tgk_store_fs.c b/src/adapters/tgk_store_fs/tgk_store_fs.c new file mode 100644 index 0000000..a38803e --- /dev/null +++ b/src/adapters/tgk_store_fs/tgk_store_fs.c @@ -0,0 +1,3 @@ +#include "amduat/tgk/tgk_store_fs.h" + +/* TODO: implement filesystem-backed TGK store adapter. */ diff --git a/src/adapters/tgk_store_mem/tgk_store_mem.c b/src/adapters/tgk_store_mem/tgk_store_mem.c new file mode 100644 index 0000000..dffde7d --- /dev/null +++ b/src/adapters/tgk_store_mem/tgk_store_mem.c @@ -0,0 +1,3 @@ +#include "amduat/tgk/tgk_store_mem.h" + +/* TODO: implement in-memory TGK store adapter. 
*/ diff --git a/src/internal/arena.c b/src/internal/arena.c new file mode 100644 index 0000000..e69de29 diff --git a/src/internal/buf.c b/src/internal/buf.c new file mode 100644 index 0000000..e69de29 diff --git a/src/internal/endian.c b/src/internal/endian.c new file mode 100644 index 0000000..e69de29 diff --git a/src/internal/varint.c b/src/internal/varint.c new file mode 100644 index 0000000..e69de29 diff --git a/src/kernel/asl/core.c b/src/kernel/asl/core.c new file mode 100644 index 0000000..97d20fb --- /dev/null +++ b/src/kernel/asl/core.c @@ -0,0 +1,76 @@ +#include "amduat/asl/core.h" + +#include <string.h> + +amduat_octets_t amduat_octets(const void *data, size_t len) { + amduat_octets_t v; + v.data = (const uint8_t *)data; + v.len = len; + return v; +} + +bool amduat_octets_eq(amduat_octets_t a, amduat_octets_t b) { + if (a.len != b.len) { + return false; + } + if (a.len == 0) { + return true; + } + return memcmp(a.data, b.data, a.len) == 0; +} + +bool amduat_octets_is_empty(amduat_octets_t v) { + return v.len == 0; +} + +amduat_type_tag_t amduat_type_tag(uint32_t tag_id) { + amduat_type_tag_t t; + t.tag_id = tag_id; + return t; +} + +bool amduat_type_tag_eq(amduat_type_tag_t a, amduat_type_tag_t b) { + return a.tag_id == b.tag_id; +} + +amduat_reference_t amduat_reference(amduat_hash_id_t hash_id, + amduat_octets_t digest) { + amduat_reference_t r; + r.hash_id = hash_id; + r.digest = digest; + return r; +} + +bool amduat_reference_eq(amduat_reference_t a, amduat_reference_t b) { + return (a.hash_id == b.hash_id) && amduat_octets_eq(a.digest, b.digest); +} + +amduat_artifact_t amduat_artifact(amduat_octets_t bytes) { + amduat_artifact_t a; + a.bytes = bytes; + a.has_type_tag = false; + a.type_tag.tag_id = 0; + return a; +} + +amduat_artifact_t amduat_artifact_with_type(amduat_octets_t bytes, + amduat_type_tag_t type_tag) { + amduat_artifact_t a; + a.bytes = bytes; + a.has_type_tag = true; + a.type_tag = type_tag; + return a; +} + +bool 
amduat_artifact_eq(amduat_artifact_t a, amduat_artifact_t b) { + if (!amduat_octets_eq(a.bytes, b.bytes)) { + return false; + } + if (a.has_type_tag != b.has_type_tag) { + return false; + } + if (!a.has_type_tag) { + return true; + } + return amduat_type_tag_eq(a.type_tag, b.type_tag); +} diff --git a/src/kernel/pel/core.c b/src/kernel/pel/core.c new file mode 100644 index 0000000..e69de29 diff --git a/src/kernel/tgk/core.c b/src/kernel/tgk/core.c new file mode 100644 index 0000000..e69de29 diff --git a/src/near_core/asl/registry.c b/src/near_core/asl/registry.c new file mode 100644 index 0000000..f7fb9ab --- /dev/null +++ b/src/near_core/asl/registry.c @@ -0,0 +1,186 @@ +#include "amduat/asl/registry.h" + +#include <string.h> + +void amduat_asl_registry_value_init(amduat_asl_registry_value_t *value, + amduat_asl_registry_entry_t *entries, + size_t cap) { + if (value == NULL) { + return; + } + value->entries = entries; + value->len = 0; + value->cap = cap; +} + +int amduat_asl_registry_key_cmp(amduat_octets_t a, amduat_octets_t b) { + size_t min_len; + int cmp; + + min_len = a.len < b.len ? 
a.len : b.len; + if (min_len > 0) { + cmp = memcmp(a.data, b.data, min_len); + if (cmp != 0) { + return cmp; + } + } + + if (a.len < b.len) { + return -1; + } + if (a.len > b.len) { + return 1; + } + return 0; +} + +bool amduat_asl_registry_value_insert(amduat_asl_registry_value_t *value, + amduat_asl_registry_entry_t entry) { + size_t left; + size_t right; + + if (value == NULL || value->entries == NULL) { + return false; + } + if (value->len >= value->cap) { + return false; + } + + left = 0; + right = value->len; + while (left < right) { + size_t mid; + int cmp; + + mid = left + (right - left) / 2; + cmp = amduat_asl_registry_key_cmp(entry.key, value->entries[mid].key); + if (cmp == 0) { + return false; + } + if (cmp < 0) { + right = mid; + } else { + left = mid + 1; + } + } + + if (left < value->len) { + size_t i; + for (i = value->len; i > left; --i) { + value->entries[i] = value->entries[i - 1]; + } + } + value->entries[left] = entry; + value->len += 1; + return true; +} + +const amduat_asl_registry_entry_t *amduat_asl_registry_value_lookup( + const amduat_asl_registry_value_t *value, + amduat_octets_t key) { + size_t left; + size_t right; + + if (value == NULL || value->entries == NULL) { + return NULL; + } + + left = 0; + right = value->len; + while (left < right) { + size_t mid; + int cmp; + + mid = left + (right - left) / 2; + cmp = amduat_asl_registry_key_cmp(key, value->entries[mid].key); + if (cmp == 0) { + return &value->entries[mid]; + } + if (cmp < 0) { + right = mid; + } else { + left = mid + 1; + } + } + + return NULL; +} + +void amduat_asl_registry_store_init(amduat_asl_registry_store_t *reg, + amduat_asl_store_t *store, + amduat_asl_registry_codec_t codec) { + if (reg == NULL) { + return; + } + reg->store = store; + reg->codec = codec; +} + +amduat_asl_registry_error_t amduat_asl_registry_store_put( + amduat_asl_registry_store_t *reg, + const amduat_asl_registry_value_t *value, + amduat_reference_t *out_ref, + amduat_asl_store_error_t 
*out_store_err) { + amduat_octets_t bytes; + amduat_artifact_t artifact; + amduat_asl_store_error_t store_err; + + if (out_store_err != NULL) { + *out_store_err = AMDUAT_ASL_STORE_OK; + } + + if (reg == NULL || reg->store == NULL || reg->codec.encode == NULL) { + return AMDUAT_ASL_REGISTRY_ERR_CODEC; + } + + if (!reg->codec.encode(reg->codec.ctx, value, &bytes)) { + return AMDUAT_ASL_REGISTRY_ERR_CODEC; + } + + if (reg->codec.has_type_tag) { + artifact = amduat_artifact_with_type(bytes, reg->codec.type_tag); + } else { + artifact = amduat_artifact(bytes); + } + + store_err = amduat_asl_store_put(reg->store, artifact, out_ref); + if (store_err != AMDUAT_ASL_STORE_OK) { + if (out_store_err != NULL) { + *out_store_err = store_err; + } + return AMDUAT_ASL_REGISTRY_ERR_STORE; + } + + return AMDUAT_ASL_REGISTRY_OK; +} + +amduat_asl_registry_error_t amduat_asl_registry_store_get( + amduat_asl_registry_store_t *reg, + amduat_reference_t ref, + amduat_asl_registry_value_t *out_value, + amduat_asl_store_error_t *out_store_err) { + amduat_artifact_t artifact; + amduat_asl_store_error_t store_err; + + if (out_store_err != NULL) { + *out_store_err = AMDUAT_ASL_STORE_OK; + } + + if (reg == NULL || reg->store == NULL || reg->codec.decode == NULL) { + return AMDUAT_ASL_REGISTRY_ERR_CODEC; + } + + store_err = amduat_asl_store_get(reg->store, ref, &artifact); + if (store_err != AMDUAT_ASL_STORE_OK) { + if (out_store_err != NULL) { + *out_store_err = store_err; + } + return AMDUAT_ASL_REGISTRY_ERR_STORE; + } + + if (!reg->codec.decode(reg->codec.ctx, artifact.bytes, out_value)) { + return AMDUAT_ASL_REGISTRY_ERR_CODEC; + } + + return AMDUAT_ASL_REGISTRY_OK; +} diff --git a/src/near_core/asl/store.c b/src/near_core/asl/store.c new file mode 100644 index 0000000..079b82a --- /dev/null +++ b/src/near_core/asl/store.c @@ -0,0 +1,31 @@ +#include "amduat/asl/store.h" + +void amduat_asl_store_init(amduat_asl_store_t *store, + amduat_asl_store_config_t config, + amduat_asl_store_ops_t ops, + 
void *ctx) { + if (store == NULL) { + return; + } + store->config = config; + store->ops = ops; + store->ctx = ctx; +} + +amduat_asl_store_error_t amduat_asl_store_put(amduat_asl_store_t *store, + amduat_artifact_t artifact, + amduat_reference_t *out_ref) { + if (store == NULL || store->ops.put == NULL) { + return AMDUAT_ASL_STORE_ERR_UNSUPPORTED; + } + return store->ops.put(store->ctx, artifact, out_ref); +} + +amduat_asl_store_error_t amduat_asl_store_get(amduat_asl_store_t *store, + amduat_reference_t ref, + amduat_artifact_t *out_artifact) { + if (store == NULL || store->ops.get == NULL) { + return AMDUAT_ASL_STORE_ERR_UNSUPPORTED; + } + return store->ops.get(store->ctx, ref, out_artifact); +} diff --git a/src/near_core/enc/asl1_core.c b/src/near_core/enc/asl1_core.c new file mode 100644 index 0000000..368ea6a --- /dev/null +++ b/src/near_core/enc/asl1_core.c @@ -0,0 +1,9 @@ +#include "amduat/enc/asl1_core.h" + +amduat_octets_t amduat_enc_asl1_core_key( + amduat_asl_encoding_profile_id_t profile_id, + uint8_t out[2]) { + out[0] = (uint8_t)(profile_id >> 8); + out[1] = (uint8_t)(profile_id & 0xff); + return amduat_octets(out, 2); +} diff --git a/src/near_core/enc/pel_program_dag.c b/src/near_core/enc/pel_program_dag.c new file mode 100644 index 0000000..e69de29 diff --git a/src/near_core/enc/pel_trace_dag.c b/src/near_core/enc/pel_trace_dag.c new file mode 100644 index 0000000..e69de29 diff --git a/src/near_core/enc/tgk1_edge.c b/src/near_core/enc/tgk1_edge.c new file mode 100644 index 0000000..e69de29 diff --git a/src/near_core/hash/asl1.c b/src/near_core/hash/asl1.c new file mode 100644 index 0000000..011216d --- /dev/null +++ b/src/near_core/hash/asl1.c @@ -0,0 +1,7 @@ +#include "amduat/hash/asl1.h" + +amduat_octets_t amduat_hash_asl1_key(amduat_hash_id_t hash_id, uint8_t out[2]) { + out[0] = (uint8_t)(hash_id >> 8); + out[1] = (uint8_t)(hash_id & 0xff); + return amduat_octets(out, 2); +} diff --git a/src/pel_stack/opreg/kernel.c 
b/src/pel_stack/opreg/kernel.c new file mode 100644 index 0000000..e69de29 diff --git a/src/pel_stack/opreg/kernel_params.c b/src/pel_stack/opreg/kernel_params.c new file mode 100644 index 0000000..e69de29 diff --git a/src/pel_stack/program_dag/program_dag.c b/src/pel_stack/program_dag/program_dag.c new file mode 100644 index 0000000..e69de29 diff --git a/src/pel_stack/program_dag/program_dag_desc.c b/src/pel_stack/program_dag/program_dag_desc.c new file mode 100644 index 0000000..e69de29 diff --git a/src/pel_stack/surf/surf.c b/src/pel_stack/surf/surf.c new file mode 100644 index 0000000..e69de29 diff --git a/src/pel_stack/trace_dag/trace_dag.c b/src/pel_stack/trace_dag/trace_dag.c new file mode 100644 index 0000000..e69de29 diff --git a/src/tgk_stack/enc_edge/tgk1_edge.c b/src/tgk_stack/enc_edge/tgk1_edge.c new file mode 100644 index 0000000..e69de29 diff --git a/src/tgk_stack/prov/prov.c b/src/tgk_stack/prov/prov.c new file mode 100644 index 0000000..e69de29 diff --git a/src/tgk_stack/store/store.c b/src/tgk_stack/store/store.c new file mode 100644 index 0000000..e69de29 diff --git a/tier1/amduat20-stack-overview.md b/tier1/amduat20-stack-overview.md new file mode 100644 index 0000000..2e3e61e --- /dev/null +++ b/tier1/amduat20-stack-overview.md @@ -0,0 +1,804 @@ +# SUBSTRATE/STACK-OVERVIEW — Amduat 2.0 Substrate Stack + +**Document ID:** `SUBSTRATE/STACK-OVERVIEW` +**Layer:** Overview (Tier-1 orientation) +**Status:** Approved (Tier-1 orientation; non-normative behaviour) +**Version:** 0.4.1 +**Last updated:** 2025-11-30 + +**Depends on (normative):** + +* `ASL/1-CORE v0.4.x` — Artifact substrate (value model) +* `ENC/ASL1-CORE v1.0.x` — canonical encoding profile (`ASL_ENC_CORE_V1`) +* `HASH/ASL1 v0.2.4` — ASL1 hash family & registry +* `ASL/1-STORE v0.4.x` — store semantics + +**References (up-stack / kernel & profiles):** + +* `PEL/1-CORE v0.3.x` — Primitive Execution Layer (core semantics) +* `PEL/1-SURF v0.2.x` — PEL store-backed execution surface +* 
`PEL/PROGRAM-DAG/1 v0.3.x` — DAG scheme over PEL/1-CORE +* `PEL/PROGRAM-DAG-DESC/1 v0.1.x` — scheme descriptor binding DAG scheme to an ASL/1 `SchemeRef` +* `PEL/TRACE-DAG/1 v0.1.x` — DAG execution trace profile +* `ENC/PEL-PROGRAM-DAG/1 v0.2.x` — canonical encoding for DAG programs +* `ENC/PEL-TRACE-DAG/1 v0.1.x` — canonical encoding for DAG traces +* `OPREG/PEL1-KERNEL v0.1.x` — kernel operation registry for PEL/1 +* `OPREG/PEL1-KERNEL-PARAMS/1 v0.1.x` — kernel operation parameter encodings +* `TGK/1-CORE v0.7.x` — Trace Graph Kernel (core semantics) +* `ENC/TGK1-EDGE/1 v0.1.x` — canonical encoding for TGK EdgeArtifacts +* `TGK/STORE/1 v0.2.x` — graph store & query semantics over TGK/1-CORE +* `TGK/PROV/1 v0.1.x` — provenance & trace semantics over `TGK/1-CORE` + +> **Normativity note** +> This document is Approved as a Tier-1 orientation surface. It is structurally normative for dependency and layering (what may depend on what) but does **not** introduce new behavioural rules beyond the referenced specs. + +> **Scope note (v0.4.0)** +> This revision relocates the stack map into `/amduat/tier1/` and locks it to the +> Amduat 2.0 kernel scope required by PH06. Certification, receipt, fact, and +> overlay profiles are explicitly deferred to later phases. + +> **PH06 closure note (v0.4.1)** +> TGK store/provenance surfaces now reference the PH06 TGK rebuild evidence: +> `/amduat/vectors/ph06/tgk/manifest.json`, +> `/amduat/logs/ph06/evidence/tgk/PH06-EV-TGK-REBUILD-001/`, and the governance +> bundle `PH06-EV-GOV-REG-001` that records registry/ADR alignment for closure. + +© 2025 Niklas Rydberg. + +## License + +Except where otherwise noted, this document (text and diagrams) is licensed under +the Creative Commons Attribution 4.0 International License (CC BY 4.0). + +The identifier registries and mapping tables (e.g. 
TypeTag IDs, HashId +assignments, EdgeTypeId tables) are additionally made available under CC0 1.0 +Universal (CC0) to enable unrestricted reuse in implementations and derivative +specifications. + +Code examples in this document are provided under the Apache License 2.0 unless +explicitly stated otherwise. Test vectors, where present, are dedicated to the +public domain under CC0 1.0. + +--- + +## 0. Purpose + +This document gives a **short, shared map** of the Amduat 2.0 substrate: + +* what the core layers are, +* what each layer is responsible for (and not responsible for), +* how they depend on one another. + +It is the “you are here” page for kernel work and substrate profiles. All detailed behaviour is defined in the referenced specs. + +--- + +## 1. Design North Star + +The substrate exists to make computing **deterministic, inspectable, and replayable**. + +Key invariants: + +1. **Immutable artifacts, content-addressed identity** + Everything that matters is an immutable `Artifact`; identity is `(hash_id, digest)` over a canonical encoding. + +2. **Tiny kernel, rich profiles** + The kernel only knows: + + * Artifacts (values) — `ASL/1-CORE` + * pure execution — `PEL/1-CORE` + * graph semantics — `TGK/1-CORE` + + Meaning (facts, receipts, overlays, policy) lives in profiles on top. + +3. **Single logical provenance graph** + All relationships (execution, certs, overlays, facts, annotations) appear as typed edges in one logical graph over artifact IDs (`Reference`s), via `TGK/1-CORE`. EdgeArtifacts are ordinary Artifacts; their identities are ordinary `Reference`s. + +4. **Additive evolution, no history rewrites** + + * New capabilities are introduced via new **versioned** surfaces (new specs, new profile IDs, new type tags). + * Existing artifacts remain valid under their original versions. + * Canonical encodings (e.g. `ASL_ENC_CORE_V1`, `PEL_ENC_PROGRAM_DAG_V1`, `TGK1_EDGE_ENC_V1`) are **never** changed in place. + +5. 
**Storage-neutral, policy-neutral kernel** + + * No special filesystem, database, or policy model is assumed. + * Stores, transports, graph stores, and policies are layered around the substrate via adapters and profiles. + +--- + +## 2. Layered Stack (Overview) + +At a high level: + +* **Kernel (value + compute + graph)** + + * `ASL/1-CORE` — values + * `PEL/1-CORE` — primitive execution + * `TGK/1-CORE` — trace graph kernel + +* **Near-core substrate & graph store profiles** + + * `ENC/ASL1-CORE` — canonical encoding for `Artifact` / `Reference` + * `HASH/ASL1` — hash family & `HashId` registry + * `ASL/1-STORE` — store semantics + * `ENC/TGK1-EDGE/1` — canonical encoding for TGK edges (EdgeArtifacts) + * `TGK/STORE/1` — graph store & basic query semantics over the TGK provenance graph + * `TGK/PROV/1` — provenance & trace semantics over `ProvenanceGraph` (pure functions over `TGK/1-CORE`) + +* **Canonical PEL/1 execution stack (L1 profiles)** + + * `PEL/1-SURF` — store-backed execution surface over `ASL/1-STORE` + * `PEL/PROGRAM-DAG/1` — DAG program scheme over PEL/1-CORE + * `PEL/PROGRAM-DAG-DESC/1` — scheme descriptor binding DAG programs to a concrete `SchemeRef` and Program `TypeTag`/encoding + * `PEL/TRACE-DAG/1` — DAG execution trace profile + * `ENC/PEL-PROGRAM-DAG/1` — canonical encoding for DAG programs + * `ENC/PEL-TRACE-DAG/1` — canonical encoding for DAG traces + * `OPREG/PEL1-KERNEL` — kernel operation set + * `OPREG/PEL1-KERNEL-PARAMS/1` — canonical param encodings for kernel ops + +* **Profiles above kernel + near-core** — intentionally out-of-scope for this + version to keep the focus on the Amduat 2.0 kernel contract. Certification, + receipt, fact, overlay, and domain profiles will reattach once the kernel is + proven inside future phases. + +Below is a more explicit breakdown. + +--- + +## 3. 
Kernel + +### 3.1 ASL/1-CORE — Artifact Substrate (L0) + +**Document:** `ASL/1-CORE` + +Defines the **logical value model**: + +```text +Artifact { + bytes: OctetString + type_tag: optional TypeTag +} + +TypeTag { + tag_id: uint32 +} + +Reference { + hash_id: HashId + digest: OctetString +} + +HashId = uint16 +``` + +Responsibilities: + +* What an Artifact *is*. + +* How identity is defined via **encoding profile + hash algorithm**: + + * `EncodingProfileId` (e.g. `ASL_ENC_CORE_V1`) + * `HashId` (e.g. `0x0001` = `HASH-ASL1-256`) + +* Separation of logical values from physical representation. + +Not responsible for: + +* Encoding bytes on wire or disk (that’s `ENC/ASL1-CORE`). +* Storage, replication, or access control (that’s `ASL/1-STORE` and up). +* Execution, signatures, facts, overlays, or graph storage/query. + +Everything else builds on this substrate. + +--- + +### 3.2 PEL/1-CORE — Primitive Execution Layer (L1) + +**Document:** `PEL/1-CORE` + +Defines **pure, deterministic execution** over Artifacts: + +* Schemes are identified by `SchemeRef` (a `Reference` to a scheme descriptor Artifact). + +* Programs, inputs, outputs, and params are **just Artifacts**; their types are identified by `TypeTag`. + +* Each scheme defines a pure function: + + ```text + Exec_s(program, inputs, params) -> (outputs, ExecutionResultValue) + ``` + +* No time, no network, no hidden mutable state, no implicit graph or certs. + +Responsibilities: + +* “Given this scheme, program, inputs, params → what outputs and result Artifact-shape arise?” +* Providing a deterministic `ExecutionResultValue` describing the outcome. + +Not responsible for: + +* Graph storage, certificates, policy, or stores. +* Encoding or hashing (PEL/1-CORE depends only on `ASL/1-CORE`; concrete surfaces use the near-core stack). 
+ +Core invariants (from the PEL spec): + +* **Determinism** — for fixed `(scheme_ref, program, inputs, params)`, all conformant implementations produce identical outputs and result values. +* **Purity** — no external mutable state, time, randomness, or I/O in the semantics of `Exec_s`. +* **Hot-path isolation** — PEL/1-CORE execution does **not** require any certification, receipt, fact, or overlay profiles on the hot path; provenance is derived after the fact from Artifacts and results via TGK surfaces. + +--- + +### 3.3 TGK/1-CORE — Trace Graph Kernel (L1.5) + +**Document:** `TGK/1-CORE` + +Defines the **logical provenance graph** as a projection over Artifacts: + +* **Nodes** are Artifact IDs: + + ```text + Node := Reference + ``` + +* **Edges** are `EdgeBody` values encoded inside EdgeArtifacts: + + ```text + EdgeBody { + type: EdgeTypeId // uint32 + from: Node[] // ordered, MAY be empty + to: Node[] // ordered, MAY be empty + payload: Reference // always present + } + ``` + +* **EdgeArtifacts** are ordinary `Artifact`s whose payload decodes (under some edge encoding profile) to an `EdgeBody`. Their identity is an ordinary `Reference` (“`EdgeRef`”). + +Responsibilities: + +* Node model: `Node := Reference`. +* Edge model: `EdgeBody` + edge-tag/type configuration. +* Defining a **ProvenanceGraph** as a pure projection over: + + * a finite set of Artifacts, and + * a configured set of edge encoding profiles and type catalogs. + +Core kernel rules: + +* All TGK edges **must** be represented as Artifacts; there is no separate edge ID space. +* For any snapshot (finite Artifact set + profile set), the induced `ProvenanceGraph` is **unique**; any graph indexes or caches are optimizations only. +* TGK does **not** define how edges are encoded into bytes; that is the role of edge encoding profiles such as `ENC/TGK1-EDGE/1`, built on the ASL stack. +* TGK does **not** define how the graph is stored or queried; that is the role of `TGK/STORE/1` and related profiles. 
+ +Not responsible for: + +* How the graph is physically stored or queried (`TGK/STORE/1`). +* Policy decisions about what edge patterns “mean” (facts, authority, reachability); that’s profiles. + +Layering with PEL/1-CORE: + +* PEL executions MAY be *described* via TGK edges (e.g., `EDGE_EXECUTION`, `EDGE_RECEIPT_SUPPORTS`). +* TGK is **not** a runtime dependency for PEL; edges can be emitted by runtimes or reconstructed from Artifacts (programs, inputs, traces, receipts). + +--- + +## 4. Near-core Substrate & Graph Store Profiles + +These surfaces are not kernel primitives, but are canonical and expected in almost all deployments. + +### 4.1 ENC/ASL1-CORE v1 — Core Canonical Encoding + +**Document:** `ENC/ASL1-CORE` +**Profile ID:** `ASL_ENC_CORE_V1 = 0x0001` + +Defines the **canonical binary encoding** for `ASL/1-CORE` values: + +* `ArtifactBytes` — canonical encoding for `Artifact`. +* `ReferenceBytes` — canonical encoding for `Reference`. + +Responsibilities: + +* Provide injective, stable, deterministic, streaming-friendly encodings for `Artifact` and `Reference`. +* Define the canonical byte sequences used as input to ASL1 hash functions and stores. +* Satisfy all canonical encoding constraints from `ASL/1-CORE` (injective, stable, deterministic, explicit structure, type-sensitive, byte-transparent, streaming-friendly). + +Not responsible for: + +* Hash algorithms or `HashId` registry (`HASH/ASL1`). +* Storage or transport protocols. +* Scheme-specific encodings (PEL, TGK, domain formats). + +Identity discipline: + +* The profile ID (`ASL_ENC_CORE_V1`) and version are **not** embedded into `ArtifactBytes` / `ReferenceBytes`. +* For any context where `Reference` values are derived, the applicable encoding profile (typically `ASL_ENC_CORE_V1`) MUST be fixed and explicit. +* A given `(hash_id, digest)` pair corresponds to exactly one canonical `ArtifactBytes` under the chosen encoding profile. 
+ +--- + +### 4.2 HASH/ASL1 — Hash Algorithm Registry + +**Document:** `HASH/ASL1` + +Defines the **ASL1 hash family**: + +* Assigns stable `HashId` values (uint16) to concrete cryptographic hash algorithms. +* Defines `HASH-ASL1-256` (`HashId = 0x0001`, SHA-256) as the mandatory baseline. +* Reserves ranges for additional classical and post-quantum algorithms. + +Responsibilities: + +* Mapping `HashId → algorithm descriptor` (algorithm, digest length, constraints). +* Ensuring assignments are immutable within the registry. +* Providing a globally shared interpretation of `Reference.hash_id` throughout the stack. + +Not responsible for: + +* When/how to migrate between algorithms (policy & profile level). +* Encoding/storage of digests (handled by `ASL/1-CORE` + `ENC/ASL1-CORE`). +* Store behaviour or execution semantics. + +`HASH/ASL1` + `ENC/ASL1-CORE` + `ASL/1-CORE` together define the **identity discipline** for `Reference`. + +--- + +### 4.3 ASL/1-STORE — Store Semantics + +**Document:** `ASL/1-STORE` + +Defines the **abstract content-addressable store model**: + +* At a snapshot, a StoreInstance is a partial mapping: + + ```text + Reference -> Artifact // 0 or 1 Artifact per Reference + ``` + +* With a fixed `StoreConfig`: + + ```text + StoreConfig { + encoding_profile: EncodingProfileId // e.g. ASL_ENC_CORE_V1 + hash_id: HashId // e.g. 0x0001 (HASH-ASL1-256) + } + ``` + +* Operations (logical): + + * `put(Artifact) -> Reference | ERR_INTEGRITY | ERR_UNSUPPORTED` + * `get(Reference) -> Artifact | ERR_NOT_FOUND | ERR_UNSUPPORTED | ERR_INTEGRITY` + +Responsibilities: + +* Content-addressable semantics over `ASL/1-CORE`. +* Minimal logical error model at the store boundary. +* Clear identity semantics: no two non-identical Artifacts may share a `Reference` under the same `StoreConfig`. + +Not responsible for: + +* Concrete APIs / protocols (HTTP, gRPC, language APIs). +* Security, multi-tenancy, replication, or GC. 
+* Concurrency control or transactional semantics. +* Execution or provenance semantics. + +ASL/1-STORE reuses ASL identity; it does **not** invent a new notion of “object ID”. + +--- + +### 4.4 PEL/1 Execution Stack — Canonical DAG & Trace Profiles + +These are standard, near-core profiles that give a concrete, store-backed execution stack for PEL/1-CORE. + +**`PEL/1-SURF` — Primitive Execution Surface** + +* Wires `Exec_s` from `PEL/1-CORE` to `ASL/1-STORE`. +* Takes `scheme_ref`, `program_ref`, `input_refs`, `params_ref` plus a StoreInstance. +* Resolves Artifacts, invokes `Exec_s`, persists outputs and a surface-level `ExecutionResult` Artifact. +* Does **not** depend on TGK, CIL, FER, FCT, or OI on the execution hot path. + +**`PEL/PROGRAM-DAG/1` — DAG Program Scheme** + +* Defines Programs as acyclic graphs (`Node`s) over Artifact inputs/outputs. +* Provides structural validity, a canonical topological order, and `Exec_DAG` as a PEL/1-CORE scheme. +* Store-neutral; used by `PEL/1-SURF` as a concrete scheme. + +**`PEL/PROGRAM-DAG-DESC/1` — DAG Scheme Descriptor** + +* Defines the logical descriptor value for the DAG scheme (`DagSchemeDescriptor`) and its encoding as a dedicated scheme-descriptor Artifact. +* Binds `PEL/PROGRAM-DAG/1` to a concrete `SchemeRef` (`SchemeRef_DAG_1`) by hashing that descriptor Artifact under `ASL_ENC_CORE_V1` and a chosen `HashId` (Amduat 2.0 baseline: `HASH-ASL1-256`, `HashId = 0x0001`). +* Records, in a content-addressed way, which `TypeTag` and encoding profile (`TYPE_TAG_PEL_PROGRAM_DAG_1`, `PEL_ENC_PROGRAM_DAG_V1`) identify Program Artifacts for this scheme. +* Remains store- and graph-neutral; engines MAY treat the descriptor as build-time or configuration metadata and are not required to read it on the execution hot path. + +**`ENC/PEL-PROGRAM-DAG/1` — Program Encoding** + +* Canonical binary encoding for `Program` values (`PEL/PROGRAM-DAG/1`). +* Fixes node layout, `DagInput` representation, and root layout. 
+* Used as `Artifact.bytes` under a dedicated `TypeTag` (`TYPE_TAG_PEL_PROGRAM_DAG_1`). + +**`PEL/TRACE-DAG/1` — DAG Execution Trace Profile** + +* Defines `TraceDAGValue`: per-node run status, output refs, and deterministic diagnostics, linked to `scheme_ref`, `program_ref`, `input_refs`, `params_ref`. +* Optional but deterministic; enables TGK to reconstruct node-level execution edges (via profiles like `TGK/PEL/1`). + +**`ENC/PEL-TRACE-DAG/1` — Trace Encoding** + +* Canonical encoding for `TraceDAGValue`. +* Used as `Artifact.bytes` under `TYPE_TAG_PEL_TRACE_DAG_1`. +* Embeds `Reference` values via `ReferenceBytes` (`ENC/ASL1-CORE`). + +**`OPREG/PEL1-KERNEL` + `OPREG/PEL1-KERNEL-PARAMS/1` — Kernel Ops & Params** + +* Define a minimal, globally stable set of PEL kernel ops (e.g. `pel.bytes.concat`, `pel.bytes.slice`, `pel.bytes.const`, `pel.bytes.hash.asl1`). +* Specify logical parameter types and runtime error codes. +* Provide canonical parameter encodings/decodings for those ops. +* Are pure, deterministic, and store-/graph-neutral; they sit entirely inside the PEL/1-CORE model. + +--- + +### 4.5 TGK Edge Encoding — `ENC/TGK1-EDGE/1` + +**Document:** `ENC/TGK1-EDGE/1` +**Profile ID:** `TGK1_EDGE_ENC_V1` + +Defines the **canonical binary encoding** of TGK `EdgeBody` values: + +```text +EdgeBody { + type: EdgeTypeId + from: Node[] // Node = Reference + to: Node[] + payload: Reference +} +``` + +as `EdgeBytes`, and their embedding as EdgeArtifacts: + +```text +Artifact { + bytes = EdgeBytes + type_tag = TYPE_TAG_TGK1_EDGE_V1 +} +``` + +Responsibilities: + +* Give a **single, injective, streaming-friendly layout** for: + + * `EdgeTypeId`, + * ordered `from` / `to` lists, and + * `payload : Reference`. + +* Define `encode_edgebody_tgk1_v1` / `decode_edgebody_tgk1_v1`: + + * A fixed `edge_version (u16)` guard word, always `1` in this profile. + * Use of canonical `ReferenceBytes` (`ENC/ASL1-CORE`) for embedded `Reference`s, via `EncodedRef` length-prefix wrappers. 
+ * Enforcement of `TGK/EDGE-NONEMPTY-ENDPOINT/CORE/1` at the encoding layer. + +* Provide a canonical EdgeArtifact shape (`TYPE_TAG_TGK1_EDGE_V1`) that environments can treat as edges in `TGK/1-CORE`. + +Not responsible for: + +* Semantics of specific `EdgeTypeId` values (those belong to TGK type catalogs and profiles). +* Graph store or query semantics (those belong to `TGK/STORE/1`). +* Edge identity (still given by ASL/1 `Reference` over the EdgeArtifact). + +Design note: + +* `edge_version` is a **guard**, not an evolution knob: + + * For `TGK1_EDGE_ENC_V1`, encoders MUST always write `edge_version = 1`. + * Decoders for this profile MUST accept only `1` and treat anything else as “not a `TGK1_EDGE_ENC_V1` payload”. + * Any incompatible change should be a new profile (and typically a new `TypeTag`), not `edge_version = 2` within this profile. + +--- + +### 4.6 TGK/STORE/1 — Graph Store & Query Profile + +**Document:** `TGK/STORE/1` + +Defines the **graph store abstraction and basic query semantics** over the TGK provenance graph induced by Artifacts: + +* Treats TGK’s `ProvenanceGraph` as the source of truth: + + ```text + ProvenanceGraph { + Nodes: set<Node> + Edges: set<(EdgeRef, EdgeBody)> + } + ``` + +* Introduces `GraphStoreInstance` / `GraphStoreSnapshot` as: + + * a finite `Artifacts` set drawn from one or more `ASL/1-STORE` instances or equivalent feeds, and + * a `TGKProfileSet` (edge tags, encodings, edge types) “in effect” for that snapshot. + +* Exposes minimal, identity-preserving graph operations: + + * `resolve_edge(EdgeRef) -> EdgeBody | error` + * adjacency queries: `edges_from`, `edges_to`, `edges_incident` + * optional `scan_edges` for paginated edge scans + * optional `neighbors` for node-neighbor projection + +Responsibilities: + +* Ensure that all graph views are consistent with `TGK/1-CORE`’s projection for some `(Artifacts, TGKProfileSet)` snapshot. +* Define a small graph-level error model (e.g.
“ref is not an edge”, unsupported identity domain, underlying artifact error). +* Fix a canonical edge and node ordering (based on `(hash_id, digest)`) for deterministic results across implementations. + +Not responsible for: + +* Provenance algorithms (backward/forward reachability, trace construction) — those belong to `TGK/PROV/1` and higher-level profiles. +* Certificate, fact, or overlay semantics (`CIL/1`, `FER/1`, `FCT/1`, `OI/1`). +* Transport or API shapes (HTTP/gRPC/etc.). +* Store semantics themselves (`ASL/1-STORE` is the source of artifact resolution). + +`TGK/STORE/1` is strictly: + +> “Given a graph defined by `TGK/1-CORE`, how do we expose its edges and adjacency in a way that is deterministic, identity-preserving, and policy-neutral?” + +Kernel provenance semantics (`TGK/PROV/1`) and higher-level profiles (`FER/1`, `FCT/1`, `CIL/1`, `OI/1`, domain profiles) typically use `TGK/STORE/1` as their standard graph-access surface. + +--- + +### 4.7 TGK/PROV/1 — Provenance & Trace Semantics over TGK/1-CORE + +**Document:** `TGK/PROV/1` + +Defines the **kernel provenance operators** over a `ProvenanceGraph` snapshot: + +* A small, store-neutral provenance parameter model: + + * `ProvDirection` (BACKWARD, FORWARD, BOTH), + * `EdgeTypeFilter` (edge-type selection), + * `DepthLimit` (hop-count bound). + +* Pure, deterministic operators: + + * `prov_closure_nodes(G, S, Q)` — reachable node set from finite seeds `S`, + * `prov_depths(G, S, Q)` — minimum-hop depth map over that closure, + * `prov_layers(G, S, Q)` — depth-partitioned layering of reachable nodes, + * `prov_trace(G, S, Q)` — induced `TraceGraph` subgraph view. + +Responsibilities: + +* Provide a **graph-only** provenance kernel over `TGK/1-CORE`: + + * semantics are defined purely in terms of `ProvenanceGraph` snapshots and finite seed sets, + * no dependence on any particular store, index, or query API. 
+ +* Fix canonical behaviour for backward/forward/bidirectional reachability, depth assignment, and trace construction that higher profiles can rely on. + +* Make closure and trace behaviour algebraically well-behaved (closure operator, monotone in seeds, depth limits, and edge-type selection). + +Not responsible for: + +* Graph store or adjacency APIs (`TGK/STORE/1`); these are implementation choices. +* Edge-type **meaning** or provenance policy (“which edges count as causal vs decorative”); that is decided by profiles (e.g. PEL/TRACE-DAG, `FER/1`, `FCT/1`, `OI/1`). +* Encoding of provenance results into Artifacts (receipts, reports, overlays). + +Layering: + +* Depends **normatively** only on `TGK/1-CORE` (and, via TGK, `ASL/1-CORE`). +* Typically implemented on top of `TGK/STORE/1` for adjacency and edge resolution, but any implementation that operates over a `ProvenanceGraph` snapshot and satisfies `TGK/PROV/1`’s semantics is valid. +* Used by `FER/1`, `FCT/1`, `CIL/1`, `OI/1`, and domain profiles as the canonical way to ask provenance questions over the TGK graph. + +--- + +## 5. Profiles on Top + +These build on kernel + near-core and express semantics and policy. + +* **CIL/1 — Certification & Integrity Layer** + + * Certificates as Artifacts. + * Generic signatures over `Reference`s and other Artifacts. + * Separation between “is the signature cryptographically valid?” (CIL) and “what does it mean?” (profiles such as FCT/1). + +* **FER/1 — Evidence / Receipts** + + * Receipts bundling descriptors, inputs, outputs, traces, and certs as Artifacts. + * Plays well with TGK via edges like “this receipt supports this fact” or “this receipt is attached to this execution”. + * Typically uses `TGK/PROV/1` to construct provenance cones and trace projections when building or checking receipts. + +* **FCT/1 — Facts / Transactions** + + * Facts/transactions as semantics over receipts + certificates + TGK graph patterns. 
+ * Governs acceptance criteria for domain-level “truth”. + * May rely on `TGK/PROV/1` to define which provenance patterns or trace shapes are acceptable evidence for particular fact schemas. + +* **OI/1 — Overlays & Navigation** + + * Overlays mapping human-friendly names, paths, or views to `Reference`s. + * Graph-based navigation and UI surfaces, often driven by `TGK/PROV/1` queries over `TGK/STORE/1`. + +All of these: + +* encode their objects as Artifacts (own `TypeTag` ranges and encodings), +* use `Reference`s as durable pointers into the substrate, +* typically expose relationships as TGK edges, using edge types bound to `ENC/TGK1-EDGE/1` (or other edge encodings where appropriate), +* and query the resulting graph via `TGK/STORE/1` / `TGK/PROV/1` or equivalent. + +Higher-level domain profiles (AI evals, CI pipelines, governance processes, ledgers, etc.) sit above all of this. + +--- + +## 6. Dependency Summary + +**Conceptual DAG (simplified):** + +* `ASL/1-CORE` + + * ↓ used by `ENC/ASL1-CORE`, `HASH/ASL1`, `ASL/1-STORE`, + `PEL/1-CORE`, `TGK/1-CORE`, `PEL/PROGRAM-DAG/1`, `PEL/PROGRAM-DAG-DESC/1`, `PEL/TRACE-DAG/1`, + `OPREG/PEL1-KERNEL`, `OPREG/PEL1-KERNEL-PARAMS/1`, + `ENC/TGK1-EDGE/1`, `TGK/STORE/1`, `TGK/PROV/1`, + `CIL/1`, `FER/1`, `FCT/1`, `OI/1`. + +* `ENC/ASL1-CORE` + + * ↓ used by `HASH/ASL1` (for `ArtifactBytes` / `ReferenceBytes`), + `ASL/1-STORE`, `PEL/1-SURF`, + `ENC/PEL-PROGRAM-DAG/1`, `ENC/PEL-TRACE-DAG/1`, `PEL/PROGRAM-DAG-DESC/1`, + `ENC/TGK1-EDGE/1`, `TGK/STORE/1`, and higher profiles. + +* `HASH/ASL1` + + * ↓ used by `ASL/1-STORE`, `PEL/1-SURF`, + `ENC/PEL-PROGRAM-DAG/1`, `ENC/PEL-TRACE-DAG/1`, `PEL/PROGRAM-DAG-DESC/1`, + `ENC/TGK1-EDGE/1`, `TGK/STORE/1`, and higher profiles for identity. + +* `ASL/1-STORE` + + * ↓ used by `PEL/1-SURF`, TGK storage/indexing profiles (`TGK/STORE/1`), + `CIL/1`, `FER/1`, `FCT/1`, `OI/1`. 
+ +* `PEL/1-CORE` + + * ↓ used by `PEL/1-SURF`, `PEL/PROGRAM-DAG/1`, `PEL/PROGRAM-DAG-DESC/1`, `PEL/TRACE-DAG/1`, + `OPREG/PEL1-KERNEL`, domain execution schemes. + +* `PEL/1-SURF`, `PEL/PROGRAM-DAG/1`, `PEL/PROGRAM-DAG-DESC/1`, `PEL/TRACE-DAG/1`, + `ENC/PEL-PROGRAM-DAG/1`, `ENC/PEL-TRACE-DAG/1`, `OPREG/PEL1-KERNEL`, `OPREG/PEL1-KERNEL-PARAMS/1` + + * ↓ used by `FER/1`, `FCT/1`, `CIL/1`, `OI/1` and domain profiles to build reproducible executions, traces, and evidence. + +* `TGK/1-CORE` + + * ↓ used by `ENC/TGK1-EDGE/1` (for the logical `EdgeBody` model), + `TGK/STORE/1`, `TGK/PROV/1`, + `FER/1`, `FCT/1`, `CIL/1`, `OI/1`, and domain profiles for provenance reconstruction. + +* `ENC/TGK1-EDGE/1` + + * ↓ used by TGK-aware stores (`TGK/STORE/1`), provenance profiles (`TGK/PROV/1`), + and by higher layers (e.g. `FER/1`, `FCT/1`, `CIL/1`, `OI/1`) that emit or interpret EdgeArtifacts. + +* `TGK/STORE/1` + + * ↓ used by `TGK/PROV/1` (provenance operators), + and by higher layers (`FER/1`, `FCT/1`, `CIL/1`, `OI/1` and domain profiles) as the standard way to query the provenance graph. + * Evidence hooks: PH06 rebuild CLI and parity receipts live under + `/amduat/vectors/ph06/tgk/manifest.json` and + `/amduat/logs/ph06/evidence/tgk/PH06-EV-TGK-REBUILD-001/`. + +* `TGK/PROV/1` + + * ↓ used by `FER/1`, `FCT/1`, `CIL/1`, `OI/1`, and domain profiles as the canonical kernel for provenance closure, depth, and trace semantics over TGK graphs, typically implemented on top of `TGK/STORE/1`. + +Higher-level domain profiles depend on all of the above as needed. + +--- + +## 7. How to Use This Overview When Editing Specs + +When editing: + +* **ASL/1-CORE:** + + * Keep this doc + Tier-0 invariants in view. + * Must not depend on any other spec. + +* **ENC/ASL1-CORE / HASH/ASL1:** + + * Keep `ASL/1-CORE` + this overview in view. + * Must not re-define `Artifact`, `Reference`, or `HashId`. 
+ +* **ASL/1-STORE:** + + * Keep `ASL/1-CORE`, `ENC/ASL1-CORE`, `HASH/ASL1` + this overview in view. + * Must not introduce new identity semantics or policy. + +* **PEL/1-CORE:** + + * Keep `ASL/1-CORE` + this overview in view. + * Kernel semantics are storage- and graph-neutral. + * Must not require `TGK/1-CORE`, `ENC/TGK1-EDGE/1`, `TGK/STORE/1`, `CIL/1` or higher profiles on the execution hot path. + +* **PEL/1-SURF:** + + * Keep `ASL/1-CORE`, `ENC/ASL1-CORE`, `HASH/ASL1`, `ASL/1-STORE`, `PEL/1-CORE` + this overview in view. + * May wire `Exec_s` to stores, but must not depend on TGK, CIL, FER, FCT, or OI on the execution hot path. + +* **PEL/PROGRAM-DAG/1, ENC/PEL-PROGRAM-DAG/1 & PEL/PROGRAM-DAG-DESC/1:** + + * Keep `ASL/1-CORE`, `PEL/1-CORE` + this overview in view. + * `PEL/PROGRAM-DAG/1` defines the scheme model and evaluation semantics; `ENC/PEL-PROGRAM-DAG/1` defines the canonical Program encoding; `PEL/PROGRAM-DAG-DESC/1` binds that scheme to a concrete `SchemeRef` and Program `TypeTag`/encoding via a descriptor Artifact. + * Must not depend on store semantics or graph/cert layers; descriptor logic must remain store- and TGK-neutral (no provenance or policy semantics baked into the scheme binding). + +* **PEL/TRACE-DAG/1 & ENC/PEL-TRACE-DAG/1:** + + * Keep `ASL/1-CORE`, `PEL/1-CORE`, `PEL/PROGRAM-DAG/1` + this overview in view. + * Define trace shape and encoding; TGK interprets traces into edges, not the other way around. + +* **OPREG/PEL1-KERNEL & OPREG/PEL1-KERNEL-PARAMS/1:** + + * Keep `ASL/1-CORE`, `PEL/1-CORE`, `PEL/PROGRAM-DAG/1` + this overview in view. + * Must only define pure, deterministic ops and param encodings; no store, graph, or policy semantics. + +* **TGK/1-CORE:** + + * Keep `ASL/1-CORE` + this overview in view. + * Must define edges as Artifacts and the graph as a reconstructible projection. + * Must not depend on particular edge encodings (`ENC/TGK1-EDGE/1` is a profile *using* TGK/1-CORE, not a kernel dependency). 
+ * Must not depend on graph store/query details (`TGK/STORE/1` sits *above* TGK/1-CORE). + +* **ENC/TGK1-EDGE/1:** + + * Keep `ASL/1-CORE`, `ENC/ASL1-CORE`, `HASH/ASL1`, `TGK/1-CORE` + this overview in view. + * Must not re-define `EdgeBody` or `EdgeTypeId`; only define how `EdgeBody` ↔ `EdgeBytes` works for a specific profile. + * Must keep `edge_version` as a guard, not an in-place evolution knob; incompatible changes should become a new profile and (typically) a new `TypeTag`. + +* **TGK/STORE/1:** + + * Keep `ASL/1-CORE`, `ASL/1-STORE`, `ENC/ASL1-CORE`, `HASH/ASL1`, `TGK/1-CORE`, `ENC/TGK1-EDGE/1` + this overview in view. + * Must not re-define `Node`, `EdgeBody`, `EdgeTypeId`, or `ProvenanceGraph`; it only exposes and queries the graph that TGK/1-CORE defines. + * Must not introduce new identity schemes for nodes or edges; `Node := Reference`, `EdgeRef := Reference`. + * Must remain policy- and semantics-neutral: no fact, certificate, or provenance *meaning* inside `TGK/STORE/1`, just projection and queries. + * PH06 evidence discipline: implementations MUST cite `tools/tgk/rebuild.py` + receipts (`/amduat/logs/ph06/evidence/tgk/PH06-EV-TGK-REBUILD-001/`) and the + governance bundle `PH06-EV-GOV-REG-001` when wiring store adapters into CLI + workflows. + +* **TGK/PROV/1:** + + * Keep `ASL/1-CORE`, `TGK/1-CORE`, `TGK/STORE/1` (as an implementation surface) + this overview in view. + * Must define provenance operators purely in terms of `ProvenanceGraph` snapshots and finite seed sets; semantics are graph-only and store-neutral. + * Must not depend on particular graph-store APIs (`TGK/STORE/1` is an optimisation/implementation choice, not a semantic dependency). + * Must not bake in CIL/FER/FCT/OI semantics; it is a kernel layer that higher profiles specialise via edge-type selection and policies. 
+ +For any profile (`CIL/1`, `FER/1`, `FCT/1`, `OI/1`, domain-specific): + +* Treat this document as the “ground truth map” of what you’re allowed to assume from the substrate, and where your profile must stop. +* If you find yourself needing new kernel behaviour, that likely means a new `*-CORE` surface, not a profile shortcut. + +--- + +## 8. Document History (Informative) + +* **0.4.0 (2025-11-16):** Relocated the stack overview into `/amduat/tier1/`, trimmed the PH06 scope to the kernel-only surfaces, and documented that certification/receipt/fact/overlay profiles are deferred to future phases. +* **0.3.3 (2025-11-16):** Added `PEL/PROGRAM-DAG-DESC/1` as the canonical DAG scheme descriptor across references, the PEL execution stack, dependency summary, and editing guidance. +* **0.3.2 (2025-11-16):** Added `TGK/PROV/1` to the stack overview as the canonical provenance kernel over `TGK/1-CORE` and wired it through references, dependency summary, and editing guidance. +* **0.3.1 (2025-11-16):** Added `TGK/STORE/1` into the overview as the standard graph store & query profile, clarified dependency summary/editing guidance, and introduced the history section. +* **0.3.0 (2025-11-16):** Initial Tier-1 substrate stack orientation for Amduat 2.0 (kernel, near-core profiles, canonical PEL stack, and profile authoring guidance). + +--- + +* **0.4.1 (2025-11-30):** Highlighted TGK/STORE evidence hooks + PH06 governance linkage. +### Commit message + +**Commit title:** + +`PEL/PROGRAM-DAG-DESC/1 v0.1.6 + SUBSTRATE/STACK-OVERVIEW v0.3.3 — wire DAG scheme descriptor into stack map` + +**Commit body:** + +* `PEL/PROGRAM-DAG-DESC/1` → v0.1.6 + + * Finalised the DAG scheme descriptor value model and encoding, and fixed `SchemeRef_DAG_1` as the canonical `SchemeRef` for `PEL/PROGRAM-DAG/1` under `ASL_ENC_CORE_V1` + `HASH-ASL1-256`. + * Clarified that the descriptor binds the scheme to a Program `TypeTag` and encoding profile and remains store- and graph-neutral. 
+ +* `SUBSTRATE/STACK-OVERVIEW` → v0.3.3 + + * Added `PEL/PROGRAM-DAG-DESC/1` to the references list and the canonical PEL execution stack. + * Described the descriptor’s role in §4.4 and wired it into the dependency summary (§6) and editing guidance (§7) as the scheme binding between `PEL/1-CORE`, `PEL/PROGRAM-DAG/1`, and ASL/1 `SchemeRef`s. + * Orientation-only change; no behavioural rules added. diff --git a/tier1/asl-1-core.md b/tier1/asl-1-core.md new file mode 100644 index 0000000..f0b55d0 --- /dev/null +++ b/tier1/asl-1-core.md @@ -0,0 +1,498 @@ +# ASL/1-CORE — Artifact Substrate Layer (Core) + +Status: Approved +Owner: Niklas Rydberg +Version: 0.4.1 +SoT: Yes +Last Updated: 2025-11-16 +Linked Phase Pack: N/A +Tags: [deterministic, binary-minimalism] + + + +**Document ID:** `ASL/1-CORE` +**Layer:** L0 — Pure logical value model (no persistence / execution semantics) + +**Depends on (normative):** + +* None (foundational model) + +**Informative references:** + +* `ENC/ASL1-CORE v1.x` — canonical encoding profile (`ASL_ENC_CORE_V1`) +* `HASH/ASL1 v0.2.2` — ASL1 hash family and `HashId` assignments +* `ASL/1-STORE v0.4.0` — content-addressable store over ASL/1-CORE +* `TGK/1-CORE v0.7.0` — trace graph kernel over `Reference` +* `PEL/1` — execution substrate + +© 2025 Niklas Rydberg. + +## License + +Except where otherwise noted, this document (text and diagrams) is licensed under +the Creative Commons Attribution 4.0 International License (CC BY 4.0). + +The identifier registries and mapping tables (e.g. TypeTag IDs, HashId +assignments, EdgeTypeId tables) are additionally made available under CC0 1.0 +Universal (CC0) to enable unrestricted reuse in implementations and derivative +specifications. + +Code examples in this document are provided under the Apache License 2.0 unless +explicitly stated otherwise. Test vectors, where present, are dedicated to the +public domain under CC0 1.0. + +--- + +## 0. 
Conventions + +The key words **MUST**, **MUST NOT**, **REQUIRED**, **SHOULD**, and **MAY** are to be interpreted as in RFC 2119. + +ASL/1-CORE defines **only logical values and their equality**. +It does **not** define storage formats, protocols, runtime APIs, or policy. + +Primitive logical types: + +* `OctetString` — finite sequence of bytes `0x00–0xFF`. +* `uint16`, `uint32`, `uint64` — fixed-width unsigned integers. + +Binary layout, endianness, and on-wire representation come from **encoding profiles**, not from ASL/1-CORE itself. + +--- + +## 1. Purpose & Non-Goals + +### 1.1 Purpose + +`ASL/1-CORE` defines the **artifact substrate** for Amduat 2.0: + +* what an **Artifact** is, +* what a **TypeTag** is, +* what a **Reference** is, and +* how content-addressed identity is defined via canonical encodings and hash functions. + +It aims to make computing **sane** by enforcing that: + +* content and type hints are explicit, +* identity is precise and field-based, +* logical values are immutable, +* all higher behavior (store, execution, provenance, policy) is layered on top. + +All other Amduat layers — STORE, PEL, CIL, FCT, FER, OI, TGK — must respect and build on this substrate. + +### 1.2 Non-goals + +ASL/1-CORE explicitly does **not** define: + +* Store APIs or persistence guarantees. +* Execution runtimes, scheduling, or side-effects. +* Certificates, trust semantics, or authorization. +* Networks, transports, or wire formats. +* Compression, chunking, encryption, or indexing. + +Those are defined by `ASL/1-STORE`, `PEL/1`, `CIL/1`, `FCT/1`, `FER/1`, `OI/1`, `TGK/1-CORE`, and other profiles. + +--- + +## 2. Core Value Model + +### 2.1 OctetString + +```text +OctetString = finite sequence of 8-bit bytes (0x00–0xFF) +``` + +ASL/1-CORE does not assign any structure (e.g., text vs binary) to `OctetString`. +Structure, if any, is introduced by higher-layer semantics keyed off `TypeTag`. 
+ +--- + +### 2.2 TypeTag + +A `TypeTag` identifies how higher layers intend to interpret an Artifact’s bytes. + +```text +TypeTag { + tag_id: uint32 +} +``` + +Properties: + +* `tag_id` is opaque at this layer. +* No particular `tag_id` semantics are defined here. +* `tag_id` participates in identity: change the tag, you’ve changed the Artifact. + +#### 2.2.1 Tag ranges (conventions only) + +By convention (non-normative here): + +* `0x00000000–0x0FFFFFFF` — core stack / shared profiles. +* `0x10000000–0xFFFFFFFF` — extension / domain-specific tags. + +Concrete registries and governance of `tag_id` live in separate documents. + +--- + +### 2.3 Artifact + +An **Artifact** is the fundamental immutable value in ASL/1: + +```text +Artifact { + bytes: OctetString + type_tag: optional TypeTag +} +``` + +Properties: + +* Immutable logical value. +* Two identity-sensitive dimensions: + + * `bytes` — exact content bytes. + * `type_tag` — presence + `tag_id` if present. + +> **ASL/CORE-ART-EQ/1** +> Two Artifacts `A` and `B` are identical in ASL/1-CORE iff: +> +> * `A.bytes` and `B.bytes` are byte-for-byte equal; and +> * either both have no `type_tag`, or both have a `type_tag` and `A.type_tag.tag_id == B.type_tag.tag_id`. + +No encoding profile, store, or runtime may alter this equality. + +> **ASL/CORE-IMMUT/1** +> Once an Artifact value is created, it is considered immutable. Any change to `bytes` or `type_tag` produces a **different** Artifact. + +--- + +### 2.4 HashId + +```text +HashId = uint16 +``` + +A `HashId` identifies a particular hash algorithm in a given family. + +ASL/1-CORE itself is hash-family agnostic. The Amduat 2.0 core stack uses the "ASL1" family defined in HASH/ASL1 as the canonical family for identity-critical References. + +--- + +### 2.5 Reference + +A **Reference** is a content address for an Artifact: + +```text +Reference { + hash_id: HashId + digest: OctetString +} +``` + +Interpretation: + +* `hash_id` selects a hash algorithm (e.g. 
`HASH-ASL1-256`). +* `digest` is that algorithm’s digest of a canonical encoding of some Artifact. + +> **ASL/CORE-REF-EQ/1** +> Two References `R1` and `R2` are identical iff: +> +> * `R1.hash_id == R2.hash_id`, and +> * `R1.digest` and `R2.digest` are byte-for-byte equal. + +No cross-`hash_id` equivalence is defined at this layer. If two different `(hash_id, digest)` pairs refer to Artifacts that happen to be “the same” in some application sense, that is strictly a higher-layer interpretation. + +--- + +## 3. Encoding Profiles + +ASL/1-CORE separates logical values from concrete encodings via **encoding profiles**. + +### 3.1 EncodingProfileId + +```text +EncodingProfileId = uint16 +``` + +Each encoding profile (e.g. `ASL_ENC_CORE_V1`) is defined in its own document and specifies: + +* canonical `ArtifactBytes` encodings; +* optionally `ReferenceBytes` encodings; +* invariants required to preserve ASL/1-CORE identity. + +The baseline encoding profile in Amduat 2.0 is: + +* `ASL_ENC_CORE_V1 = 0x0001` — defined in `ENC/ASL1-CORE v1.x`. + +### 3.2 Profile requirements + +Any encoding profile used with ASL/1-CORE MUST satisfy: + +1. **Identity preservation** + + For all Artifacts `A` and `B`: + + * `A` and `B` are identical under ASL/CORE-ART-EQ/1 + ⇔ their canonical encodings under that profile are bit-identical. + +2. **Injectivity** + + Distinct Artifacts MUST NOT produce identical canonical encodings. + +3. **Stability and determinism** + + For any Artifact, canonical encoding: + + * MUST be stable across time and implementations, + * MUST NOT depend on environment, clock, locale, or configuration. + +4. **Explicit structure** + + Field ordering and numeric formats MUST be fixed and unambiguous. + +5. **Byte transparency** + + `Artifact.bytes` MUST be encoded exactly as-is (no hidden transcoding). + +6. **Streaming-friendliness** + + Canonical encodings MUST be producible and consumable in a single forward-only pass. 
+ +Encoding profiles MAY impose extra constraints (e.g. on particular `TypeTag` subsets) but MUST NOT break the above. + +--- + +## 4. Hashing and Reference Derivation + +ASL/1-CORE defines how canonical encodings and hash functions combine to produce References. + +### 4.1 Canonical encoding step + +Given: + +* Artifact `A`, +* encoding profile `P` with canonical encoder `encode_P(A) -> ArtifactBytes`, + +`encode_P` MUST satisfy §3.2. + +### 4.2 Reference derivation rule + +Given: + +* Artifact `A`, +* encoding profile `P`, +* hash algorithm `H` with: + + * `HashId = HID`, + * fixed digest length `L` bytes, + +then the Reference `R` for `A` under `(P, H)` is: + +```text +ArtifactBytes = encode_P(A) +digest = H(ArtifactBytes) +Reference = { hash_id = HID, digest = digest } +``` + +> **ASL/CORE-REF-DERIVE/1** +> Any component that claims to derive References from Artifacts for a given `(EncodingProfileId, HashId)` **MUST** use this exact procedure. + +### 4.3 Deterministic agreement lemma (informative) + +For any two conformant implementations that share: + +* the same encoding profile `P`, and +* the same hash algorithm `H` with `HashId = HID`, + +then for any Artifact `A`: + +* both will compute identical `ArtifactBytes`, +* both will compute identical `digest = H(ArtifactBytes)`, +* both will form identical `Reference {hash_id = HID, digest = digest}`. + +This is the basis for cross-Store and cross-domain determinism in Amduat. + +### 4.4 Canonical family for Amduat 2.0 (informative) + +While ASL/1-CORE is conceptually family-agnostic, the **Amduat 2.0 substrate** standardizes: + +* `ASL_ENC_CORE_V1` as the canonical Artifact encoding profile; +* `HASH-ASL1-256` (`HashId = 0x0001`) as the canonical default hash algorithm for identity-critical surfaces. + +Other `(EncodingProfileId, HashId)` pairs are allowed but must be explicitly declared by the consuming profile or implementation. 
+ +### 4.5 Crypto agility + +ASL/1-CORE supports evolution by: + +* delegating algorithm definitions and `HashId` assignments to `HASH/ASL1`; +* delegating binary encodings to `ENC/*` profiles. + +Higher layers MAY: + +* compute multiple References for the same Artifact (multi-hash, multi-encoding), +* define migration policies, +* mark some References as “preferred” or “legacy”. + +ASL/1-CORE itself: + +* treats References as opaque `(hash_id, digest)` pairs; +* does not specify any relationship between different References to “the same” Artifact other than the equality of their `(hash_id, digest)` pairs (ASL/CORE-REF-EQ/1). + +--- + +## 5. Logical vs Physical Representation + +### 5.1 Logical-only substrate + +Artifacts and References are **logical values**. + +ASL/1-CORE: + +* does not care where or how they’re stored; +* does not care how they’re transported; +* does not assume any particular API shape. + +### 5.2 Internal representation freedom + +Implementations MAY represent values as: + +* structs, +* slices, +* memory-mapped buffers, +* immutable trees, +* or any other structure, + +so long as they can: + +* emit canonical encodings for supported profiles, +* compute hashes correctly, +* respect ASL/1-CORE identity semantics. + +### 5.3 Passing values between layers + +Passing `Artifact` or `Reference` between components: + +* means passing a **value**, not a mutable object. + +Implementations: + +* MAY share underlying buffers internally, +* MUST treat the logical value as immutable, +* MUST NOT let in-place mutation change a value that has already been observed as an Artifact or Reference. + +--- + +## 6. Identity, Equality, and Collisions + +### 6.1 Artifact identity + +Restating for emphasis: + +> **ASL/CORE-ART-ID/1** +> Artifact identity is purely field-based: +> +> * `bytes` equality + `type_tag` presence + `tag_id` equality (if present). + +Encoding profiles and hash functions MUST preserve this identity; they MUST NOT introduce alternative notions of “the same artifact” at this layer.
+ +### 6.2 Reference identity + +> **ASL/CORE-REF-ID/1** +> Reference identity is purely: +> +> * `hash_id` equality + `digest` byte equality. + +Different `(hash_id, digest)` pairs are always distinct References, even if they logically point to the same underlying Artifact as understood by some higher layer. + +### 6.3 Collision assumptions + +ASL/1-CORE assumes the configured hash algorithms are **cryptographically strong**: + +* collisions are treated as extraordinary substrate failures, not supported behavior. + +If two distinct Artifacts produce the same `(hash_id, digest)`: + +* ASL/1-CORE itself does not define remediation; +* `ASL/1-STORE` is responsible for surfacing this as an integrity error; +* higher profiles (e.g. CIL/1, FCT/1) MAY define detection and response strategies. + +--- + +## 7. Relationship to Other Layers (Informative) + +### 7.1 ASL/1-STORE + +`ASL/1-STORE`: + +* models StoreInstances as partial mappings `Reference -> Artifact`, +* parameterizes each StoreInstance by a single `StoreConfig = {encoding_profile, hash_id}`, +* uses ASL/CORE-REF-DERIVE/1 to compute References in `put`, +* respects ASL/CORE-ART-ID/1 and ASL/CORE-REF-ID/1. + +STORE adds persistence, error semantics, and StoreConfig; it does not change the core value model. + +### 7.2 TGK/1-CORE + +`TGK/1-CORE`: + +* treats `Reference` as graph nodes, +* treats specific Artifacts (EdgeArtifacts) as encodings of graph edges, +* defines a ProvenanceGraph as a projection over Artifacts and configured profiles. + +TGK relies on ASL/1-CORE to ensure: + +* Artifacts are immutable, +* References are stable and deterministic across implementations, +* all provenance evidence is expressed as Artifacts and References. 
+ +### 7.3 PEL/1, CIL/1, FCT/1, FER/1, OI/1 + +These layers: + +* allocate specific `TypeTag` ranges and schemas, +* encode programs, execution traces, certificates, facts, overlays as Artifacts, +* use References consistently via ASL/CORE-REF-DERIVE/1, +* may store those values in ASL/1-STORE and expose them through TGK. + +They must not override or reinterpret ASL/1-CORE equality; they build on it. + +--- + +## 8. Conformance + +An implementation is **ASL/1-CORE–conformant** if it: + +1. **Implements the value types** + + * Provides logical structures for `Artifact`, `TypeTag`, and `Reference` with at least the fields described in §2. + +2. **Respects Artifact and Reference equality** + + * Implements identity exactly as in ASL/CORE-ART-EQ/1 and ASL/CORE-REF-EQ/1 (and the derived ID invariants). + +3. **Uses encoding profiles appropriately** + + * Uses only encoding profiles that satisfy §3.2. + * For any encoding profile it claims to support, can produce canonical encodings for all Artifacts. + +4. **Derives References correctly** + + * Derives References strictly according to ASL/CORE-REF-DERIVE/1 for the declared `(EncodingProfileId, HashId)` pair. + +5. **Enforces immutability** + + * Treats Artifacts and References as immutable logical values. + * Does not leak any mechanism that would let a consumer mutate an Artifact or Reference “in place”. + +6. **Maintains separation of concerns** + + * Does not embed storage, execution, policy, or graph semantics into ASL/1-CORE constructs. + * Leaves stores, execution engines, and graph kernels to their respective layers. + +Everything else — API design, transport formats, performance characteristics, deployment topology — lies outside ASL/1-CORE and MUST be specified by separate surfaces. + +--- + +## Document History + +* **0.4.1 (2025-11-16):** Registered as Tier-1 spec and aligned to the Amduat 2.0 substrate baseline. 
diff --git a/tier1/asl-1-store.md b/tier1/asl-1-store.md new file mode 100644 index 0000000..06e09bf --- /dev/null +++ b/tier1/asl-1-store.md @@ -0,0 +1,513 @@ +# ASL/1-STORE — Content-Addressable Store (Core) + +Status: Approved +Owner: Niklas Rydberg +Version: 0.4.0 +SoT: Yes +Last Updated: 2025-11-16 +Linked Phase Pack: N/A +Tags: [deterministic, import-export] + + + +**Document ID:** ASL/1-STORE +**Layer:** L0.5 — Store model over ASL/1-CORE (above value model, below execution/provenance) + +**Depends on (normative):** + +* `ASL/1-CORE v0.4.x` — value substrate: `Artifact`, `Reference`, `TypeTag`, identity model + +**Informative references:** + +* `ENC/ASL1-CORE v1.0.x` — canonical encodings for `Artifact` / `Reference` (e.g. `ASL_ENC_CORE_V1`) +* `HASH/ASL1 v0.2.x` — ASL1 hash family (`HashId`, e.g. `HASH-ASL1-256`) +* `TGK/1-CORE v0.7.x` — trace graph kernel over `Reference` +* `PEL/1` — execution substrate (uses ASL/1-STORE for I/O) +* `CIL/1`, `FCT/1`, `FER/1`, `OI/1` — profiles that rely on content-addressable storage + +> **Versioning note** +> ASL/1-STORE is agnostic to minor revisions of these informative documents, provided they preserve: +> +> * the ASL/1-CORE definitions of `Artifact`, `Reference`, `TypeTag`, and identity, and +> * the existence of at least one canonical encoding and hash configuration usable for reference derivation. + +© 2025 Niklas Rydberg. + +## License + +Except where otherwise noted, this document (text and diagrams) is licensed under +the Creative Commons Attribution 4.0 International License (CC BY 4.0). + +The identifier registries and mapping tables (e.g. TypeTag IDs, HashId +assignments, EdgeTypeId tables) are additionally made available under CC0 1.0 +Universal (CC0) to enable unrestricted reuse in implementations and derivative +specifications. + +Code examples in this document are provided under the Apache License 2.0 unless +explicitly stated otherwise.
Test vectors, where present, are dedicated to the +public domain under CC0 1.0. + + +--- + +## 0. Conventions + +### 0.1 RFC 2119 terminology + +The key words **MUST**, **MUST NOT**, **REQUIRED**, **SHALL**, **SHALL NOT**, +**SHOULD**, **SHOULD NOT**, **RECOMMENDED**, **MAY**, and **OPTIONAL** are to be +interpreted as described in RFC 2119. + +### 0.2 Terms from ASL/1-CORE + +This specification reuses the following concepts from `ASL/1-CORE`: + +* **Artifact** + + ```text + Artifact { + bytes: OctetString + type_tag: optional TypeTag + } + ``` + +* **Reference** + + ```text + Reference { + hash_id: HashId + digest: OctetString + } + ``` + +* **TypeTag** — `uint32` hint for intended interpretation of `Artifact.bytes`. + +* **HashId** — `uint16` identifying a hash algorithm (e.g. in `HASH/ASL1`). + +Where this document says **ArtifactRef**, it means an ASL/1 `Reference` that logically identifies an `Artifact` under the identity rules of `ASL/1-CORE`. + +### 0.3 Additional terminology + +* **StoreInstance** — an abstract content-addressable store implementing ASL/1-STORE semantics. +* **StoreConfig** — the identity-related configuration of a StoreInstance (see §2.2). +* **StoreSnapshot** — the logical state of a StoreInstance at some instant: a finite mapping from `Reference` to `Artifact`, plus its fixed `StoreConfig`. +* **ExecutionEnvironment** — any deployment context (process, node, cluster) that hosts one or more StoreInstances; used only informatively. + +ASL/1-STORE defines **logical semantics** only. Physical representation (files, DB rows, object stores), indexing, and transport are out of scope. + +--- + +## 1. Purpose, Scope & Non-Goals + +### 1.1 Purpose + +`ASL/1-STORE` defines the **minimal content-addressable store model** over `ASL/1-CORE` values. 
+ +It provides: + +* The notion of a **StoreInstance** as a partial mapping: + + ```text + Reference -> Artifact // zero or one Artifact per Reference + ``` + +* The semantics of two core operations: + + * `put(Artifact) -> Reference` + * `get(Reference) -> Artifact | error` + +* A small, logical **error model** at the store boundary. + +The goals are: + +* **Determinism:** same Artifact, same configuration ⇒ same Reference and same store behavior. +* **Immutability:** once a Reference is associated with an Artifact, that association does not change. +* **Separation of concerns:** ASL/1-STORE defines logical behavior; physical storage and APIs are separate concerns. + +> **STORE/CORE-MINIMAL/1** +> ASL/1-STORE **MUST** remain a thin logical layer over ASL/1-CORE: it defines what a content-addressable store *is* and how `put`/`get` behave; it **MUST NOT** embed higher-level concepts such as execution, provenance, or policy. + +### 1.2 Non-goals + +ASL/1-STORE explicitly does **not** define: + +* Concrete APIs (HTTP, gRPC, language-specific interfaces). +* Authentication, authorization, tenancy, or quotas. +* Replication, redundancy, durability, retention, or garbage collection policies. +* Chunking, compression, encryption, or indexing strategies. +* Network discovery, routing, or federation protocols. + +These are the responsibility of higher-layer specifications, implementation profiles, and operational policy. + +--- + +## 2. Core Store Model + +### 2.1 StoreInstance as a partial mapping + +At any given StoreSnapshot, a StoreInstance can be viewed as a partial function: + +```text +StoreSnapshot.M : Reference -> Artifact // 0 or 1 Artifact per Reference +``` + +Properties: + +* For any given `ref`, `StoreSnapshot.M(ref)` is either: + + * undefined (no stored Artifact), or + * a single `Artifact` value. +* There are no duplicate or conflicting mappings in a single snapshot. + +ASL/1-STORE does not specify how snapshots are implemented (MVCC, copy-on-write, etc.). 
It only constrains the logical mapping at any instant. + +### 2.2 StoreConfig (identity configuration) + +Each StoreInstance has a **StoreConfig** that determines how References are derived: + +```text +StoreConfig { + encoding_profile: EncodingProfileId // e.g. ASL_ENC_CORE_V1 + hash_id: HashId // e.g. 0x0001 (HASH-ASL1-256) +} +``` + +Constraints: + +* `encoding_profile` MUST name a canonical encoding profile for `Artifact` (e.g. `ASL_ENC_CORE_V1` from `ENC/ASL1-CORE`). +* `hash_id` MUST identify a fixed hash algorithm (e.g. `HASH-ASL1-256`) whose behavior is stable as per `HASH/ASL1`. + +> **STORE/CONFIG-FIXED/CORE/1** +> For a given StoreSnapshot, `StoreConfig.encoding_profile` and `StoreConfig.hash_id` are fixed. All `put` and `get` operations in that snapshot are interpreted relative to that configuration. + +Implementations MAY support multiple configurations (e.g. separate namespaces per profile), but each StoreInstance, as seen through ASL/1-STORE, is always parameterised by a single `StoreConfig`. + +### 2.3 Relationship to ASL/1-CORE identity + +ASL/1-CORE defines how a `Reference` is derived from an `Artifact` given an encoding profile and hash algorithm: + +```text +ArtifactBytes = encode_P(Artifact) +digest = H(ArtifactBytes) +Reference = { hash_id = HID, digest = digest } +``` + +ASL/1-STORE **reuses** this rule unchanged. For a StoreInstance with `StoreConfig` = `{ encoding_profile = P, hash_id = HID }`: + +* `put` MUST derive `Reference` values exactly via the ASL/1-CORE rule for `(P, HID)`. +* `get` MUST respect the mapping semantics defined in §3.2. + +ASL/1-STORE does **not** introduce any new notion of identity beyond ASL/1-CORE. + +--- + +## 3. Store Operations + +ASL/1-STORE defines two mandatory logical operations: + +* `put(Artifact) -> Reference` +* `get(Reference) -> Artifact | error` + +Concrete APIs MUST be semantically equivalent to these. 
+ +### 3.1 `put(Artifact) -> Reference` + +**Logical signature:** + +```text +put(artifact: Artifact) -> Reference | error +``` + +Let the StoreInstance have `StoreConfig`: + +* `P = encoding_profile` +* `HID = hash_id` +* `H =` hash algorithm associated with `HID` + +**Semantics:** + +1. Compute the canonical encoding of `artifact` under `P`: + + ```text + ArtifactBytes = encode_P(artifact) + ``` + +2. Compute the Reference under `(P, H)` as per ASL/1-CORE: + + ```text + digest = H(ArtifactBytes) + reference = Reference { hash_id = HID, digest = digest } + ``` + +3. Consider the current StoreSnapshot mapping `M`: + + * If `M(reference)` is **undefined** (no existing Artifact stored under `reference`): + + * Logically define `M'(reference) = artifact`. + * All other mappings remain unchanged. + + * If `M(reference) = artifact'` is **defined**: + + * If `artifact'` is **identical** to `artifact` in the ASL/1-CORE sense (same bytes, same type_tag status and value), then: + + * `M' = M` (no logical change). + * If `artifact'` is **not** identical to `artifact`, this is a **collision**: + the store **MUST NOT** silently replace or merge the artifacts. It MUST treat this as an integrity error (see §4). + +4. Return `reference` (or the appropriate error in the collision case). + +> **STORE/PUT-IDEMP/CORE/1** +> For a given StoreInstance and `StoreConfig`, repeated calls to `put` with identical `Artifact` inputs **MUST** always return the same `Reference`, and **MUST NOT** change the logical mapping after the first successful insertion. + +> **STORE/PUT-NO-ALIAS/CORE/1** +> A StoreInstance **MUST NOT** associate two non-identical Artifacts with the same `Reference` (under its `StoreConfig`). Any attempt to do so **MUST** result in an integrity error. + +Implementations MAY optimize by: + +* caching `ArtifactBytes`, +* deduplicating storage, +* or short-circuiting `put` if a `Reference` is known to exist. + +These do not affect the logical semantics.
+ +### 3.2 `get(Reference) -> Artifact | error` + +**Logical signature:** + +```text +get(ref: Reference) -> Artifact | error +``` + +Let `M` be the current StoreSnapshot mapping. + +**Semantics:** + +* If `M(ref)` is **defined** (there is a stored Artifact `A`): + + * `get(ref)` MUST return an Artifact identical to `A` in the ASL/1-CORE sense. + +* If `M(ref)` is **undefined**: + + * `get(ref)` MUST fail with `ERR_NOT_FOUND` (see §4.1). + +> **STORE/GET-PURE/CORE/1** +> For a fixed StoreSnapshot and `ref`: +> +> * If `M(ref)` is defined as `A`, repeated `get(ref)` calls **MUST** return Artifacts identical to `A`. +> * If `M(ref)` is undefined, repeated `get(ref)` calls **MUST** consistently return `ERR_NOT_FOUND`, unless the mapping is changed by a subsequent `put` or administrative import. + +ASL/1-STORE does **not** require that `get` recompute and re-verify the digest on every access. It does require that: + +* implementations maintain internal invariants so that `M(ref)` remains consistent with the ASL/1-CORE identity rule for the configured `StoreConfig`; and +* any detected inconsistencies are treated as integrity errors (see §4.1). + +### 3.3 Deletion, GC, and administrative changes (informative) + +ASL/1-STORE does not standardize deletion or garbage collection, but logically: + +* Removing a mapping `ref -> Artifact` transforms `M(ref)` from defined to undefined. +* After such removal, `get(ref)` must return `ERR_NOT_FOUND`. + +How and when such changes occur (manual deletion, GC, retention policies) is up to higher layers and operational policy. + +--- + +## 4. Error Model + +ASL/1-STORE defines a minimal logical error model. Concrete APIs may map these to exceptions, status codes, or error variants, but MUST preserve their semantics. + +### 4.1 Core error categories + +1. **Not Found — `ERR_NOT_FOUND`** + + Condition: + + * `get(ref)` is invoked and `M(ref)` is undefined in the current StoreSnapshot. + +2. 
**Integrity Error — `ERR_INTEGRITY`** + + Conditions include (non-exhaustive): + + * A `put` would associate a `Reference` with an `Artifact` different from the `Artifact` already stored under that `Reference` (violating `STORE/PUT-NO-ALIAS/CORE/1`). + * An internal invariant check reveals that for some `(ref, A)` in `M`, the canonical encoding and hash under `StoreConfig` no longer produce `ref` for `A` (corruption or misconfiguration). + + Behavior on integrity errors (e.g. fail-fast vs quarantine) is implementation and policy dependent, but: + + > **STORE/INTEGRITY-NO-SILENCE/CORE/1** + > A StoreInstance **MUST NOT** silently accept or mask conditions that violate ASL/1 identity invariants. Such conditions **MUST** manifest as `ERR_INTEGRITY` (or an error that is semantically at least as strong) at the store’s API or monitoring boundary. + +3. **Unsupported Identity Configuration — `ERR_UNSUPPORTED`** + + Conditions (examples): + + * A `put` or internal operation requires computing a `Reference` using an `encoding_profile` or `hash_id` that the StoreInstance does not implement. + * A `get` is invoked with a `Reference.hash_id` that the StoreInstance is not configured to support, and the StoreInstance’s policy is to reject such references rather than treating them as potentially unmapped. + + Implementations MAY choose to: + + * Accept unknown `hash_id` values but always treat them as “possibly unmapped” (effectively `ERR_NOT_FOUND` if no mapping exists); or + * Reject them explicitly as `ERR_UNSUPPORTED`. + +ASL/1-STORE does not standardize I/O failures, timeouts, or auth errors; those are part of concrete API and deployment design. + +--- + +## 5. Locality & Data Movement (Informative) + +ASL/1-STORE is a logical model, but real systems care about data movement. To keep computing sane and efficient, implementations are encouraged to follow a **data movement minimization principle**. 
+ +### 5.1 Within a StoreInstance + +Within a single StoreInstance and its **co-located** consumers (e.g. PEL/1 engines, TGK/1 ingestors, CIL/1 logic in the same process): + +* Implementations **SHOULD** avoid copying Artifact bytes unnecessarily. +* They MAY: + + * Represent `Artifact.bytes` as immutable views over underlying buffers (e.g. memory-mapped files, shared segments). + * Pass those views directly to co-located components instead of serializing/deserializing repeatedly. + * Delay or avoid materializing full `ArtifactBytes` unless required. + +These optimizations are invisible at the ASL/1-STORE level as long as: + +* returned `Artifact`s satisfy ASL/1-CORE equality; and +* `put`/`get` semantics remain as defined. + +### 5.2 Across StoreInstances (inter-store transfer) + +ASL/1-STORE does not define a transfer protocol, but the **logical meaning** of transferring an Artifact from `S_src` to `S_dst` is: + +1. `artifact = S_src.get(ref_src)` +2. If that `get` fails with `ERR_NOT_FOUND`, the transfer fails with `ERR_NOT_FOUND`. +3. Otherwise, `ref_dst = S_dst.put(artifact)` +4. Return `ref_dst`. + +If `S_src` and `S_dst` share the same `StoreConfig` (`encoding_profile` and `hash_id`), then: + +* For any `Artifact`, `ref_dst` MUST equal `ref_src`. + +If they differ (e.g. different hash or encoding), then: + +* `ref_dst` MAY differ from `ref_src` while still identifying an Artifact identical in the ASL/1 sense. +* Higher layers (e.g. overlays, provenance profiles) MAY track both references. + +Implementations **SHOULD** send only necessary data (canonical bytes or equivalent) and deduplicate at the destination by `Reference`. + +--- + +## 6. Interaction with Other Layers (Informative) + +### 6.1 PEL/1 (Primitive Execution Layer) + +PEL/1 typically: + +* Uses a co-located StoreInstance for: + + * fetching input Artifacts by `Reference` (`get`), and + * persisting outputs and ExecutionResults (`put`).
+ +Given ASL/1-STORE semantics: + +* PEL/1 can rely on `get(ref)` to be pure and deterministic for a fixed snapshot. +* PEL/1 can rely on `put(artifact)` to be idempotent and to provide a stable `Reference` used elsewhere (e.g. in TGK edges, receipts, or facts). + +ASL/1-STORE does not constrain PEL/1 scheduling, side effects, or execution policies. + +### 6.2 TGK/1-CORE (Trace Graph Kernel) + +TGK/1-CORE treats StoreInstances as one of many possible sources of Artifacts: + +* EdgeArtifacts and other provenance-relevant Artifacts may be stored in ASL/1-STORE. +* TGK/1-CORE then builds a `ProvenanceGraph` over their `Reference`s. + +ASL/1-STORE provides: + +* stable `put`/`get` semantics for resolving `Reference -> Artifact`; +* immutability guarantees that underpin TGK’s projection invariants. + +### 6.3 CIL/1, FCT/1, FER/1, OI/1 + +Certification, transaction, and overlay layers: + +* Use `put` to persist certificate Artifacts, fact Artifacts, evidence bundles, overlay records, etc. +* Use `get` to resolve `Reference`s when verifying proofs, reconstructing receipts, or answering queries. + +They rely on ASL/1-STORE to: + +* maintain consistent mappings for `Reference -> Artifact`; +* avoid silent collisions; +* distinguish `ERR_NOT_FOUND` vs `ERR_INTEGRITY` vs `ERR_UNSUPPORTED` at the storage boundary. + +--- + +## 7. Conformance + +An implementation is **ASL/1-STORE–conformant** if, for each StoreInstance it exposes, it satisfies all of the following: + +1. **StoreConfig correctness** + + * Associates a well-defined `StoreConfig` (`encoding_profile`, `hash_id`) with each StoreInstance. + * Uses that configuration consistently for all `put` and internal identity-related operations in a StoreSnapshot. + +2. **Correct `put` semantics** + + * Implements `put(Artifact)` as in §3.1: + + * derives `Reference` via ASL/1-CORE’s canonical encoding and hashing rule for its `StoreConfig`; + * ensures `STORE/PUT-IDEMP/CORE/1` and `STORE/PUT-NO-ALIAS/CORE/1`. + +3. 
**Correct `get` semantics** + + * Implements `get(Reference)` as in §3.2: + + * if a mapping exists, returns an Artifact identical (ASL/1-CORE equality) to the stored value; + * if no mapping exists, returns `ERR_NOT_FOUND`. + + * Guarantees `STORE/GET-PURE/CORE/1` for any fixed StoreSnapshot. + +4. **Integrity handling** + + * Detects and surfaces integrity violations as `ERR_INTEGRITY` (or stricter), consistent with `STORE/INTEGRITY-NO-SILENCE/CORE/1`. + * Does not silently accept collisions or identity-breaking inconsistencies. + +5. **Identity preservation** + + * Ensures that any `(ref, artifact)` mapping established by `put` is consistent with ASL/1-CORE’s definition of `Reference` for the configured `StoreConfig`. + * Does not introduce alternate identity notions (e.g. “object IDs”, “paths”) that override or replace `Reference` at this layer. + +6. **Separation of logical semantics from implementation** + + * Treats physical layout, caching, chunking, and replication as internal concerns that do not alter the logical `put`/`get` behavior. + * Does not require clients to know about file paths, DB keys, or internal topologies for correctness. + +7. **Profile compatibility (if claimed)** + + * If the implementation claims compatibility with specific encoding profiles (e.g. `ENC/ASL1-CORE v1`) and hash families (`HASH/ASL1`), it actually implements them according to those specifications. + * Any additional surfaces (e.g. “multi-profile stores”, “multi-hash stores”) are documented as separate layers or profiles and do not violate the core semantics above. + +Everything else — transport design, API shape, performance characteristics, distribution, and operational policies — lies outside ASL/1-STORE and may be specified by separate documents and implementation guides. + +--- + +## 8. 
Evolution (Informative) + +ASL/1-STORE is intended to evolve **additively**: + +* New encoding profiles (`EncodingProfileId`s) and hash algorithms (`HashId`s) can be introduced by `ENC/ASL1-CORE` and `HASH/ASL1` without changing ASL/1-STORE. +* New store-level profiles (e.g. “sharded store”, “append-only store”, “multi-profile store”) can be defined as long as they respect the core semantics of `put`/`get`. + +ASL/1-STORE itself MUST NOT be changed in a way that: + +* alters the meaning of existing `StoreConfig` combinations; or +* permits a conformant StoreInstance to associate two different Artifacts with the same `Reference` under the same configuration. + +Such changes would be considered a new major surface (e.g. `ASL/2-STORE`), not an evolution of `ASL/1-STORE`. + +This aligns with the broader Amduat principle: + +> **Evolve by addition and explicit versioning; never rewrite identity or history.** + +--- + +## Document History + +* **0.4.0 (2025-11-16):** Registered as Tier-1 spec and aligned to the Amduat 2.0 substrate baseline. diff --git a/tier1/enc-asl1-core.md b/tier1/enc-asl1-core.md new file mode 100644 index 0000000..7e4c4d9 --- /dev/null +++ b/tier1/enc-asl1-core.md @@ -0,0 +1,598 @@ +# ENC/ASL1-CORE v1 — Core Canonical Encoding Profile + +Status: Approved +Owner: Niklas Rydberg +Version: 1.0.5 +SoT: Yes +Last Updated: 2025-11-16 +Linked Phase Pack: N/A +Tags: [deterministic, binary-minimalism] + + + +**Document ID:** `ENC/ASL1-CORE` +**Profile ID:** `ASL_ENC_CORE_V1 = 0x0001` +**Layer:** Substrate Primitive Profile (Canonical Encoding) + +**Depends on (normative):** + +* **ASL/1-CORE v0.4.1** (value model: `Artifact`, `TypeTag`, `Reference`, `HashId`) + +**Integrates with (cross-profile rules):** + +* **HASH/ASL1 v0.2.4** (ASL1 hash family: registry of `HashId → algorithm, digest length`) + + * This profile does **not** depend on HASH/ASL1 to define its layouts. + * When both profiles are implemented, additional cross-checks apply (see §4.4, §5). 
+ +**Used by (descriptive):** + +* ASL/1-CORE identity semantics (canonical encodings as the basis for hashing) +* ASL/1-STORE (persistence and integrity) +* PEL/1 (execution artifacts and results) +* CIL/1, FER/1, FCT/1, OI/1 (typed envelopes, receipts, facts, overlays) +* HASH/ASL1 (interpretation and checking of `ReferenceBytes`) + +> The Profile ID `ASL_ENC_CORE_V1` and this document’s version are **not** encoded into `ArtifactBytes` or `ReferenceBytes`. Encoding version is selected by context (deployment, profile, or store configuration), not embedded per value. + +© 2025 Niklas Rydberg. + +## License + +Except where otherwise noted, this document (text and diagrams) is licensed under +the Creative Commons Attribution 4.0 International License (CC BY 4.0). + +The identifier registries and mapping tables (e.g. TypeTag IDs, HashId +assignments, EdgeTypeId tables) are additionally made available under CC0 1.0 +Universal (CC0) to enable unrestricted reuse in implementations and derivative +specifications. + +Code examples in this document are provided under the Apache License 2.0 unless +explicitly stated otherwise. Test vectors, where present, are dedicated to the +public domain under CC0 1.0. + + +--- + +## 0. Overview + +`ENC/ASL1-CORE v1` defines the **canonical, streaming-friendly, injective binary encoding** used across the Amduat 2.0 substrate for two core value types from ASL/1-CORE: + +1. **ArtifactBytes** — canonical bytes for an ASL/1 `Artifact` +2. **ReferenceBytes** — canonical bytes for an ASL/1 `Reference` + +This profile ensures: + +* **Injectivity** — each ASL/1 value maps to exactly one byte string, and distinct values never map to the same byte string. +* **Determinism** — identical values yield identical encodings across implementations. +* **Stability** — bytes never depend on platform, locale, endianness, or environment. +* **Streaming-compatibility** — encoders, decoders, and hashers operate in forward-only mode.
+ +`ASL_ENC_CORE_V1` is the **canonical ASL/1 encoding profile** used by the Amduat 2.0 substrate stack for: + +* ASL/1 identity model (via canonical encoding + ASL1 hashing), +* the hashing substrate (HASH/ASL1), +* ASL/1-STORE persistence semantics, +* PEL/1 execution input/output artifacts, +* and canonical near-core profiles. + +The encodings defined in this profile satisfy all canonical encoding requirements in `ASL/1-CORE §3.2`: injectivity, stability, determinism, explicit structure, type-sensitivity, byte-transparency, and streaming-friendliness. + +--- + +## 1. Scope & Layering + +### 1.1 Purpose + +This specification defines: + +* The **canonical binary layout** for `ArtifactBytes` and `ReferenceBytes`. +* Normative encoding and decoding procedures. +* How these encodings interact with the ASL1 hash family. +* Required consistency checks when HASH/ASL1 is present. +* Streaming and injectivity requirements. + +### 1.2 Non-goals + +This profile does **not** define: + +* Any filesystem, transport, or database representation. +* Chunking or multipart strategies for large artifacts. +* Any alternative encoding families (those are separate profiles). +* Semantics of `TypeTag` values or registry rules. +* Storage layout, replication, or policy. + +Those concerns belong to ASL/1-STORE, PEL/1, HASH/ASL1, and higher layers. + +### 1.3 Layering constraints + +In line with the substrate overview: + +* `ENC/ASL1-CORE` is a **near-core substrate profile**, not a kernel primitive. +* It **MUST NOT** re-define `Artifact`, `Reference`, `TypeTag`, or `HashId`; those are defined solely by `ASL/1-CORE`. +* It is **storage-neutral** and **policy-neutral**. +* It defines exactly one canonical encoding profile: `ASL_ENC_CORE_V1`. + +--- + +## 2. Conventions + +The key words **MUST**, **SHOULD**, **MAY**, etc. follow RFC 2119. 
+ +### 2.1 Integer encodings + +All multi-byte integers are encoded as **big-endian**: + +* `u8` — 1 byte +* `u16` — 2 bytes +* `u32` — 4 bytes +* `u64` — 8 bytes + +Only **fixed-width** integers are used. + +### 2.2 Booleans (presence flags) + +Booleans used as presence flags are encoded as: + +* `false` → `0x00` +* `true` → `0x01` + +Booleans are only used for presence flags, never for general logical conditions. + +### 2.3 OctetString + +Except where explicitly overridden, an `OctetString` is encoded as: + +```text +[length (u64)] [raw bytes] +``` + +* `length` is the number of bytes. +* `length` MAY be zero. +* There is no implicit terminator or padding. + +Whenever this profile says an ASL/1 field is an `OctetString`, its canonical encoding is this `u64 + bytes` form **unless explicitly stated otherwise**. + +> **Exception:** `Reference.digest` is encoded without an explicit length field; see §4.2. + +--- + +## 3. Artifact Encoding + +### 3.1 Logical structure (from ASL/1-CORE) + +From `ASL/1-CORE`: + +```text +TypeTag { + tag_id: uint32 +} + +Artifact { + bytes: OctetString + type_tag: optional TypeTag +} +``` + +`TypeTag` semantics (registries, meaning of tag IDs) are opaque at this layer. + +### 3.2 Canonical layout: ArtifactBytes + +The canonical binary layout for an `Artifact` is: + +```text ++----------------------+-------------------------+---------------------------+ +| has_type_tag (u8) | [type_tag (u32)] | bytes_len (u64) | ++----------------------+-------------------------+---------------------------+ +| bytes (b[bytes_len]) ... ++------------------------------------------------------------------------ +``` + +Fields: + +1. **has_type_tag (u8)** — presence flag for `type_tag` + + * `0x00` → no `type_tag` + * `0x01` → `type_tag` is present and follows immediately + +2. **type_tag (u32)** — only present if `has_type_tag == 0x01` + + * Encodes `TypeTag.tag_id` as a 32-bit unsigned integer. + +3. 
**bytes_len (u64)** + + * Length in bytes of `Artifact.bytes`. + * MAY be zero. + +4. **bytes** + + * Raw bytes of `Artifact.bytes` (payload). + +No padding, alignment, or variant tags are introduced beyond what is explicitly described above. + +### 3.3 Encoding (normative) + +Let `A` be an `Artifact`. The canonical encoding function: + +```text +encode_artifact_core_v1 : Artifact → ArtifactBytes +``` + +is defined as: + +1. Emit `has_type_tag` (`u8`): + + * `0x00` if `A.type_tag` is absent. + * `0x01` if `A.type_tag` is present. + +2. If `A.type_tag` is present, emit `A.type_tag.tag_id` as `u32`. + +3. Let `bytes_len = len(A.bytes)`; emit `bytes_len` as `u64`. + +4. Emit the raw bytes of `A.bytes`. + +The result is the canonical `ArtifactBytes`. + +This encoding satisfies the `ASL/1-CORE §3.2` requirements: injective, stable, deterministic, explicit in structure, type-sensitive, byte-transparent, and streaming-friendly. + +### 3.4 Decoding (normative) + +Given a byte slice known to contain exactly one `ArtifactBytes` value, the canonical decoding function: + +```text +decode_artifact_core_v1 : ArtifactBytes → Artifact +``` + +is defined as: + +1. Read `has_type_tag` (`u8`). + + * If the value is neither `0x00` nor `0x01`, fail with an encoding error. + +2. If `has_type_tag == 0x01`, read `tag_id (u32)` and construct `TypeTag{ tag_id }`. + +3. Read `bytes_len (u64)`. + +4. Read exactly `bytes_len` bytes; this is `bytes`. + +5. Construct `Artifact{ bytes, type_tag }` where `type_tag` is either `None` or `Some(TypeTag{ tag_id })` per steps above. + +Decoders MUST reject: + +* Invalid presence flags (`has_type_tag` not in `{0x00, 0x01}`). +* Truncated sequences (insufficient bytes for declared lengths). +* Over-long sequences where `bytes_len` cannot be represented or allocated safely in the implementation’s execution model (encoding error). +* Trailing bytes if the decoding context expects an isolated `ArtifactBytes` value. 
+ +### 3.5 Injectivity + +The mapping: + +```text +Artifact → ArtifactBytes +``` + +defined by `encode_artifact_core_v1` is **injective**: + +* Each `Artifact` value has exactly one canonical byte string. +* Decoding the canonical bytes via `decode_artifact_core_v1` yields exactly that `Artifact`. + +### 3.6 Streaming properties + +Encoders and decoders MUST NOT require backtracking: + +* The header (`has_type_tag`, optional `type_tag`, `bytes_len`) is computed and emitted/read once, in order. +* `bytes` MAY be streamed directly: + + * Encoders MAY produce the payload incrementally after emitting `bytes_len`. + * Decoders MAY pass the payload through to a consumer or hasher as it is read. + +Incremental hashing (e.g., computing digests over `ArtifactBytes`) MUST be possible with a single forward pass over the byte stream. + +--- + +## 4. Reference Encoding + +### 4.1 Logical structure (from ASL/1-CORE) + +From `ASL/1-CORE`: + +```text +Reference { + hash_id: HashId // uint16 + digest: OctetString +} + +HashId = uint16 +``` + +For encoding purposes, `Reference.digest` is treated as a raw digest byte string, not as a generic encoded `u64 + bytes` OctetString. + +### 4.2 Canonical layout: ReferenceBytes + +The canonical binary layout for a `Reference` is: + +```text ++----------------+---------------------------+ +| hash_id (u16) | digest (b[?]) ... ++----------------+---------------------------+ +``` + +Fields: + +1. **hash_id (u16)** + + * Encodes `Reference.hash_id`. + * Semantically, an element of the `HashId` space defined by ASL/1-CORE (and populated by HASH/ASL1 when present). + +2. **digest** + + * Raw digest bytes. + * The length of `digest` is **not encoded** explicitly in this profile. + * Digest length is determined by the decoding context: + + * by the **frame boundary** of the `ReferenceBytes` value (e.g. “this message consists of exactly one `ReferenceBytes`”), or + * by an outer length-prefix in a higher-level enclosing structure. 
+ +> This layout is an explicit exception to the general `OctetString = u64 + bytes` rule. It keeps `ReferenceBytes` compact and relies on framing + the hash registry for length. + +### 4.3 Encoding (normative) + +Let `R` be a `Reference`. The canonical encoding function: + +```text +encode_reference_core_v1 : Reference → ReferenceBytes +``` + +is defined as: + +1. Emit `hash_id = R.hash_id` as `u16`. + +2. Emit the raw bytes of `R.digest`. + +When `HASH/ASL1` is implemented and the `hash_id` is known, the encoder MUST ensure: + +```text +len(R.digest) == expected_digest_length(hash_id) +``` + +where `expected_digest_length` is taken from the HASH/ASL1 registry. + +The result is the canonical `ReferenceBytes`. + +### 4.4 Decoding & consistency checks (normative) + +Given a byte slice known to contain exactly one `ReferenceBytes` value, the canonical decoding function: + +```text +decode_reference_core_v1 : ReferenceBytes → Reference +``` + +is defined as: + +1. Read `hash_id` as `u16`. + +2. Treat **all remaining bytes in the slice** as the digest `digest`. + +3. Construct `Reference{ hash_id, digest }`. + +**Boundary requirement:** + +Decoding contexts MUST provide explicit boundaries for `ReferenceBytes` values (e.g., via an external length-prefix or by framing the entire message as a single `ReferenceBytes` value). A decoder MUST NOT read beyond the slice that defines the `ReferenceBytes` frame. + +**Cross-profile consistency with HASH/ASL1 (when present):** + +If the implementation also implements `HASH/ASL1` and recognizes this `hash_id`, then: + +* Let `expected_len = expected_digest_length(hash_id)` from the ASL1 registry. + +* The implementation **MUST** enforce: + + ```text + len(digest) == expected_len + ``` + +* Any mismatch MUST result in an encoding/integrity error. + +If the implementation does **not** implement HASH/ASL1 or does not recognize the `hash_id`: + +* It MAY accept the value as a structurally well-formed `Reference`. 
+* It MUST treat the algorithm as **unsupported** for digest recomputation or verification. + +### 4.5 Injectivity + +The mapping: + +```text +Reference → ReferenceBytes +``` + +defined by `encode_reference_core_v1` is **injective**: + +* Each `Reference` value has exactly one canonical byte string. +* Equality of `ReferenceBytes` implies equality of the underlying `Reference` (same `hash_id`, same digest bytes). + +No additional normalization is performed. + +--- + +## 5. Hash Interactions & Canonicality + +### 5.1 Canonical hashing rule + +For encoding profile `ASL_ENC_CORE_V1`, the canonical rule for constructing `Reference` values from `Artifact` values is: + +```text +ArtifactBytes = encode_artifact_core_v1(A) +digest = H(ArtifactBytes) +Reference = { hash_id = HID, digest = digest } +``` + +where: + +* `A` is an `Artifact` (ASL/1-CORE), +* `H` is a hash function associated with `HID` in the ASL1 hash family, +* `HID` is a `HashId` (u16). + +This is `ASL/CORE-REF-DERIVE/1` instantiated with `ASL_ENC_CORE_V1`. + +> **REF-DERIVE INV/ENC/1** +> Under `ASL_ENC_CORE_V1`, any component that claims to derive `Reference` values from `Artifact` values **MUST** use this rule. + +### 5.2 Default algorithm in canonical deployments + +In canonical Amduat 2.0 substrate deployments (per `HASH/ASL1`): + +* `HashId = 0x0001` is assigned to `HASH-ASL1-256`. +* Digest length is 32 bytes. +* `HASH-ASL1-256` is SHA-256 or semantically equivalent. + +This profile does **not** force any particular `HashId` in all deployments, but: + +* if a deployment adopts `HashId = 0x0001` as `HASH-ASL1-256`, then any `Reference` with `hash_id = 0x0001` **MUST** have a 32-byte digest. 
+ +### 5.3 Deterministic agreement + +If two implementations: + +* implement `ASL_ENC_CORE_V1`, and +* use the same hash algorithm `H` for a given `HashId`, + +then for any `Artifact A` they MUST: + +* produce identical `ArtifactBytes = encode_artifact_core_v1(A)`, +* produce identical `digest = H(ArtifactBytes)`, +* produce identical `Reference` and `ReferenceBytes = encode_reference_core_v1(Reference)`. + +This is the determinism foundation used by ASL/1-STORE, PEL/1, FER/1, and FCT/1. + +### 5.4 Identity contexts and encoding profile selection + +For any context where `Reference` values are derived (e.g. a store, a PEL engine, a profile), the **encoding profile MUST be fixed and explicit**. + +If a context adopts `ASL_ENC_CORE_V1`: + +* All `Reference` values in that context MUST be derived via `encode_artifact_core_v1` and the canonical hashing rule (§5.1). +* The context MUST NOT mix `Reference`s derived from different canonical encoding profiles inside the same logical identity space. + +This ensures that for a given `(hash_id, digest)` pair, there is a unique underlying `ArtifactBytes` and `Artifact` (modulo cryptographic collisions). + +--- + +## 6. Examples (Non-Normative) + +Hex values are shown compactly without separators. 
+ +### 6.1 Artifact without type tag + +Artifact: + +```text +bytes = DE AD // two bytes: 0xDE, 0xAD +type_tag = none +``` + +Encoding: + +```text +has_type_tag = 00 +bytes_len = 0000000000000002 +bytes = DEAD +``` + +Canonical `ArtifactBytes`: + +```text +00 0000000000000002 DEAD +``` + +Digest with `HASH-ASL1-256` (SHA-256): + +```text +digest = SHA-256(00 0000000000000002 DEAD) +``` + +Assuming `HashId = 0001` for `HASH-ASL1-256`, the `ReferenceBytes` are: + +```text +hash_id = 0001 +digest = <32 digest bytes> +``` + +Canonical `ReferenceBytes`: + +```text +0001 <32 digest bytes> +``` + +### 6.2 Artifact with type tag & empty bytes + +Artifact: + +```text +bytes = "" (empty) +type_tag = TypeTag{ tag_id = 5 } +``` + +Encoding: + +```text +has_type_tag = 01 +type_tag = 00000005 +bytes_len = 0000000000000000 +bytes = (none) +``` + +Canonical `ArtifactBytes`: + +```text +01 00000005 0000000000000000 +``` + +Hashing and `ReferenceBytes` proceed as in §6.1. + +--- + +## 7. Conformance + +An implementation conforms to `ENC/ASL1-CORE v1.0.5` if and only if it: + +1. **Correctly encodes and decodes Artifacts** + + * Implements `encode_artifact_core_v1` and `decode_artifact_core_v1` exactly as in §3.3 and §3.4. + * Produces and accepts only the canonical layout for `ArtifactBytes`. + * Ensures injectivity and exact round-tripping. + +2. **Correctly encodes and decodes References** + + * Implements `encode_reference_core_v1` and `decode_reference_core_v1` exactly as in §4.3 and §4.4. + * Produces and accepts only the canonical layout for `ReferenceBytes` (no `digest_len` field). + * When HASH/ASL1 is also implemented: + + * Enforces digest-length consistency for all known `HashId`s, i.e. `len(digest) == expected_digest_length(hash_id)`. + +3. **Implements canonical hashing correctly** + + * Uses `ArtifactBytes` from `encode_artifact_core_v1` as the **only** input to ASL1 hash functions when deriving `Reference`s under this profile. 
+ * Computes `Reference` via the canonical rule in §5.1. + * Does not derive `Reference`s from non-canonical or alternative encodings in contexts that claim to use `ASL_ENC_CORE_V1`. + +4. **Preserves streaming-friendliness** + + * Does not require backward reads or multi-pass parsing for either `ArtifactBytes` or `ReferenceBytes`. + * Supports incremental hashing and streaming of payload bytes. + * Ensures that decoding contexts provide explicit boundaries for each `ReferenceBytes` value. + +5. **Respects layering and identity semantics** + + * Does not re-define `Artifact`, `Reference`, `TypeTag`, or `HashId` (those come from `ASL/1-CORE`). + * Treats storage, transport, and policy as out-of-scope (delegated to ASL/1-STORE and higher profiles). + * Ensures that two logical ASL/1 values encode identically under this profile **if and only if** they are identical under ASL/1-CORE semantics. + +Everything else — transport, storage layout, replication, indexing, overlays, and policy — belongs to `ASL/1-STORE`, `HASH/ASL1`, `TGK/1`, and higher profiles. + +--- + +## Document History + +* **1.0.5 (2025-11-16):** Registered as Tier-1 spec and aligned to the Amduat 2.0 substrate baseline. 
diff --git a/tier1/hash-asl1.md b/tier1/hash-asl1.md new file mode 100644 index 0000000..654a0cc --- /dev/null +++ b/tier1/hash-asl1.md @@ -0,0 +1,490 @@ +# HASH/ASL1 — ASL1 Hash Algorithm Registry + +Status: Approved +Owner: Niklas Rydberg +Version: 0.2.4 +SoT: Yes +Last Updated: 2025-11-16 +Linked Phase Pack: N/A +Tags: [deterministic, registry] + + + +**Document ID:** `HASH/ASL1` +**Layer:** Substrate primitive profile (over ASL/1-CORE) + +**Depends on (normative):** + +* `ASL/1-CORE v0.4.x` — value substrate: `HashId`, `Reference`, `Artifact`, `EncodingProfileId` +* `ENC/ASL1-CORE v1.x` — canonical encoding for `Reference` (`ReferenceBytes`) + +**Informative references:** + +* `ASL/1-STORE v0.4.x` — content-addressable store model +* `TGK/1-CORE v0.7.x` — trace graph kernel (uses `Reference`) +* `PEL/1` — execution substrate +* `CIL/1`, `FCT/1`, `FER/1`, `OI/1` — profiles that depend on stable `Reference` semantics +* (future) `CID/1` — content identifier and domain-separation rules + +© 2025 Niklas Rydberg. + +## License + +Except where otherwise noted, this document (text and diagrams) is licensed under +the Creative Commons Attribution 4.0 International License (CC BY 4.0). + +The identifier registries and mapping tables (e.g. TypeTag IDs, HashId +assignments, EdgeTypeId tables) are additionally made available under CC0 1.0 +Universal (CC0) to enable unrestricted reuse in implementations and derivative +specifications. + +Code examples in this document are provided under the Apache License 2.0 unless +explicitly stated otherwise. Test vectors, where present, are dedicated to the +public domain under CC0 1.0. + + +--- + +## 0. 
Purpose & Context + +`HASH/ASL1` defines the **ASL1 hash algorithm family** for Amduat 2.0: + +* assigns stable `HashId` (`uint16`) values to concrete cryptographic hash algorithms; +* defines the **mandatory** baseline algorithm `HASH-ASL1-256`; +* reserves ranges for future classical and post-quantum algorithms; +* specifies how these algorithms are used when deriving `Reference` values: + + * via `ASL/CORE-REF-DERIVE/1` in `ASL/1-CORE`, and + * via `ENC/ASL1-CORE v1` binary encoding of `ReferenceBytes`. + +This is a **substrate primitive profile**, not a kernel primitive, but: + +> In Amduat 2.0, all **identity-critical** `Reference.hash_id` values used by the standard stack (ASL/1-STORE, TGK/1-CORE, PEL/1, CIL/1, FER/1, FCT/1, OI/1) MUST be interpreted according to this registry. + +--- + +## 1. Scope + +### 1.1 In scope + +This specification standardizes: + +1. The **ASL1 hash family**: common properties all algorithms must satisfy. + +2. A **registry** from `HashId` → algorithm descriptor: + + * `HashId` (`uint16`), + * digest length (bytes), + * normative definition and status. + +3. How these algorithms connect to: + + * `ASL/1-CORE`’s Reference derivation rule (`ASL/CORE-REF-DERIVE/1`), + * `ENC/ASL1-CORE v1`’s `ReferenceBytes` encoding. + +4. Rules for **algorithm evolution**: + + * immutability of assignments, + * constraints for adding new algorithms. + +### 1.2 Out of scope + +This specification does **not** define: + +* storage APIs, replication, or retention, +* execution runtimes, scheduling, or side effects, +* keyed constructions (MACs, KDFs, PRFs, etc.), +* non-cryptographic hashes, +* domain-separation rules at the CID layer (those belong in `CID/1` and/or encoding profiles), +* migration policy (it only provides primitives). + +--- + +## 2. Terminology & Conventions + +The RFC 2119 terms **MUST**, **SHOULD**, **MAY**, etc. apply.
+ +From `ASL/1-CORE`: + +* `OctetString` — finite byte sequence (`0x00–0xFF`), +* `HashId` — `uint16`, used as `Reference.hash_id`, +* `Reference` — `{ hash_id: HashId; digest: OctetString }`, +* `EncodingProfileId` — `uint16` identifying canonical encodings (e.g. `ASL_ENC_CORE_V1`), +* `ASL/CORE-REF-DERIVE/1` — normative Reference derivation rule. + +From `ENC/ASL1-CORE v1` (current): + +* `ReferenceBytes` — canonical encoding: + + ```text + u16 hash_id + digest[...] // remaining bytes in the frame are the digest + ``` + +**Note:** `Reference` carries only `hash_id` and `digest`. There is no extra “family” field on-wire. For Amduat 2.0, `HashId` values in ASL/1 contexts are **globally** interpreted using this `HASH/ASL1` registry. + +--- + +## 3. The ASL1 Hash Family + +### 3.1 Family properties + +All `"ASL1"` algorithms MUST be **cryptographic hash functions**: + +* **Preimage resistance** – infeasible to find `x` for a given digest `d` with `H(x) = d`. +* **Second-preimage resistance** – infeasible, given `x`, to find `x' ≠ x` with `H(x') = H(x)`. +* **Collision resistance** – infeasible to find any `(x, x')`, `x ≠ x'` with `H(x) = H(x')`. + +Each `"ASL1"` algorithm: + +* accepts arbitrary-length `OctetString` inputs, +* produces a **fixed-length** `OctetString` digest, +* MUST support **incremental / streaming** operation: + + * a single forward-only pass over input, + * no need to buffer entire input. + +These properties allow: + +* hashing large canonical encodings incrementally, +* use in streaming stores and execution engines. + +### 3.2 Family name and global use + +* Family name: `"ASL1"`. + +Within Amduat 2.0: + +* all **identity-critical** `Reference.hash_id` values used by the standard stack are interpreted as entries in this `"ASL1"` registry; +* `HASH/ASL1` is therefore the **global assignment** for `HashId` in ASL/1 identity contexts. 
+ +If other hash families are used in non-ASL contexts (e.g., external APIs), they **MUST NOT** reuse `HashId` values defined here for `Reference.hash_id` in ASL/1-CORE. They should either: + +* live in separate fields / structures; or +* use distinct namespaces not confused with `Reference.hash_id`. + +### 3.3 HashId space + +`HashId` is `uint16` and appears in `Reference.hash_id` and in `ReferenceBytes.hash_id`. + +This registry reserves: + +* `0x0000` — **Reserved** (never a valid algorithm). +* `0x0001–0x7FFF` — classical (pre-quantum) `"ASL1"` algorithms. +* `0x8000–0xFFFF` — post-quantum or specialized `"ASL1"` algorithms. + +Each algorithm has an intrinsic digest length `L` (>0 bytes), defined by its normative spec. This document does not impose an upper bound beyond “finite and practically representable in implementations.” (ENC/ASL1-CORE v1 does not carry the length explicitly; length is implied by framing and cross-checked against `L` when the algorithm is known.) + +--- + +## 4. Algorithm Registry + +### 4.1 Registry (v0.2.4) + +The `"ASL1"` registry is a mapping: + +```text +HashId (uint16) -> Algorithm descriptor +``` + +At version 0.2.4: + +| HashId | Name | Digest (bytes) | Status | Notes | +| ------------: | ------------- | -------------- | --------- | ------------------------------------------ | +| **0x0001** | HASH-ASL1-256 | 32 | MANDATORY | Canonical default for `ASL_ENC_CORE_V1` | +| 0x0002 | HASH-ASL1-512 | 64 (reserved) | RESERVED | Intended classical 512-bit algorithm | +| 0x8001 | HASH-ASL1-PQ1 | TBD | RESERVED | First PQ algorithm placeholder | +| 0x8002–0x80FF | — | varies | RESERVED | Reserved range for future PQ / specialized | + +Only `0x0001` is defined normatively at this version; others are reserved for future assignment. 
+ +### 4.2 HASH-ASL1-256 (mandatory) + +* **Name:** `HASH-ASL1-256` +* **HashId:** `0x0001` +* **Digest length:** 32 bytes +* **Status:** MANDATORY for all Amduat 2.0–conformant implementations + +#### 4.2.1 Normative definition + +`HASH-ASL1-256` is **bit-for-bit identical** to SHA-256 as defined in FIPS 180-4 (or any successor that preserves SHA-256 semantics). + +For all `data : OctetString`: + +```text +HASH-ASL1-256(data) == SHA-256(data) +``` + +Any implementation whose output differs from SHA-256 for any input MUST NOT claim to implement `HASH-ASL1-256`. + +`HASH-ASL1-256` MUST be deterministic and support incremental processing of input. + +#### 4.2.2 Relationship to ASL/1-CORE & ASL_ENC_CORE_V1 + +`ASL/1-CORE` defines `ASL/CORE-REF-DERIVE/1`: + +```text +ArtifactBytes = encode_P(A) +digest = H(ArtifactBytes) +Reference = { hash_id = HID, digest = digest } +``` + +For: + +* `P = ASL_ENC_CORE_V1` (`EncodingProfileId = 0x0001`), +* `HID = 0x0001`, +* `H = HASH-ASL1-256`, + +this becomes the **canonical default** Reference derivation for Amduat 2.0. + +Unless a profile explicitly opts out, all identity-critical `Reference` values for Artifacts encoded under `ASL_ENC_CORE_V1` **MUST** use this `(P, H)` pair. + +### 4.3 Reserved IDs + +The following identifiers are reserved: + +* `0x0002` — `HASH-ASL1-512`, digest length 64 bytes; classical 512-bit algorithm (e.g. SHA-512 or similar), TBD. +* `0x8001` — `HASH-ASL1-PQ1`; first post-quantum algorithm, TBD. +* `0x8002–0x80FF` — reserved block for additional post-quantum / specialized algorithms. + +Implementations MUST NOT treat these IDs as usable until a future `HASH/ASL1` revision defines them normatively. + +--- + +## 5. Interaction with ASL/1-CORE & ENC/ASL1-CORE v1 + +### 5.1 Reference derivation + +`ASL/1-CORE` defines `ASL/CORE-REF-DERIVE/1`. `HASH/ASL1` simply supplies the `"ASL1"` algorithms and `HashId`s. 
+ +Given: + +* Artifact `A`, +* encoding profile `P`, +* algorithm `H` with `HashId = HID`, + +then: + +```text +ArtifactBytes = encode_P(A) +digest = H(ArtifactBytes) +Reference = { hash_id = HID, digest = digest } +``` + +All ASL/1 conformant components **MUST** use this procedure for any `(EncodingProfileId, HashId)` pair they claim to support. + +### 5.2 ReferenceBytes under ENC/ASL1-CORE v1 + +`ENC/ASL1-CORE v1` encodes a `Reference` as: + +```text +u16 hash_id +digest[...] // remaining bytes in the enclosing frame are the digest +``` + +This profile does **not** carry an explicit digest length; framing is provided by the enclosing structure (e.g., length-prefix, message boundary). + +When an implementation both: + +* decodes `ReferenceBytes` under `ENC/ASL1-CORE v1`, and +* implements `HASH/ASL1` and recognizes `hash_id`, + +then it MUST enforce: + +```text +len(digest) == canonical_digest_length(hash_id) +``` + +where `canonical_digest_length(hash_id)` is taken from this registry. + +Any mismatch MUST be treated as an encoding / integrity error by the consumer. + +If a `hash_id` is unknown (or HASH/ASL1 is not implemented), an implementation MAY still treat the bytes as a generic `Reference { hash_id, digest }`, but: + +* it cannot recompute or verify the digest cryptographically, and +* higher layers MAY treat such a `Reference` as unsupported or lower-trust. + +--- + +## 6. Crypto Agility & Evolution + +### 6.1 Immutability of assignments + +Once a `HashId` is assigned to an algorithm, its: + +* digest length, +* underlying construction, +* behavior on all inputs, + +MUST NOT change in any way that alters output values for the **same input bytes**. + +For example: + +* `HashId = 0x0001` MUST always denote SHA-256 semantics; future revisions cannot redefine it as anything that changes the digest for the same input bytes (e.g. “SHA-256 plus domain separator”). 
+ +If domain separation or similar techniques are required, they MUST be expressed at the **input construction** level (e.g. in `CID/1` or encoding profiles), not by changing the hash function definition. + +### 6.2 Adding new algorithms + +A new `"ASL1"` algorithm MAY be added in a future `HASH/ASL1` version if and only if: + +* it satisfies the family properties in §3.1; + +* it has a fixed digest length `L > 0` bytes; + +* its spec includes: + + * assigned `HashId`, + * digest length, + * normative algorithm definition (via external standard or full spec), + * status (`MANDATORY`, `RECOMMENDED`, `OPTIONAL`, `EXPERIMENTAL`); + +* it is introduced via: + + * a new `HASH/ASL1` version, + * at least one ADR, + * published test vectors. + +Existing `HashId` assignments MUST NOT be repurposed. + +### 6.3 Coexistence and migration (informative) + +Higher layers can use `"ASL1"`’s crypto agility by: + +* computing more than one `Reference` for the same Artifact (multi-hash), +* storing those in receipts, overlays, or catalogs, +* defining profile-specific policies like: + + * “from date D, compute both `HASH-ASL1-256` and `HASH-ASL1-PQ1` for all new Artifacts; prefer 0x8001 for new dependencies.” + +`HASH/ASL1` itself: + +* does not prescribe when to migrate, +* only guarantees that `HashId` mappings and algorithms are stable. + +--- + +## 7. Conformance + +An implementation is **HASH/ASL1–conformant** (v0.2.4) if: + +1. **Correct HASH-ASL1-256 implementation** + + * Provides a `HASH-ASL1-256` function: + + * accepts arbitrary-length `OctetString` input, + * returns a 32-byte `OctetString` digest, + + * matches SHA-256 exactly for all inputs, + + * behaves deterministically and supports incremental operation. + +2. **Consistent Reference use with ENC/ASL1-CORE v1** + + * When encoding `ReferenceBytes`, emits: + + * `hash_id` as `u16`, + * digest bytes equal in length to the algorithm’s canonical digest length. 
+ + * When decoding `ReferenceBytes`: + + * for known `hash_id` values, enforces `len(digest) == canonical_digest_length(hash_id)` and treats mismatches as errors; + * for unknown `hash_id` values, MAY accept `Reference` structurally but MUST treat the algorithm as unsupported for verification. + +3. **Registry immutability** + + * Does not change the meaning of any assigned `HashId`, + * Does not use reserved IDs as custom algorithms outside the formal registry process. + +4. **Family compliance for extra algorithms** + + * For any additional `"ASL1"` algorithms claimed: + + * ensures they satisfy §3.1, + * documents their digest length and behavior. + +5. **Integration with ASL/1-CORE** + + * Uses `ASL/CORE-REF-DERIVE/1` when deriving References in the ASL/1 context, + * For `ASL_ENC_CORE_V1` and `hash_id = 0x0001`, uses `HASH-ASL1-256` unless a profile explicitly specifies another algorithm. + +--- + +## 8. Security Considerations + +1. **Collision risk** + + * Collisions in `HASH-ASL1-256` would be a severe substrate-level integrity issue for systems that rely only on `HashId = 0x0001`. + * Higher layers (CIL/1, FCT/1, FER/1, OI/1, TGK/PROV-style profiles) SHOULD: + + * assume collisions are possible in principle, + * provide detection and mitigation strategies (e.g. optional dual-hash, anomaly logging). + +2. **Algorithm deprecation** + + * If `HASH-ASL1-256` becomes weak: + + * future specs MAY introduce a new mandatory algorithm, + * migration strategies SHOULD be defined at profile / domain layers. + + * Existing References with `HashId = 0x0001` remain valid as historical IDs; their meaning MUST NOT be changed. + +3. **Side-channel resistance** + + * Implementations SHOULD mitigate timing/cache/power side channels, especially in shared environments. + * Use well-reviewed crypto libraries where possible. + +4. 
**Non-ASL1 hash usage** + + * Systems MAY use other hash functions (e.g., for local caches, external APIs), + * Such functions MUST NOT reuse `HashId`s defined in this registry for `Reference.hash_id`, + * They MUST be clearly separated from ASL/1 identity semantics. + +--- + +## 9. Example (Non-Normative) + +Given: + +* `EncodingProfileId = ASL_ENC_CORE_V1 (0x0001)`, +* algorithm `HASH-ASL1-256` (`HashId = 0x0001`), +* Artifact: + + ```text + Artifact { + bytes = 0xDE AD + type_tag = none + } + ``` + +Assume `ENC/ASL1-CORE v1` canonical Artifact encoding: + +```text +00 ; has_type_tag = false +0000000000000002 ; bytes_len = 2 (u64) +DEAD ; bytes +``` + +Then: + +1. `ArtifactBytes = encode_artifact_core_v1(Artifact)`. +2. `digest = HASH-ASL1-256(ArtifactBytes)` (SHA-256). +3. `Reference = { hash_id = 0x0001, digest = digest }`. +4. `ReferenceBytes` under `ENC/ASL1-CORE v1`: + + ```text + 0001 <32 bytes of digest> + ``` + +The frame boundary (e.g., length prefix or message boundary) determines where the digest ends. A consumer that knows `hash_id = 0x0001` and implements HASH/ASL1 will: + +* expect exactly 32 digest bytes, +* treat any other length as an error. + +This `Reference` can be used consistently across `ASL/1-STORE`, `TGK/1-CORE`, `PEL/1`, `CIL/1`, `FER/1`, `FCT/1`, `OI/1`, with equality defined by `ASL/1-CORE`. + +--- + +## Document History + +* **0.2.4 (2025-11-16):** Registered as Tier-1 spec and aligned to the Amduat 2.0 substrate baseline. 
diff --git a/tier1/opreg-pel1-kernel.md b/tier1/opreg-pel1-kernel.md new file mode 100644 index 0000000..4c0a3c6 --- /dev/null +++ b/tier1/opreg-pel1-kernel.md @@ -0,0 +1,741 @@ +# OPREG/PEL1-KERNEL — Kernel Operation Registry for PEL/1 + +Status: Approved +Owner: Niklas Rydberg +Version: 0.1.1 +SoT: Yes +Last Updated: 2025-11-16 +Linked Phase Pack: N/A +Tags: [registry, execution] + + + +**Document ID:** `OPREG/PEL1-KERNEL` +**Layer:** L1 Profile (Operation Registry for `PEL/1-CORE` + `PEL/PROGRAM-DAG/1`) + +**Depends on (normative):** + +* `ASL/1-CORE v0.3.x` — `Artifact`, `TypeTag`, `Reference`, `HashId` +* `PEL/1-CORE v0.1.x` — primitive execution layer core +* `PEL/PROGRAM-DAG/1 v0.2.x` — DAG scheme for PEL +* `HASH/ASL1 v0.2.x` — ASL1 hash family (for `HASH-ASL1-256`) + +**Integrates with (informative):** + +* `SUBSTRATE/STACK-OVERVIEW v0.1.x` +* `ENC/PEL-PROGRAM-DAG/1` (canonical encoding of Program) +* `PEL/TRACE-DAG/1` (optional trace profile) +* Higher-level operation registries (domain-specific ops) + +© 2025 Niklas Rydberg. + +## License + +Except where otherwise noted, this document (text and diagrams) is licensed under +the Creative Commons Attribution 4.0 International License (CC BY 4.0). + +The identifier registries and mapping tables (e.g. TypeTag IDs, HashId +assignments, EdgeTypeId tables) are additionally made available under CC0 1.0 +Universal (CC0) to enable unrestricted reuse in implementations and derivative +specifications. + +Code examples in this document are provided under the Apache License 2.0 unless +explicitly stated otherwise. Test vectors, where present, are dedicated to the +public domain under CC0 1.0. + + +--- + +## 0. Purpose and Non-Goals + +### 0.1 Purpose + +`OPREG/PEL1-KERNEL` defines a **minimal, globally stable set of PEL operations** that every “kernel-capable” PEL engine is expected to implement: + +* They operate on **ASL/1 Artifacts** (bytes + optional type tag). 
+* They are used in **`PEL/PROGRAM-DAG/1`** programs as `OperationId { name, version }`. +* They are **pure and deterministic**: same inputs → same outputs, independent of engine or environment. +* They explicitly define: + + * **Arity** (number of inputs), + * **Parameter model** (logical Params value), + * **Output shape** (number and form of outputs), + * **Runtime error conditions** and associated `status_code` values for `PEL/PROGRAM-DAG/1`. + +These operations are intentionally **low-level** and **byte-centric**; richer semantics (JSON, typed records, domain-specific logic) belong in separate registries. + +### 0.2 Non-goals + +This registry does **not** define: + +* Any storage or transport API (`ASL/1-STORE`). +* Any encoding of Programs or Params into bytes (`ENC/PEL-PROGRAM-DAG/1`, param-encoding profiles). +* Any certification or fact semantics (`CIL/1`, `FER/1`, `FCT/1`). +* Provenance graph edges (`TGK/1`). +* Human-readable diagnostics payloads (see §2.4). + +--- + +## 1. Conventions and Context + +### 1.1 Base types + +From `ASL/1-CORE`: + +```text +Artifact { + bytes: OctetString + type_tag: optional TypeTag +} + +TypeTag { + tag_id: uint32 +} + +Reference { + hash_id: HashId + digest: OctetString +} + +HashId = uint16 +``` + +From `PEL/1-CORE` and `PEL/PROGRAM-DAG/1` (simplified): + +```text +OperationId { + name: string + version: uint32 +} + +ExecutionStatus = uint8 // e.g. OK, INVALID_PROGRAM, INVALID_INPUTS, RUNTIME_FAILED +ExecutionErrorKind = uint8 // e.g. 
NONE, PROGRAM, INPUTS, RUNTIME + +ExecutionErrorSummary { + kind: ExecutionErrorKind + status_code: uint32 +} + +DiagnosticEntry { + code: uint32 + message: OctetString +} + +ExecutionResultValue { + pel1_version : uint16 + status : ExecutionStatus + scheme_ref : SchemeRef + summary : ExecutionErrorSummary + diagnostics : list<DiagnosticEntry> +} +``` + +`PEL/PROGRAM-DAG/1` defines `Exec_DAG` as: + +```text +Exec_DAG( + program: Artifact, + inputs: list<Artifact>, + params: optional Artifact +) -> (outputs: list<Artifact>, result: ExecutionResultValue) +``` + +and defines that each Node evaluates an `OperationId` with a logical interface: + +```text +Op(name, version)( + inputs: list<Artifact>, + params: ParamsValue +) -> Ok(list<Artifact>) | Err(status_code: uint32) +``` + +The overall `ExecutionResultValue.summary.status_code` for `RUNTIME_FAILED` is taken from the `status_code` returned by the failing operation. + +### 1.2 Status and error mapping + +This registry only defines **runtime error codes** (used when `Exec_DAG` sets `status = RUNTIME_FAILED`). + +Global outcome statuses: + +```text +ExecutionStatus { + OK = 0 + INVALID_PROGRAM = 2 + INVALID_INPUTS = 3 + RUNTIME_FAILED = 4 +} +``` + +Error summary kind: + +```text +ExecutionErrorKind { + NONE = 0 + PROGRAM = 1 + INPUTS = 2 + RUNTIME = 3 +} +``` + +Mapping (from `PEL/PROGRAM-DAG/1`): + +* `status = OK` ⇒ `kind = NONE`, `status_code = 0` +* `status = INVALID_PROGRAM` ⇒ `kind = PROGRAM`, `status_code = 2` +* `status = INVALID_INPUTS` ⇒ `kind = INPUTS`, `status_code = 3` +* `status = RUNTIME_FAILED` ⇒ `kind = RUNTIME`, `status_code = op-specific (> 0)` + +This registry **only** defines the operation-specific `status_code` values that may appear when `status = RUNTIME_FAILED`. + +### 1.3 Kernel status_code layout + +For kernel ops we reserve a simple scheme for `status_code` on runtime failure: + +```text +status_code = (kernel_op_code << 16) | error_index +``` + +Where: + +* `kernel_op_code` is a 16-bit numeric code assigned per operation in this registry. 
+* `error_index` is a small (non-zero) 16-bit integer enumerating distinct error causes per op. + +This ensures: + +* No collision between error codes of different operations. +* Easy offline decoding of `status_code` into `(op, reason)`. + +Concrete `kernel_op_code` assignments are given in §3. + +### 1.4 Params and encodings + +Each operation defines a **logical Params type** (e.g. `SliceParams { offset: u64; length: u64 }`). + +This registry does **not** define byte-level encodings of Params; those are defined in a companion profile (e.g. `OPREG/PEL1-KERNEL-PARAMS/1`). This document is the **semantic** registry. + +Conformance requirements: + +* For each operation, there MUST exist exactly one canonical encoding and decoding for its Params type. +* All engines claiming to implement the operation MUST use that same encoding and decoding. +* If Params decoding fails, the operation MUST treat the Node as either: + + * `INVALID_PROGRAM` (preferred for static malformations), or + * `RUNTIME_FAILED` with a specific `status_code` (if the registry so specifies). + +For this initial kernel set, we treat **Param decoding errors as INVALID_PROGRAM**, not as runtime failures. + +### 1.5 Diagnostics + +To keep `ExecutionResultValue` stable and simple, kernel operations never emit diagnostics. + +For kernel operations, the operation semantics MUST always return an empty diagnostics list, and the scheme’s `Exec_DAG` implementation MUST NOT add additional diagnostics when a failing Node is a kernel op. + +Human-readable error information is expected to be carried in: + +* separate trace artifacts (`PEL/TRACE-DAG/1`), or +* external logs and observability systems, not in `ExecutionResultValue.diagnostics`. + +--- + +## 2. Common Kernel Operation Conventions + +All kernel operations in this registry share these properties: + +1. **Purity and determinism** + + * They operate only on: + + * the input `Artifact.bytes` and `Artifact.type_tag`, + * their decoded Params, + * standard pure functions (e.g. 
integer arithmetic, hashing as per `HASH/ASL1`). + + * They MUST NOT: + + * read clocks or random sources, + * perform network or filesystem I/O, + * depend on global mutable state. + +2. **Type tags** + + * Unless otherwise stated, operations **preserve the input type tag** when transforming a single input. + * For operations with multiple inputs, if they require consistent type tags, this is checked at runtime and may yield a runtime error. + * Operations MAY produce Artifacts with `type_tag = None` for “raw bytes” outputs. + +3. **Arity and static vs dynamic errors** + + * Each operation specifies `min_inputs` and `max_inputs`. + * Violations of these arity constraints are **static** (depend only on the Program) and MUST be treated as `INVALID_PROGRAM`, not `RUNTIME_FAILED`. + * Runtime errors are reserved for **data-dependent** conditions (e.g. out-of-bounds slice based on actual input length). + +4. **Success vs failure** + + * On success: operation returns `Ok(list)`, and `Exec_DAG` keeps `status = OK` (unless a different Node fails later). + * On failure: operation returns `Err(status_code)`, and `Exec_DAG` stops evaluation and sets: + + ```text + status = RUNTIME_FAILED + summary.kind = RUNTIME + summary.status_code = status_code + diagnostics = [] + ``` + +--- + +## 3. Kernel Operation Index + +We define four kernel operations: + +| Kernel Op Code | OperationId.name | version | Summary | +| -------------: | ----------------------- | :------ | --------------------------------------- | +| `0x0001` | `"pel.bytes.concat"` | `1` | Concatenate N artifacts | +| `0x0002` | `"pel.bytes.slice"` | `1` | Take a byte slice of one artifact | +| `0x0003` | `"pel.bytes.const"` | `1` | Produce a constant artifact from params | +| `0x0004` | `"pel.bytes.hash.asl1"` | `1` | Hash an artifact’s bytes with ASL1 | + +All operation names are case-sensitive UTF-8 strings. 
+ +Each operation’s `OperationId` is: + +```text +OperationId { + name: <OperationId.name from the table above> + version: 1 +} +``` + +`kernel_op_code` in the `status_code` formula (§1.3) is the hex code in the first column. + +--- + +## 4. Operation Specifications + +### 4.1 `pel.bytes.concat` v1 (code 0x0001) + +**OperationId** + +```text +name = "pel.bytes.concat" +version = 1 +kernel_op_code = 0x0001 +``` + +**Intent** + +Concatenate the byte payloads of N input Artifacts (N ≥ 1) into a single output Artifact. All input type tags MUST be identical (including “no type tag”). + +#### 4.1.1 Arity and Params + +* `min_inputs = 1` +* `max_inputs = unbounded` (any positive number) +* Params: **none** (`Unit`) + +Static errors (handled as `INVALID_PROGRAM`): + +* `inputs.length == 0`. +* Params not decodable as `Unit` (i.e. any non-empty Params according to the canonical encoding). + +#### 4.1.2 Semantics + +Given: + +```text +inputs = [A0, A1, ..., A_{n-1}], n >= 1 +params = () +``` + +Let: + +```text +Ti = Ai.type_tag +Bi = Ai.bytes +``` + +1. **Type tag consistency check (runtime)** + + * If there exist `i, j` such that `Ti` and `Tj` are not equal in the `ASL/1-CORE` sense (i.e. one is absent and the other present, or both present but with different `tag_id`): + + * Operation returns `Err(status_code = 0x0001_0001)`. + +2. **Concatenation** + + * Define: + + ```text + B_out = B0 || B1 || ... || B_{n-1} // byte-wise concatenation + T_out = T0 // they are all equal by step 1 + ``` + + * Output list is a single Artifact `C`: + + ```text + C.bytes = B_out + C.type_tag = T_out + ``` + + * Operation returns `Ok([C])`. + +This operation does not impose any explicit limit on the concatenated length; overflow or resource exhaustion is outside the PEL semantic layer. 
+ +#### 4.1.3 Runtime error codes + +For `pel.bytes.concat` v1, runtime errors (producing `RUNTIME_FAILED`) are: + +| Name | Condition | `status_code` | +| ------------------- | ---------------------------------- | ------------- | +| `TYPE_TAG_MISMATCH` | Any pair of input type tags differ | `0x0001_0001` | + +On any such error: + +* Operation returns `Err(status_code)` as above. +* `Exec_DAG` sets `status = RUNTIME_FAILED`, `summary.status_code = status_code`, `diagnostics = []`. + +--- + +### 4.2 `pel.bytes.slice` v1 (code 0x0002) + +**OperationId** + +```text +name = "pel.bytes.slice" +version = 1 +kernel_op_code = 0x0002 +``` + +**Intent** + +Take a contiguous slice from a single input Artifact’s bytes. + +#### 4.2.1 Params: `SliceParams` + +Logical Params: + +```text +SliceParams { + offset: uint64 // byte offset, 0-based + length: uint64 // number of bytes to include +} +``` + +* `offset` and `length` are non-negative. +* Their canonical encoding/decoding is defined in a param-encoding profile; invalid encodings MUST result in `INVALID_PROGRAM`. + +#### 4.2.2 Arity + +* `min_inputs = 1` +* `max_inputs = 1` + +Arity violations → `INVALID_PROGRAM`. + +#### 4.2.3 Semantics + +Given: + +```text +inputs = [A] +params = SliceParams { offset, length } +``` + +Let: + +```text +B = A.bytes // length = L +T = A.type_tag +L = |B| +o = offset +ℓ = length +``` + +1. **Range check (runtime)** + + * If `o > L` or `o + ℓ > L` (with arithmetic in unbounded integers): + + * Operation returns `Err(status_code = 0x0002_0001)`. + +2. **Slicing** + + * Define: + + ```text + B_out = B[o .. o+ℓ] // ℓ bytes starting at index o + ``` + + * Output Artifact `C`: + + ```text + C.bytes = B_out + C.type_tag = T + ``` + + * Operation returns `Ok([C])`. + +Note: `o == L` and `ℓ == 0` is allowed and yields an empty-byte output. 
+ +#### 4.2.4 Runtime error codes + +For `pel.bytes.slice` v1: + +| Name | Condition | `status_code` | +| --------------------- | ------------------------------------------------------- | ------------- | +| `RANGE_OUT_OF_BOUNDS` | `offset > len(bytes)` or `offset + length > len(bytes)` | `0x0002_0001` | + +On such error, `Exec_DAG` sets `status = RUNTIME_FAILED`, `summary.status_code = 0x0002_0001`, `diagnostics = []`. + +--- + +### 4.3 `pel.bytes.const` v1 (code 0x0003) + +**OperationId** + +```text +name = "pel.bytes.const" +version = 1 +kernel_op_code = 0x0003 +``` + +**Intent** + +Produce a constant Artifact specified entirely by Params, with no data dependencies. This is a way to embed small literal values directly in a Program. + +#### 4.3.1 Params: `ConstParams` + +Logical Params: + +```text +ConstParams { + bytes: OctetString // payload bytes + has_tag: bool // whether a type tag is present + tag_id: uint32 optional // only meaningful if has_tag = true +} +``` + +Semantics: + +* If `has_tag == false`: + + * Output Artifact has `type_tag = None`. + +* If `has_tag == true`: + + * Output Artifact has `type_tag = Some(TypeTag{ tag_id })`. + +Param encoding/decoding is defined in a param-encoding profile; malformed encodings ⇒ `INVALID_PROGRAM`. + +#### 4.3.2 Arity + +* `min_inputs = 0` +* `max_inputs = 0` + +Any non-empty `inputs` list is a static error (`INVALID_PROGRAM`). + +#### 4.3.3 Semantics + +Given: + +```text +inputs = [] +params = ConstParams { bytes = B, has_tag, tag_id? } +``` + +Then: + +* If `has_tag` is false: + + ```text + C.bytes = B + C.type_tag = None + ``` + +* If `has_tag` is true: + + ```text + C.bytes = B + C.type_tag = Some(TypeTag{ tag_id }) + ``` + +* Output list is `[C]`. + +* Operation returns `Ok([C])`. + +There are no data-dependent runtime errors: this operation **always succeeds** given valid Params. 
+ +#### 4.3.4 Runtime error codes + +*None.* +All failures are static (bad Params encoding, wrong arity) and must be treated as `INVALID_PROGRAM`. + +--- + +### 4.4 `pel.bytes.hash.asl1` v1 (code 0x0004) + +**OperationId** + +```text +name = "pel.bytes.hash.asl1" +version = 1 +kernel_op_code = 0x0004 +``` + +**Intent** + +Compute an ASL1-family hash (`HASH/ASL1`) over the raw bytes of a single input Artifact. + +This operation is **not** about ASL/1 identity (which uses `ArtifactBytes` via `ENC/ASL1-CORE`), but about hashing arbitrary byte payloads for protocol or application use. + +#### 4.4.1 Params: `HashParams` + +Logical Params: + +```text +HashParams { + hash_id: HashId // must be a valid ASL1 HashId +} +``` + +For this version: + +* `hash_id` MUST be `0x0001` (i.e. `HASH-ASL1-256`). +* Any other `hash_id` MUST be treated as a **static error** ⇒ `INVALID_PROGRAM`. + +Rationale: this ensures all conformant engines agree on the algorithm set for this op. Future versions (e.g. `pel.bytes.hash.asl1` v2) MAY support additional `HashId`s. + +Param encoding/decoding is defined elsewhere; malformed encodings ⇒ `INVALID_PROGRAM`. + +#### 4.4.2 Arity + +* `min_inputs = 1` +* `max_inputs = 1` + +Arity violations ⇒ `INVALID_PROGRAM`. + +#### 4.4.3 Semantics + +Given: + +```text +inputs = [A] +params = HashParams { hash_id = 0x0001 } +``` + +Let: + +```text +B = A.bytes +H = HASH-ASL1-256 // SHA-256 as defined in HASH/ASL1 for HashId 0x0001 +``` + +Compute: + +```text +digest = H(B) // 32-byte digest +``` + +Then: + +* Output Artifact `C`: + + ```text + C.bytes = digest // exactly 32 bytes + C.type_tag = None // raw bytes digest, no type tag + ``` + +* Output list is `[C]`. + +* Operation returns `Ok([C])`. + +There are no data-dependent runtime errors; hashing is assumed total. Any internal errors (e.g. memory failure) are outside PEL semantics. 
+ +#### 4.4.4 Runtime error codes + +*None.* +All failures (unsupported `hash_id`, bad Params, wrong arity) are static and must be treated as `INVALID_PROGRAM`. + +--- + +## 5. Conformance + +An engine is **OPREG/PEL1-KERNEL–conformant** if and only if: + +1. **Operation availability** + + * It exposes the four operations defined in §3 with exactly the specified `OperationId { name, version }`. + +2. **Arity and Params** + + * For each operation, it enforces `min_inputs`/`max_inputs` as specified. + * It implements the defined logical Params types and uses the canonical param encoding/decoding for each. + * It treats: + + * arity violations, and + * invalid or undecodable Params + + as `INVALID_PROGRAM` per `PEL/PROGRAM-DAG/1` (i.e. `Exec_DAG` produces `status = INVALID_PROGRAM`, `summary.status_code = 2`). + +3. **Runtime semantics** + + * For all supported operations: + + * Given the same input Artifacts and Params, all conformant engines produce identical output Artifacts (same `bytes`, same `type_tag`) and identical `status_code` on failure. + * Runtime failure conditions (e.g. slice out-of-bounds, type tag mismatch) are detected exactly as specified and mapped to the correct `status_code` using the `kernel_op_code`/`error_index` scheme. + +4. **Status and diagnostics mapping** + + * When a kernel operation returns `Ok`, `Exec_DAG` MUST NOT change `status` or `summary` (beyond normal success semantics). + * When a kernel operation returns `Err(status_code)`: + + * `Exec_DAG` MUST set: + + ```text + status = RUNTIME_FAILED + summary.kind = RUNTIME + summary.status_code = status_code + diagnostics = [] + ``` + + * `Exec_DAG` MUST NOT mutate other fields of `ExecutionResultValue` except as defined in `PEL/PROGRAM-DAG/1` (e.g. to capture which Node failed, via trace profiles). + +5. **Purity** + + * Kernel operations MUST NOT perform external I/O or observe environment state; they MUST behave as pure functions of their inputs and Params. 
+ * Any caching or performance optimizations MUST NOT change observable behavior at the level of `Artifact` values and `status_code`. + +6. **Layering** + + * The engine does not depend on `ASL/1-STORE`, `TGK/1`, `CIL/1`, `FER/1`, `FCT/1`, or `OI/1` on the PEL core hot path. It may use those layers around PEL, but not as part of the operation semantics. + +--- + +## 6. Change Log (informative) + +**v0.1.1 (2025-11-15)** + +* Removed `pel.bytes.clone/1` from the kernel operation index. + +* Reassigned `kernel_op_code` values to remove the `0x01` gap: + + * `pel.bytes.concat/1`: `0x0002` → `0x0001` (and `TYPE_TAG_MISMATCH` from `0x0002_0001` → `0x0001_0001`). + * `pel.bytes.slice/1`: `0x0003` → `0x0002` (and `RANGE_OUT_OF_BOUNDS` from `0x0003_0001` → `0x0002_0001`). + * `pel.bytes.const/1`: `0x0004` → `0x0003`. + * `pel.bytes.hash.asl1/1`: `0x0005` → `0x0004`. + +* Updated §3, §4, and §5 to reflect the new kernel op set and codes. + +**v0.1.0 (2025-11-15)** + +* Initial definition of `OPREG/PEL1-KERNEL` with five kernel operations: + + * `pel.bytes.clone/1` + * `pel.bytes.concat/1` + * `pel.bytes.slice/1` + * `pel.bytes.const/1` + * `pel.bytes.hash.asl1/1` + +* Established `status_code = (kernel_op_code << 16) | error_index` convention. + +* Restricted `pel.bytes.hash.asl1/1` to `HashId = 0x0001` (HASH-ASL1-256) for cross-implementation determinism. + +* Required kernel operations to leave `ExecutionResultValue.diagnostics` empty; richer diagnostics to be handled by trace/overlay profiles. + +--- + +## Document History + +* **0.1.1 (2025-11-16):** Registered as Tier-1 spec and aligned to the Amduat 2.0 substrate baseline.