diff --git a/CMakeLists.txt b/CMakeLists.txt index a2e7c61..d209a73 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,3 +33,19 @@ target_link_libraries(amduatd PRIVATE amduat_tgk amduat_pel amduat_format amduat_asl_store_fs amduat_asl amduat_enc amduat_hash_asl1 amduat_util amduat_federation ) + +add_executable(amduat_pel_gc + src/amduat_pel_gc.c + src/asl_gc_fs.c +) + +set_target_properties(amduat_pel_gc PROPERTIES OUTPUT_NAME "amduat-pel") + +target_include_directories(amduat_pel_gc + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/vendor/amduat/include +) + +target_link_libraries(amduat_pel_gc + PRIVATE amduat_asl_store_fs amduat_asl_record amduat_asl amduat_enc + amduat_hash_asl1 amduat_pel amduat_util +) diff --git a/src/amduat_pel_gc.c b/src/amduat_pel_gc.c new file mode 100644 index 0000000..353a7dd --- /dev/null +++ b/src/amduat_pel_gc.c @@ -0,0 +1,75 @@ +#include "asl_gc_fs.h" + +#include "amduat/util/log.h" + +#include +#include +#include +#include + +static void amduat_pel_gc_usage(const char *argv0) { + fprintf(stderr, + "Usage: %s gc --root [--keep-materializations] [--delete] [--dry-run]\n", + argv0); +} + +int main(int argc, char **argv) { + const char *root = NULL; + bool keep_materializations = false; + bool delete_artifacts = false; + bool dry_run = true; + amduat_asl_gc_fs_options_t opts; + amduat_asl_gc_fs_stats_t stats; + + if (argc < 2 || strcmp(argv[1], "gc") != 0) { + amduat_pel_gc_usage(argv[0]); + return 2; + } + + for (int i = 2; i < argc; ++i) { + if (strcmp(argv[i], "--root") == 0) { + if (i + 1 >= argc) { + amduat_pel_gc_usage(argv[0]); + return 2; + } + root = argv[++i]; + } else if (strcmp(argv[i], "--keep-materializations") == 0) { + keep_materializations = true; + } else if (strcmp(argv[i], "--delete") == 0) { + delete_artifacts = true; + dry_run = false; + } else if (strcmp(argv[i], "--dry-run") == 0) { + dry_run = true; + delete_artifacts = false; + } else if (strcmp(argv[i], "--help") == 0 || + strcmp(argv[i], "-h") == 0) { + 
amduat_pel_gc_usage(argv[0]); + return 0; + } else { + amduat_pel_gc_usage(argv[0]); + return 2; + } + } + + if (root == NULL) { + amduat_pel_gc_usage(argv[0]); + return 2; + } + + opts.keep_materializations = keep_materializations; + opts.delete_artifacts = delete_artifacts; + opts.dry_run = dry_run; + if (!amduat_asl_gc_fs_run(root, &opts, &stats)) { + amduat_log(AMDUAT_LOG_ERROR, "gc failed"); + return 1; + } + + printf("pointer_roots=%zu\n", stats.pointer_roots); + printf("materialization_roots=%zu\n", stats.materialization_roots); + printf("marked_artifacts=%zu\n", stats.marked_artifacts); + printf("candidates=%zu\n", stats.candidates); + printf("candidate_bytes=%llu\n", + (unsigned long long)stats.candidate_bytes); + printf("mode=%s\n", delete_artifacts ? "delete" : "dry-run"); + return 0; +} diff --git a/src/asl_gc_fs.c b/src/asl_gc_fs.c new file mode 100644 index 0000000..d49a460 --- /dev/null +++ b/src/asl_gc_fs.c @@ -0,0 +1,1399 @@ +#include "asl_gc_fs.h" + +#include "amduat/asl/asl_materialization_cache_fs.h" +#include "amduat/asl/asl_store_fs.h" +#include "amduat/asl/asl_store_fs_meta.h" +#include "amduat/asl/log_store.h" +#include "amduat/asl/record.h" +#include "amduat/enc/asl1_core_codec.h" +#include "amduat/enc/pel1_result.h" +#include "amduat/enc/pel_trace_dag.h" +#include "amduat/hash/asl1.h" +#include "amduat/util/hex.h" +#include "amduat/util/log.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum { + AMDUAT_GC_POINTER_MAGIC_LEN = 8, + AMDUAT_GC_POINTER_VERSION = 1, + AMDUAT_GC_POINTER_FLAG_HAS_PREV = 1u << 1 +}; + +static const uint8_t k_amduat_gc_pointer_magic[AMDUAT_GC_POINTER_MAGIC_LEN] = { + 'A', 'S', 'L', 'P', 'T', 'R', '1', '\0' +}; + +enum { + AMDUAT_GC_LOG_MAGIC_LEN = 8, + AMDUAT_GC_LOG_VERSION = 1 +}; + +static const uint8_t k_amduat_gc_log_magic[AMDUAT_GC_LOG_MAGIC_LEN] = { + 'A', 'S', 'L', 'L', 'O', 'G', '1', '\0' +}; + +enum { + 
/*
 * Hash `len` bytes starting at `data` into a 64-bit value.
 *
 * Each byte is XORed into the accumulator, which is then multiplied by
 * 1099511628211 (FNV-1a style mixing). An empty input yields the seed
 * unchanged and `data` is never dereferenced in that case.
 *
 * NOTE(review): the seed is not the canonical 64-bit FNV offset basis; in the
 * visible code the result only feeds the in-memory digest set, where any fixed
 * seed works. Confirm before relying on it elsewhere.
 */
static uint64_t amduat_gc_hash_bytes(const uint8_t *data, size_t len) {
  const uint64_t seed = 1469598103934665603ULL;
  const uint64_t multiplier = 1099511628211ULL;
  const uint8_t *cursor = data;
  size_t remaining = len;
  uint64_t acc = seed;

  while (remaining != 0u) {
    acc = (acc ^ (uint64_t)*cursor) * multiplier;
    ++cursor;
    --remaining;
  }
  return acc;
}
(!amduat_gc_set_init(&next, set->digest_len, new_capacity)) { + return false; + } + for (size_t i = 0u; i < set->capacity; ++i) { + if (!set->used[i]) { + continue; + } + const uint8_t *digest = set->digests + i * set->digest_len; + uint64_t hash = amduat_gc_hash_bytes(digest, set->digest_len); + size_t mask = next.capacity - 1u; + size_t idx = (size_t)hash & mask; + while (next.used[idx]) { + idx = (idx + 1u) & mask; + } + memcpy(next.digests + idx * next.digest_len, digest, next.digest_len); + next.used[idx] = 1u; + next.size++; + } + amduat_gc_set_free(set); + *set = next; + return true; +} + +static bool amduat_gc_set_contains(const amduat_gc_digest_set_t *set, + const uint8_t *digest) { + uint64_t hash; + size_t mask; + size_t idx; + + if (set == NULL || set->capacity == 0u) { + return false; + } + hash = amduat_gc_hash_bytes(digest, set->digest_len); + mask = set->capacity - 1u; + idx = (size_t)hash & mask; + for (size_t probe = 0u; probe < set->capacity; ++probe) { + if (!set->used[idx]) { + return false; + } + if (memcmp(set->digests + idx * set->digest_len, digest, + set->digest_len) == 0) { + return true; + } + idx = (idx + 1u) & mask; + } + return false; +} + +static bool amduat_gc_set_insert(amduat_gc_digest_set_t *set, + const uint8_t *digest) { + uint64_t hash; + size_t mask; + size_t idx; + + if (set == NULL) { + return false; + } + if ((set->size + 1u) * 10u >= set->capacity * 7u) { + if (!amduat_gc_set_resize(set, set->capacity * 2u)) { + return false; + } + } + + hash = amduat_gc_hash_bytes(digest, set->digest_len); + mask = set->capacity - 1u; + idx = (size_t)hash & mask; + for (;;) { + if (!set->used[idx]) { + memcpy(set->digests + idx * set->digest_len, digest, set->digest_len); + set->used[idx] = 1u; + set->size++; + return true; + } + if (memcmp(set->digests + idx * set->digest_len, digest, + set->digest_len) == 0) { + return false; + } + idx = (idx + 1u) & mask; + } +} + +static bool amduat_gc_stack_init(amduat_gc_digest_stack_t *stack, + 
/*
 * Concatenate `base` and `segment` into a newly malloc'd path.
 *
 * A single '/' is inserted between the two unless `base` already ends with
 * one. On success the NUL-terminated result is stored in *out_path and the
 * caller owns it (release with free()).
 *
 * Returns false, leaving *out_path untouched, when any argument is NULL, when
 * `base` or `segment` is the empty string, or when allocation fails.
 */
static bool amduat_gc_join_path(const char *base,
                                const char *segment,
                                char **out_path) {
  if (base == NULL || segment == NULL || out_path == NULL) {
    return false;
  }
  if (*base == '\0' || *segment == '\0') {
    return false;
  }

  const size_t base_len = strlen(base);
  const size_t seg_len = strlen(segment);
  const size_t sep_len = (base[base_len - 1u] == '/') ? 0u : 1u;

  char *joined = (char *)malloc(base_len + sep_len + seg_len + 1u);
  if (joined == NULL) {
    return false;
  }

  char *cursor = joined;
  memcpy(cursor, base, base_len);
  cursor += base_len;
  if (sep_len != 0u) {
    *cursor++ = '/';
  }
  memcpy(cursor, segment, seg_len);
  cursor[seg_len] = '\0';

  *out_path = joined;
  return true;
}
|| out_len == NULL) { + return false; + } + *out_data = NULL; + *out_len = 0u; + + fp = fopen(path, "rb"); + if (fp == NULL) { + return false; + } + if (fseek(fp, 0, SEEK_END) != 0) { + fclose(fp); + return false; + } + size = ftell(fp); + if (size < 0) { + fclose(fp); + return false; + } + if (fseek(fp, 0, SEEK_SET) != 0) { + fclose(fp); + return false; + } + buffer = (uint8_t *)malloc((size_t)size); + if (buffer == NULL) { + fclose(fp); + return false; + } + if (fread(buffer, 1u, (size_t)size, fp) != (size_t)size) { + free(buffer); + fclose(fp); + return false; + } + fclose(fp); + *out_data = buffer; + *out_len = (size_t)size; + return true; +} + +static bool amduat_gc_mark_digest(amduat_gc_ctx_t *ctx, + const uint8_t *digest) { + if (amduat_gc_set_insert(&ctx->marked, digest)) { + return amduat_gc_stack_push(&ctx->stack, digest); + } + return true; +} + +static bool amduat_gc_mark_ref(amduat_gc_ctx_t *ctx, + const amduat_reference_t *ref) { + if (ref == NULL || ref->digest.data == NULL || ref->digest.len == 0u) { + return false; + } + if (ref->hash_id != ctx->config.hash_id) { + amduat_log(AMDUAT_LOG_DEBUG, "gc skip ref with hash_id %u", + (unsigned int)ref->hash_id); + return true; + } + return amduat_gc_mark_digest(ctx, ref->digest.data); +} + +static bool amduat_gc_read_pointer_head(const char *path, + const char *name, + amduat_reference_t *out_ref, + amduat_reference_t *out_prev, + bool *out_has_prev) { + uint8_t *data = NULL; + size_t len = 0u; + size_t offset = 0u; + uint32_t version = 0u; + uint8_t flags = 0u; + uint32_t name_len = 0u; + uint32_t ref_len = 0u; + uint32_t prev_len = 0u; + amduat_octets_t ref_bytes; + amduat_octets_t prev_bytes; + bool ok = false; + + if (out_ref == NULL || out_prev == NULL || out_has_prev == NULL) { + return false; + } + memset(out_ref, 0, sizeof(*out_ref)); + memset(out_prev, 0, sizeof(*out_prev)); + *out_has_prev = false; + + if (!amduat_gc_read_file(path, &data, &len)) { + return false; + } + if (len < 
AMDUAT_GC_POINTER_MAGIC_LEN + 4u + 1u) { + goto cleanup; + } + if (memcmp(data, k_amduat_gc_pointer_magic, + AMDUAT_GC_POINTER_MAGIC_LEN) != 0) { + goto cleanup; + } + offset += AMDUAT_GC_POINTER_MAGIC_LEN; + if (!amduat_gc_read_u32_le(data, len, &offset, &version) || + version != AMDUAT_GC_POINTER_VERSION) { + goto cleanup; + } + if (offset >= len) { + goto cleanup; + } + flags = data[offset++]; + if (!amduat_gc_read_u32_le(data, len, &offset, &name_len)) { + goto cleanup; + } + if (len - offset < name_len) { + goto cleanup; + } + if (strlen(name) != name_len || + memcmp(data + offset, name, name_len) != 0) { + goto cleanup; + } + offset += name_len; + if (!amduat_gc_read_u32_le(data, len, &offset, &ref_len)) { + goto cleanup; + } + if (ref_len < 2u || len - offset < ref_len) { + goto cleanup; + } + ref_bytes = amduat_octets(data + offset, ref_len); + if (!amduat_enc_asl1_core_decode_reference_v1(ref_bytes, out_ref)) { + goto cleanup; + } + offset += ref_len; + if (!amduat_gc_read_u32_le(data, len, &offset, &prev_len)) { + goto cleanup; + } + if (prev_len != 0u) { + if (!(flags & AMDUAT_GC_POINTER_FLAG_HAS_PREV)) { + goto cleanup; + } + if (prev_len < 2u || len - offset < prev_len) { + goto cleanup; + } + prev_bytes = amduat_octets(data + offset, prev_len); + if (!amduat_enc_asl1_core_decode_reference_v1(prev_bytes, out_prev)) { + goto cleanup; + } + offset += prev_len; + *out_has_prev = true; + } + if (offset != len) { + goto cleanup; + } + ok = true; + +cleanup: + if (!ok) { + amduat_reference_free(out_ref); + amduat_reference_free(out_prev); + } + free(data); + return ok; +} + +static bool amduat_gc_decode_log_chunk(amduat_octets_t bytes, + amduat_reference_t *out_prev, + bool *out_has_prev, + amduat_reference_t **out_refs, + size_t *out_refs_len) { + size_t offset = 0u; + uint32_t version = 0u; + uint8_t flags = 0u; + uint32_t prev_len = 0u; + uint32_t entry_count = 0u; + amduat_reference_t *refs = NULL; + + if (out_prev == NULL || out_has_prev == NULL || + 
out_refs == NULL || out_refs_len == NULL) { + return false; + } + memset(out_prev, 0, sizeof(*out_prev)); + *out_has_prev = false; + *out_refs = NULL; + *out_refs_len = 0u; + + if (bytes.len < AMDUAT_GC_LOG_MAGIC_LEN + 4u + 1u) { + return false; + } + if (memcmp(bytes.data, k_amduat_gc_log_magic, + AMDUAT_GC_LOG_MAGIC_LEN) != 0) { + return false; + } + offset += AMDUAT_GC_LOG_MAGIC_LEN; + if (!amduat_gc_read_u32_le(bytes.data, bytes.len, &offset, &version) || + version != AMDUAT_GC_LOG_VERSION) { + return false; + } + if (offset >= bytes.len) { + return false; + } + flags = bytes.data[offset++]; + if (!amduat_gc_read_u32_le(bytes.data, bytes.len, &offset, &prev_len)) { + return false; + } + if (prev_len != 0u) { + amduat_octets_t prev_bytes; + if (!(flags & AMDUAT_GC_LOG_FLAG_HAS_PREV)) { + return false; + } + if (bytes.len - offset < prev_len || prev_len < 2u) { + return false; + } + prev_bytes = amduat_octets(bytes.data + offset, prev_len); + if (!amduat_enc_asl1_core_decode_reference_v1(prev_bytes, out_prev)) { + return false; + } + offset += prev_len; + *out_has_prev = true; + } + { + uint64_t ignored; + if (!amduat_gc_read_u64_le(bytes.data, bytes.len, &offset, &ignored)) { + amduat_reference_free(out_prev); + return false; + } + } + if (!amduat_gc_read_u32_le(bytes.data, bytes.len, &offset, &entry_count)) { + amduat_reference_free(out_prev); + return false; + } + if (entry_count != 0u) { + if (entry_count > SIZE_MAX / sizeof(*refs)) { + amduat_reference_free(out_prev); + return false; + } + refs = (amduat_reference_t *)calloc(entry_count, sizeof(*refs)); + if (refs == NULL) { + amduat_reference_free(out_prev); + return false; + } + } + + for (uint32_t i = 0u; i < entry_count; ++i) { + uint16_t kind; + uint32_t ref_len; + uint32_t actor_len = 0u; + amduat_octets_t ref_bytes; + + if (!amduat_gc_read_u16_le(bytes.data, bytes.len, &offset, &kind)) { + goto decode_error; + } + (void)kind; + if (flags & AMDUAT_GC_LOG_FLAG_HAS_TIMESTAMP) { + uint64_t ignored; + if 
(!amduat_gc_read_u64_le(bytes.data, bytes.len, &offset, &ignored)) { + goto decode_error; + } + } + if (!amduat_gc_read_u32_le(bytes.data, bytes.len, &offset, &ref_len)) { + goto decode_error; + } + if (ref_len < 2u || bytes.len - offset < ref_len) { + goto decode_error; + } + ref_bytes = amduat_octets(bytes.data + offset, ref_len); + if (!amduat_enc_asl1_core_decode_reference_v1(ref_bytes, &refs[i])) { + goto decode_error; + } + offset += ref_len; + if (flags & AMDUAT_GC_LOG_FLAG_HAS_ACTOR) { + if (!amduat_gc_read_u32_le(bytes.data, bytes.len, &offset, &actor_len)) { + goto decode_error; + } + if (bytes.len - offset < actor_len) { + goto decode_error; + } + offset += actor_len; + } + } + if (offset != bytes.len) { + goto decode_error; + } + + *out_refs = refs; + *out_refs_len = entry_count; + return true; + +decode_error: + for (uint32_t i = 0u; i < entry_count; ++i) { + amduat_reference_free(&refs[i]); + } + free(refs); + amduat_reference_free(out_prev); + return false; +} + +static bool amduat_gc_decode_collection_snapshot(amduat_octets_t payload, + amduat_reference_t **out_refs, + size_t *out_refs_len) { + size_t offset = 0u; + uint32_t version = 0u; + uint32_t ref_count = 0u; + amduat_reference_t *refs = NULL; + + if (out_refs == NULL || out_refs_len == NULL) { + return false; + } + *out_refs = NULL; + *out_refs_len = 0u; + + if (payload.len < AMDUAT_GC_COLLECTION_MAGIC_LEN + 4u + 8u + 4u) { + return false; + } + if (memcmp(payload.data, k_amduat_gc_collection_magic, + AMDUAT_GC_COLLECTION_MAGIC_LEN) != 0) { + return false; + } + offset += AMDUAT_GC_COLLECTION_MAGIC_LEN; + if (!amduat_gc_read_u32_le(payload.data, payload.len, &offset, &version) || + version != AMDUAT_GC_COLLECTION_VERSION) { + return false; + } + { + uint64_t ignored; + if (!amduat_gc_read_u64_le(payload.data, payload.len, &offset, &ignored)) { + return false; + } + } + if (!amduat_gc_read_u32_le(payload.data, payload.len, &offset, &ref_count)) { + return false; + } + if (ref_count != 0u) { + 
if (ref_count > SIZE_MAX / sizeof(*refs)) { + return false; + } + refs = (amduat_reference_t *)calloc(ref_count, sizeof(*refs)); + if (refs == NULL) { + return false; + } + } + for (uint32_t i = 0u; i < ref_count; ++i) { + uint32_t ref_len = 0u; + amduat_octets_t ref_bytes; + if (!amduat_gc_read_u32_le(payload.data, payload.len, &offset, &ref_len)) { + goto decode_error; + } + if (ref_len < 2u || payload.len - offset < ref_len) { + goto decode_error; + } + ref_bytes = amduat_octets(payload.data + offset, ref_len); + if (!amduat_enc_asl1_core_decode_reference_v1(ref_bytes, &refs[i])) { + goto decode_error; + } + offset += ref_len; + } + if (offset != payload.len) { + goto decode_error; + } + + *out_refs = refs; + *out_refs_len = ref_count; + return true; + +decode_error: + for (uint32_t i = 0u; i < ref_count; ++i) { + amduat_reference_free(&refs[i]); + } + free(refs); + return false; +} + +static bool amduat_gc_mark_refs(amduat_gc_ctx_t *ctx, + amduat_reference_t *refs, + size_t refs_len) { + for (size_t i = 0u; i < refs_len; ++i) { + if (!amduat_gc_mark_ref(ctx, &refs[i])) { + return false; + } + } + return true; +} + +static bool amduat_gc_walk_pointer_dir(amduat_gc_ctx_t *ctx, + const char *dir_path, + const char *rel_name) { + DIR *dir; + struct dirent *entry; + bool ok = true; + + dir = opendir(dir_path); + if (dir == NULL) { + return errno == ENOENT; + } + + while ((entry = readdir(dir)) != NULL) { + char *child_path = NULL; + char *child_rel = NULL; + struct stat st; + if (strcmp(entry->d_name, ".") == 0 || + strcmp(entry->d_name, "..") == 0) { + continue; + } + if (!amduat_gc_join_path(dir_path, entry->d_name, &child_path)) { + ok = false; + break; + } + if (stat(child_path, &st) != 0) { + free(child_path); + ok = false; + break; + } + if (S_ISDIR(st.st_mode)) { + if (rel_name != NULL && rel_name[0] != '\0') { + size_t len = strlen(rel_name) + 1u + strlen(entry->d_name) + 1u; + child_rel = (char *)malloc(len); + if (child_rel == NULL) { + free(child_path); + 
ok = false; + break; + } + snprintf(child_rel, len, "%s/%s", rel_name, entry->d_name); + } else { + child_rel = amduat_gc_strdup(entry->d_name); + } + if (child_rel == NULL) { + free(child_path); + ok = false; + break; + } + if (!amduat_gc_walk_pointer_dir(ctx, child_path, child_rel)) { + ok = false; + free(child_rel); + free(child_path); + break; + } + free(child_rel); + } else if (S_ISREG(st.st_mode) && + strcmp(entry->d_name, "head") == 0) { + if (rel_name != NULL && rel_name[0] != '\0') { + amduat_reference_t ref = {0}; + amduat_reference_t prev_ref = {0}; + bool has_prev = false; + if (amduat_gc_read_pointer_head(child_path, rel_name, + &ref, &prev_ref, &has_prev)) { + ctx->stats.pointer_roots++; + amduat_gc_mark_ref(ctx, &ref); + if (has_prev) { + ctx->stats.pointer_roots++; + amduat_gc_mark_ref(ctx, &prev_ref); + } + } else { + amduat_log(AMDUAT_LOG_WARN, "gc pointer head parse failed: %s", + child_path); + } + amduat_reference_free(&ref); + amduat_reference_free(&prev_ref); + } + } + free(child_path); + } + + closedir(dir); + return ok; +} + +static bool amduat_gc_collect_pointer_roots(amduat_gc_ctx_t *ctx, + const char *root_path) { + char *pointers_path = NULL; + bool ok; + + if (!amduat_gc_join_path(root_path, "pointers", &pointers_path)) { + return false; + } + ok = amduat_gc_walk_pointer_dir(ctx, pointers_path, ""); + free(pointers_path); + return ok; +} + +static bool amduat_gc_walk_materializations(amduat_gc_ctx_t *ctx, + const char *dir_path, + amduat_asl_materialization_cache_fs_t *cache) { + DIR *dir; + struct dirent *entry; + bool ok = true; + + dir = opendir(dir_path); + if (dir == NULL) { + return errno == ENOENT; + } + + while ((entry = readdir(dir)) != NULL) { + char *child_path = NULL; + struct stat st; + if (strcmp(entry->d_name, ".") == 0 || + strcmp(entry->d_name, "..") == 0) { + continue; + } + if (!amduat_gc_join_path(dir_path, entry->d_name, &child_path)) { + ok = false; + break; + } + if (stat(child_path, &st) != 0) { + 
free(child_path); + ok = false; + break; + } + if (S_ISDIR(st.st_mode)) { + if (!amduat_gc_walk_materializations(ctx, child_path, cache)) { + ok = false; + free(child_path); + break; + } + } else if (S_ISREG(st.st_mode)) { + uint8_t *sid_bytes = NULL; + size_t sid_len = 0u; + amduat_reference_t *refs = NULL; + size_t refs_len = 0u; + amduat_asl_store_error_t err; + + if (strstr(entry->d_name, ".tmp") != NULL) { + free(child_path); + continue; + } + if (!amduat_hex_decode_alloc(entry->d_name, &sid_bytes, &sid_len)) { + free(child_path); + continue; + } + err = amduat_asl_materialization_cache_fs_get( + cache, amduat_octets(sid_bytes, sid_len), &refs, &refs_len); + if (err == AMDUAT_ASL_STORE_OK) { + for (size_t i = 0u; i < refs_len; ++i) { + ctx->stats.materialization_roots++; + amduat_gc_mark_ref(ctx, &refs[i]); + } + for (size_t i = 0u; i < refs_len; ++i) { + amduat_reference_free(&refs[i]); + } + free(refs); + } + free(sid_bytes); + } + free(child_path); + } + + closedir(dir); + return ok; +} + +static bool amduat_gc_collect_materializations(amduat_gc_ctx_t *ctx, + const char *root_path) { + char *index_path = NULL; + char *materializations_path = NULL; + char *by_sid_path = NULL; + bool ok = false; + amduat_asl_materialization_cache_fs_t cache; + + if (!amduat_asl_materialization_cache_fs_init(&cache, root_path)) { + return false; + } + if (!amduat_gc_join_path(root_path, "index", &index_path)) { + return false; + } + if (!amduat_gc_join_path(index_path, "materializations", + &materializations_path)) { + goto cleanup; + } + if (!amduat_gc_join_path(materializations_path, "by_sid", &by_sid_path)) { + goto cleanup; + } + ok = amduat_gc_walk_materializations(ctx, by_sid_path, &cache); + +cleanup: + free(index_path); + free(materializations_path); + free(by_sid_path); + return ok; +} + +static bool amduat_gc_mark_artifact(amduat_gc_ctx_t *ctx, + amduat_artifact_t *artifact) { + if (!artifact->has_type_tag) { + return true; + } + + switch (artifact->type_tag.tag_id) 
{ + case AMDUAT_TYPE_TAG_ASL_LOG_CHUNK_1: { + amduat_reference_t prev_ref = {0}; + bool has_prev = false; + amduat_reference_t *refs = NULL; + size_t refs_len = 0u; + if (amduat_gc_decode_log_chunk(artifact->bytes, &prev_ref, + &has_prev, &refs, &refs_len)) { + if (has_prev) { + amduat_gc_mark_ref(ctx, &prev_ref); + } + amduat_gc_mark_refs(ctx, refs, refs_len); + } + amduat_reference_free(&prev_ref); + for (size_t i = 0u; i < refs_len; ++i) { + amduat_reference_free(&refs[i]); + } + free(refs); + return true; + } + case AMDUAT_TYPE_TAG_ASL_RECORD_1: { + amduat_asl_record_t record; + bool ok = amduat_asl_record_decode_v1(artifact->bytes, &record); + if (!ok) { + return true; + } + if (record.schema.len == strlen("collection/snapshot") && + memcmp(record.schema.data, "collection/snapshot", + record.schema.len) == 0) { + amduat_reference_t *refs = NULL; + size_t refs_len = 0u; + if (amduat_gc_decode_collection_snapshot(record.payload, + &refs, &refs_len)) { + amduat_gc_mark_refs(ctx, refs, refs_len); + } + for (size_t i = 0u; i < refs_len; ++i) { + amduat_reference_free(&refs[i]); + } + free(refs); + } + amduat_asl_record_free(&record); + return true; + } + case AMDUAT_TYPE_TAG_PEL1_RESULT_1: { + amduat_pel_surface_execution_result_t result; + memset(&result, 0, sizeof(result)); + if (amduat_enc_pel1_result_decode_v1(artifact->bytes, &result)) { + amduat_gc_mark_ref(ctx, &result.scheme_ref); + amduat_gc_mark_ref(ctx, &result.program_ref); + amduat_gc_mark_refs(ctx, result.input_refs, result.input_refs_len); + amduat_gc_mark_refs(ctx, result.output_refs, result.output_refs_len); + if (result.has_params_ref) { + amduat_gc_mark_ref(ctx, &result.params_ref); + } + if (result.has_trace_ref) { + amduat_gc_mark_ref(ctx, &result.trace_ref); + } + if (result.has_store_failure) { + amduat_gc_mark_ref(ctx, &result.store_failure.failing_ref); + } + amduat_enc_pel1_result_free(&result); + } + return true; + } + case AMDUAT_TYPE_TAG_PEL_TRACE_DAG_1: { + amduat_pel_trace_dag_value_t 
trace; + memset(&trace, 0, sizeof(trace)); + if (amduat_enc_pel_trace_dag_decode_v1(artifact->bytes, &trace)) { + amduat_gc_mark_ref(ctx, &trace.scheme_ref); + amduat_gc_mark_ref(ctx, &trace.program_ref); + amduat_gc_mark_refs(ctx, trace.input_refs, trace.input_refs_len); + if (trace.has_params_ref) { + amduat_gc_mark_ref(ctx, &trace.params_ref); + } + if (trace.has_exec_result_ref) { + amduat_gc_mark_ref(ctx, &trace.exec_result_ref); + } + for (size_t i = 0u; i < trace.node_traces_len; ++i) { + amduat_gc_mark_refs(ctx, trace.node_traces[i].output_refs, + trace.node_traces[i].output_refs_len); + } + amduat_enc_pel_trace_dag_free(&trace); + } + return true; + } + default: + return true; + } +} + +static bool amduat_gc_mark_phase(amduat_gc_ctx_t *ctx) { + uint8_t *digest = NULL; + + digest = (uint8_t *)malloc(ctx->digest_len); + if (digest == NULL) { + return false; + } + + amduat_log(AMDUAT_LOG_DEBUG, "gc mark phase start"); + while (amduat_gc_stack_pop(&ctx->stack, digest)) { + amduat_reference_t ref = amduat_reference( + ctx->config.hash_id, amduat_octets(digest, ctx->digest_len)); + amduat_artifact_t artifact; + amduat_asl_store_error_t err = + amduat_asl_store_get(&ctx->store, ref, &artifact); + if (err != AMDUAT_ASL_STORE_OK) { + amduat_log(AMDUAT_LOG_DEBUG, "gc store get failed"); + continue; + } + amduat_gc_mark_artifact(ctx, &artifact); + amduat_artifact_free(&artifact); + } + + free(digest); + ctx->stats.marked_artifacts = ctx->marked.size; + amduat_log(AMDUAT_LOG_DEBUG, "gc mark phase done: %zu", + ctx->stats.marked_artifacts); + return true; +} + +static bool amduat_gc_mkdirs(const char *path) { + char *copy; + size_t len; + + if (path == NULL || path[0] == '\0') { + return false; + } + copy = amduat_gc_strdup(path); + if (copy == NULL) { + return false; + } + len = strlen(copy); + for (size_t i = 1u; i < len; ++i) { + if (copy[i] == '/') { + copy[i] = '\0'; + if (mkdir(copy, 0755) != 0 && errno != EEXIST) { + free(copy); + return false; + } + copy[i] = 
/*
 * Return a newly malloc'd copy of everything in `path` before its last '/'.
 *
 * Returns NULL when `path` contains no '/', when its only '/' is the very
 * first character, or when allocation fails. The caller owns the result and
 * releases it with free().
 */
static char *amduat_gc_parent_dir(const char *path) {
  /* Scan backwards; `cut` ends up one past the last '/', or 0 if none. */
  size_t cut = strlen(path);
  while (cut > 0u && path[cut - 1u] != '/') {
    --cut;
  }
  /* cut == 0: no slash at all. cut == 1: the last slash is path[0]. */
  if (cut <= 1u) {
    return NULL;
  }

  const size_t parent_len = cut - 1u;
  char *parent = (char *)malloc(parent_len + 1u);
  if (parent != NULL) {
    memcpy(parent, path, parent_len);
    parent[parent_len] = '\0';
  }
  return parent;
}
rel + 1u : path; + } + + if (!amduat_gc_join_path(quarantine_root, rel, &target)) { + return false; + } + parent = amduat_gc_parent_dir(target); + if (parent == NULL || !amduat_gc_mkdirs(parent)) { + free(parent); + free(target); + return false; + } + free(parent); + if (rename(path, target) != 0) { + free(target); + return false; + } + *out_quarantine_path = target; + return true; +} + +static bool amduat_gc_sweep_dir(amduat_gc_ctx_t *ctx, + const char *dir_path, + const char *root_path, + const char *quarantine_root, + bool delete_artifacts) { + DIR *dir; + struct dirent *entry; + bool ok = true; + + dir = opendir(dir_path); + if (dir == NULL) { + return errno == ENOENT; + } + + while ((entry = readdir(dir)) != NULL) { + char *child_path = NULL; + struct stat st; + if (strcmp(entry->d_name, ".") == 0 || + strcmp(entry->d_name, "..") == 0) { + continue; + } + if (!amduat_gc_join_path(dir_path, entry->d_name, &child_path)) { + ok = false; + break; + } + if (stat(child_path, &st) != 0) { + free(child_path); + ok = false; + break; + } + if (S_ISDIR(st.st_mode)) { + if (!amduat_gc_sweep_dir(ctx, child_path, root_path, quarantine_root, + delete_artifacts)) { + free(child_path); + ok = false; + break; + } + } else if (S_ISREG(st.st_mode)) { + size_t name_len = strlen(entry->d_name); + if (name_len == ctx->digest_len * 2u) { + uint8_t *digest = (uint8_t *)malloc(ctx->digest_len); + if (digest == NULL) { + free(child_path); + ok = false; + break; + } + if (amduat_hex_decode(entry->d_name, digest, ctx->digest_len)) { + if (!amduat_gc_set_contains(&ctx->marked, digest)) { + ctx->stats.candidates++; + ctx->stats.candidate_bytes += (uint64_t)st.st_size; + if (delete_artifacts) { + char *quarantine_path = NULL; + if (!amduat_gc_quarantine_move(root_path, quarantine_root, + child_path, &quarantine_path)) { + free(digest); + free(child_path); + ok = false; + break; + } + if (unlink(quarantine_path) != 0) { + amduat_log(AMDUAT_LOG_WARN, + "gc failed to remove quarantine file: 
%s", + quarantine_path); + } + free(quarantine_path); + } + } + } + free(digest); + } + } + free(child_path); + } + + closedir(dir); + return ok; +} + +static bool amduat_gc_sweep(amduat_gc_ctx_t *ctx, + const char *root_path, + bool delete_artifacts) { + char profile_hex[5]; + char hash_hex[5]; + char *objects_path = NULL; + char *profile_path = NULL; + char *hash_path = NULL; + char *quarantine_root = NULL; + bool ok = false; + + snprintf(profile_hex, sizeof(profile_hex), "%04x", + (unsigned int)ctx->config.encoding_profile_id); + snprintf(hash_hex, sizeof(hash_hex), "%04x", + (unsigned int)ctx->config.hash_id); + + if (!amduat_gc_join_path(root_path, "objects", &objects_path)) { + return false; + } + if (!amduat_gc_join_path(objects_path, profile_hex, &profile_path)) { + goto cleanup; + } + if (!amduat_gc_join_path(profile_path, hash_hex, &hash_path)) { + goto cleanup; + } + if (delete_artifacts) { + if (!amduat_gc_build_quarantine_root(root_path, &quarantine_root)) { + goto cleanup; + } + if (!amduat_gc_mkdirs(quarantine_root)) { + goto cleanup; + } + } + amduat_log(AMDUAT_LOG_DEBUG, "gc sweep start"); + ok = amduat_gc_sweep_dir(ctx, hash_path, root_path, quarantine_root, + delete_artifacts); + amduat_log(AMDUAT_LOG_DEBUG, "gc sweep done: %zu candidates", + ctx->stats.candidates); + +cleanup: + free(objects_path); + free(profile_path); + free(hash_path); + free(quarantine_root); + return ok; +} + +static bool amduat_gc_prepare_store(amduat_gc_ctx_t *ctx, + const char *root_path) { + amduat_asl_store_fs_config_t cfg; + const amduat_hash_asl1_desc_t *hash_desc; + + if (!amduat_asl_store_fs_load_config(root_path, &cfg)) { + return false; + } + if (!amduat_asl_store_fs_init(&ctx->fs, cfg.config, root_path)) { + return false; + } + amduat_asl_store_init(&ctx->store, cfg.config, + amduat_asl_store_fs_ops(), &ctx->fs); + ctx->config = cfg.config; + hash_desc = amduat_hash_asl1_desc_lookup(cfg.config.hash_id); + if (hash_desc == NULL || hash_desc->digest_len == 0u) { + 
return false; + } + ctx->digest_len = hash_desc->digest_len; + return true; +} + +bool amduat_asl_gc_fs_run(const char *root_path, + const amduat_asl_gc_fs_options_t *opts, + amduat_asl_gc_fs_stats_t *out_stats) { + amduat_gc_ctx_t ctx; + bool ok = false; + + if (root_path == NULL || opts == NULL || out_stats == NULL) { + return false; + } + memset(&ctx, 0, sizeof(ctx)); + ctx.keep_materializations = opts->keep_materializations; + + if (!amduat_gc_prepare_store(&ctx, root_path)) { + return false; + } + if (!amduat_gc_set_init(&ctx.marked, ctx.digest_len, 1024u)) { + return false; + } + if (!amduat_gc_stack_init(&ctx.stack, ctx.digest_len, 1024u)) { + amduat_gc_set_free(&ctx.marked); + return false; + } + + if (!amduat_gc_collect_pointer_roots(&ctx, root_path)) { + goto cleanup; + } + if (ctx.keep_materializations) { + if (!amduat_gc_collect_materializations(&ctx, root_path)) { + goto cleanup; + } + } + +#ifndef NDEBUG + if (ctx.stats.pointer_roots != 0u) { + amduat_log(AMDUAT_LOG_DEBUG, "gc pointer roots: %zu", + ctx.stats.pointer_roots); + } +#endif + + if (!amduat_gc_mark_phase(&ctx)) { + goto cleanup; + } + if (!amduat_gc_sweep(&ctx, root_path, opts->delete_artifacts)) { + goto cleanup; + } + + ok = true; + +cleanup: + amduat_gc_set_free(&ctx.marked); + amduat_gc_stack_free(&ctx.stack); + *out_stats = ctx.stats; + return ok; +} diff --git a/src/asl_gc_fs.h b/src/asl_gc_fs.h new file mode 100644 index 0000000..c9adafc --- /dev/null +++ b/src/asl_gc_fs.h @@ -0,0 +1,34 @@ +#ifndef AMDUAT_API_ASL_GC_FS_H +#define AMDUAT_API_ASL_GC_FS_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + bool keep_materializations; + bool delete_artifacts; + bool dry_run; +} amduat_asl_gc_fs_options_t; + +typedef struct { + size_t pointer_roots; + size_t materialization_roots; + size_t marked_artifacts; + size_t candidates; + uint64_t candidate_bytes; +} amduat_asl_gc_fs_stats_t; + +bool amduat_asl_gc_fs_run(const char *root_path, + const 
amduat_asl_gc_fs_options_t *opts, + amduat_asl_gc_fs_stats_t *out_stats); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* AMDUAT_API_ASL_GC_FS_H */ diff --git a/vendor/amduat b/vendor/amduat index 85c23e4..3e52697 160000 --- a/vendor/amduat +++ b/vendor/amduat @@ -1 +1 @@ -Subproject commit 85c23e49eb88768d076c26bd587ca842ce8b39ab +Subproject commit 3e526975ce2aac60b2dd09252a5edff86d4e8abe