amduat-api/notes/sid.py
2026-01-17 00:19:49 +01:00

80 lines
2.5 KiB
Python

# sid.py
from __future__ import annotations
from typing import List, Dict, Any
from sid_hashers import StructureHashStrategy
from sid_canonical import encode_str, encode_kv_pairs
# ---------------------------------------------------------------------
# SID computation
# ---------------------------------------------------------------------
def compute_sid(
    *,
    op: str,
    child_sids: List[str],
    params: Dict[str, Any],
    hasher: StructureHashStrategy,
    ordered_children: bool = True,
    domain: str = "artifact.sid.v1",
) -> str:
    """
    Compute a deterministic Merkle-style SID for an artifact.

    The hashed payload is the concatenation, in this exact order, of:

    1. ``encode_str(domain)``
    2. ``encode_str(op)``
    3. the number of children as a 4-byte big-endian integer
    4. ``encode_str(sid)`` for every child SID
    5. ``encode_kv_pairs(...)`` over the sorted ``(str(key), str(value))``
       parameter pairs

    Parameters
    ----------
    op: str
        Operation name (e.g., "tensor", "splice", "leaf.bits").
    child_sids: List[str]
        List of SIDs of children artifacts. The caller's list is never
        mutated.
    params: Dict[str, Any]
        Canonicalized parameters for the operation. Keys and values are
        coerced with ``str()`` before encoding.
    hasher: StructureHashStrategy
        Hash strategy to use. Required; there is no default. Its
        ``hash_struct`` method receives the assembled payload.
    ordered_children: bool
        Whether child order matters (tensor vs commutative ops). When
        False the child SIDs are sorted before hashing, so any permutation
        of the same children yields the same SID.
    domain: str
        Domain/version for domain separation.

    Returns
    -------
    sid: str
        The value returned by ``hasher.hash_struct`` for the payload
        (described by the original author as a hex string).

    Raises
    ------
    OverflowError
        If there are more children than fit in the 4-byte count field.
    """
    # Collect the pieces and join once at the end; repeated ``bytes +=``
    # copies the whole payload on every step (quadratic in child count).
    parts = [
        # Domain/version separation, then the operation name.
        encode_str(domain),
        encode_str(op),
    ]

    # -----------------------------------------------------------------
    # Children SIDs
    # -----------------------------------------------------------------
    # Both branches build a new list, leaving ``child_sids`` untouched.
    children = list(child_sids) if ordered_children else sorted(child_sids)
    # Explicit count prefix, written before the individual child SIDs.
    parts.append(len(children).to_bytes(4, "big"))
    parts.extend(encode_str(child) for child in children)

    # -----------------------------------------------------------------
    # Canonicalized parameters
    # -----------------------------------------------------------------
    # Sorting makes the result independent of dict insertion order.
    # NOTE(review): str() coercion means 1 and "1" hash identically, and
    # values without a stable str() are not deterministic. Callers are
    # expected to pass already-canonicalized params; changing the coercion
    # here would alter existing SIDs.
    param_pairs = sorted((str(k), str(v)) for k, v in params.items())
    parts.append(encode_kv_pairs(param_pairs))

    # -----------------------------------------------------------------
    # Compute structural hash
    # -----------------------------------------------------------------
    return hasher.hash_struct(b"".join(parts))