# sid.py
from __future__ import annotations
|
|
from typing import List, Dict, Any
|
|
from sid_hashers import StructureHashStrategy
|
|
from sid_canonical import encode_str, encode_kv_pairs
|
|
|
|
|
|
# ---------------------------------------------------------------------
# SID computation
# ---------------------------------------------------------------------
|
|
|
|
def compute_sid(
    *,
    op: str,
    child_sids: List[str],
    params: Dict[str, Any],
    hasher: StructureHashStrategy,
    ordered_children: bool = True,
    domain: str = "artifact.sid.v1",
) -> str:
    """
    Compute a deterministic Merkle-style SID for an artifact.

    The hashed payload is the concatenation, in this exact order, of:

    1. ``encode_str(domain)``  -- domain/version separation
    2. ``encode_str(op)``      -- operation name
    3. the child count as a 4-byte big-endian unsigned integer
    4. ``encode_str(c)`` for every child SID
    5. ``encode_kv_pairs(...)`` over the sorted ``(str(key), str(value))``
       parameter pairs

    Parameters
    ----------
    op: str
        Operation name (e.g., "tensor", "splice", "leaf.bits").
    child_sids: List[str]
        SIDs of the children artifacts. The input list is never mutated;
        a copy is taken before any sorting.
    params: Dict[str, Any]
        Canonicalized parameters for the operation. Keys and values are
        converted with ``str()`` and the resulting pairs are sorted, so
        dict insertion order does not affect the result.
    hasher: StructureHashStrategy
        Hash strategy; its ``hash_struct(payload)`` result is returned.
        This argument is required -- there is no default hasher.
    ordered_children: bool
        Whether child order matters (tensor vs commutative ops). When
        False, the child SIDs are sorted before being encoded so that any
        permutation of the same children yields the same payload.
    domain: str
        Domain/version string for domain separation.

    Returns
    -------
    sid: str
        The value returned by ``hasher.hash_struct`` for the payload
        (expected to be a hex string -- depends on the hasher supplied).

    Raises
    ------
    OverflowError
        If there are more than ``2**32 - 1`` children, since the count is
        encoded in exactly 4 bytes.
    """
    # Collect the encoded pieces and join them once at the end instead of
    # growing an immutable bytes object with repeated `+=`.
    parts = [
        encode_str(domain),  # domain/version separation
        encode_str(op),      # operation name
    ]

    # Children SIDs: keep caller order, or canonicalize by sorting when the
    # operation is commutative. Both branches produce a fresh list.
    children = list(child_sids) if ordered_children else sorted(child_sids)

    # An explicit count prefix keeps the children section unambiguous with
    # respect to the parameters section that follows.
    parts.append(len(children).to_bytes(4, "big"))
    parts.extend(encode_str(c) for c in children)

    # Canonicalized parameters: stringify, then sort for determinism.
    param_pairs = sorted((str(k), str(v)) for k, v in params.items())
    parts.append(encode_kv_pairs(param_pairs))

    # Compute structural hash over the assembled payload.
    return hasher.hash_struct(b"".join(parts))
|
|
|