# amduat-api/notes/artifact.py
# 364 lines, 12 KiB, Python

# artifact.py (cache-enabled update)
from __future__ import annotations
from typing import Callable, Optional, Dict, List, Any
import numpy as np
from canonical import canonicalize_sparse
from hashers import SHA256Hash, HashStrategy
from sid import compute_sid
from sid_hashers import SHA256SIDHash, StructureHashStrategy
# ---------------------------------------------------------------------
# Defaults
# ---------------------------------------------------------------------
DEFAULT_CONTENT_HASHER: HashStrategy = SHA256Hash()
DEFAULT_SID_HASHER: StructureHashStrategy = SHA256SIDHash()
# ---------------------------------------------------------------------
# Redundant cache
# ---------------------------------------------------------------------
class ArtifactCache:
    """In-memory mapping from structural IDs (SIDs) to content IDs (CIDs)."""

    def __init__(self):
        # Backing store: SID string -> CID string.
        self._cache: Dict[str, str] = dict()

    def get(self, sid: str) -> Optional[str]:
        """Return the CID stored for *sid*, or ``None`` when there is none."""
        store = self._cache
        return store.get(sid)

    def put(self, sid: str, cid: str):
        """Store *cid* under *sid*, replacing any earlier entry."""
        self._cache.update({sid: cid})

    def has(self, sid: str) -> bool:
        """Tell whether an entry exists for *sid*."""
        return sid in self._cache
# ---------------------------------------------------------------------
# Artifact class
# ---------------------------------------------------------------------
class Artifact:
    """
    Node of a lazily evaluated artifact DAG.

    Invariants:
    - the SID (structural identity) is known from construction
    - the CID (content identity) is only computed when first requested
    - SID and CID are independent of one another
    """

    def __init__(
        self,
        *,
        op: str,
        params: Dict[str, Any],
        children: List["Artifact"],
        sid: str,
        materializer: Optional[Callable[["Artifact", ArtifactCache], str]] = None,
        content_hasher: HashStrategy = DEFAULT_CONTENT_HASHER,
    ):
        # Structural description of the node.
        self.sid = sid
        self.op = op
        self.params = params
        self.children = children
        # Content side: hashing strategy, the callback that produces the CID,
        # and the memoized CID itself (None until first computed).
        self._content_hasher = content_hasher
        self._materializer = materializer
        self._cid: Optional[str] = None

    # -----------------------------------------------------------------
    # Lazy CID access (requires cache)
    # -----------------------------------------------------------------
    def cid(self, cache: ArtifactCache) -> str:
        """
        Return the CID, invoking the materializer on first access.

        The materializer is called as ``materializer(self, cache)`` and its
        result is memoized on the instance.

        Raises:
            RuntimeError: no CID is memoized and no materializer was given.
        """
        if self._cid is None:
            if self._materializer is None:
                raise RuntimeError(
                    f"Artifact with SID {self.sid} is not materializable"
                )
            self._cid = self._materializer(self, cache)
        return self._cid

    @property
    def is_materialized(self) -> bool:
        """True once a CID has been memoized on this instance."""
        return self._cid is not None

    def __repr__(self) -> str:
        state = "set" if self._cid else "lazy"
        return f"Artifact(op={self.op!r}, sid={self.sid[:8]}…, cid={state})"
# ---------------------------------------------------------------------
# Materialization helpers (cache-aware)
# ---------------------------------------------------------------------
def _compute_cid_from_sparse(indices: np.ndarray, values: np.ndarray, hasher: HashStrategy) -> str:
    """
    Hash a sparse (indices, values) pair into a CID.

    The pair is first passed through ``canonicalize_sparse``; the canonical
    pair is then handed to ``hasher.hash_sparse``, whose result is returned.
    """
    canonical = canonicalize_sparse(indices, values)
    canonical_indices, canonical_values = canonical
    return hasher.hash_sparse(canonical_indices, canonical_values)
def _kron_sparse(left_indices, left_values, right_indices, right_values, shift):
    """Combine two sparse vectors: index ``(i << shift) | j``, value ``vi * vj``."""
    li = np.asarray(left_indices, dtype=np.int64)
    ri = np.asarray(right_indices, dtype=np.int64)
    lv = np.asarray(left_values, dtype=np.complex128)
    rv = np.asarray(right_values, dtype=np.complex128)
    # Broadcasting builds the full (left x right) grid; ravel() flattens it
    # row-major, i.e. left entry outermost and right entry innermost.
    indices = ((li[:, None] << shift) | ri[None, :]).ravel()
    values = (lv[:, None] * rv[None, :]).ravel()
    return indices, values


def _sparse_payload(node: Artifact, cache: ArtifactCache):
    """Return ``node.params["_materialized"]``, rebuilding it if it is missing."""
    # Ensures the node has a CID (and lets its materializer fill the cache).
    node.cid(cache)
    payload = node.params.get("_materialized")
    if payload is not None:
        return payload
    # The materializers return early on a cache hit without storing the
    # sparse data, so a node can have a CID but no "_materialized" entry.
    if node.op == "leaf.quantum":
        amplitudes = node.params["_amplitudes"]
        payload = (np.arange(len(amplitudes), dtype=np.int64), amplitudes.copy())
    elif node.op == "tensor":
        shift = node.params.get("right_bits")
        if shift is None:
            raise RuntimeError("tensor right_bits not set")
        child_left, child_right = node.children
        l_idx, l_val = _sparse_payload(child_left, cache)
        r_idx, r_val = _sparse_payload(child_right, cache)
        payload = _kron_sparse(l_idx, l_val, r_idx, r_val, shift)
    else:
        raise RuntimeError(
            f"Artifact with SID {node.sid} has no sparse data to combine"
        )
    node.params["_materialized"] = payload
    return payload


def _materialize_tensor_lazy(left: Artifact, right: Artifact, artifact: Artifact, cache: ArtifactCache) -> str:
    """
    Materialize a tensor node from its two children and return its CID.

    Both children are materialized first. Their sparse (indices, values)
    pairs are combined so that every left entry ``(i, vi)`` and right entry
    ``(j, vj)`` yields ``((i << right_bits) | j, vi * vj)``. The combined
    pair is stored in ``artifact.params["_materialized"]``, hashed with the
    artifact's content hasher, memoized on the artifact and put in *cache*.

    A child whose CID was served from the cache may not carry its sparse
    data yet; in that case the data is rebuilt from the child's own inputs
    instead of failing with ``KeyError``.

    Raises:
        RuntimeError: ``right_bits`` is missing from the tensor's params, or
            a child has neither sparse data nor a way to rebuild it.
    """
    # Children first, in left/right order, so their CIDs land in the cache.
    left_indices, left_values = _sparse_payload(left, cache)
    right_indices, right_values = _sparse_payload(right, cache)

    shift = artifact.params.get("right_bits")
    if shift is None:
        raise RuntimeError("tensor right_bits not set")

    new_indices, new_values = _kron_sparse(
        left_indices, left_values, right_indices, right_values, shift
    )
    artifact.params["_materialized"] = (new_indices, new_values)

    cid = _compute_cid_from_sparse(new_indices, new_values, artifact._content_hasher)
    artifact._cid = cid
    cache.put(artifact.sid, cid)
    return cid
def materialize_artifact(artifact: Artifact, cache: ArtifactCache) -> str:
    """
    Compute (or look up) the CID of *artifact*, dispatching on ``artifact.op``.

    A cache entry for the artifact's SID short-circuits everything: the
    cached CID is memoized on the artifact and returned. Otherwise
    ``leaf.quantum`` and ``tensor`` nodes are delegated to their dedicated
    materializers, and ``leaf.bits`` nodes are hashed here from
    ``params["_materialized"]``.

    Raises:
        NotImplementedError: the op is none of the three supported kinds.
    """
    known_cid = cache.get(artifact.sid)
    if known_cid is not None:
        artifact._cid = known_cid
        return known_cid

    kind = artifact.op
    if kind == "leaf.quantum":
        return _materialize_quantum_leaf(artifact, cache)
    if kind == "tensor":
        lhs, rhs = artifact.children
        return _materialize_tensor_lazy(lhs, rhs, artifact, cache)
    if kind != "leaf.bits":
        raise NotImplementedError(f"Materialization not implemented for op={kind!r}")

    # leaf.bits: the sparse data is already stored on the artifact.
    sparse_indices, sparse_values = artifact.params["_materialized"]
    digest = _compute_cid_from_sparse(sparse_indices, sparse_values, artifact._content_hasher)
    artifact._cid = digest
    cache.put(artifact.sid, digest)
    return digest
# ---------------------------------------------------------------------
# Utility: compute bit-width
# ---------------------------------------------------------------------
def bit_width(artifact: Artifact) -> int:
    """
    Compute the number of bits represented by an artifact.

    - ``leaf.bits``: the width recorded in ``params["n_bits"]`` when present
      (this keeps leading zeros, e.g. ``"01"`` is 2 bits wide). Artifacts
      without that entry fall back to the bit length of the largest stored
      index, with a minimum of 1.
    - ``leaf.quantum``: the bit length of ``len(params["_amplitudes"]) - 1``,
      which is ``n`` for ``2**n`` amplitudes.
    - ``tensor``: the sum of the children's widths.

    Raises:
        NotImplementedError: for any other op.
    """
    op = artifact.op
    if op == "leaf.bits":
        declared = artifact.params.get("n_bits")
        if declared is not None:
            return int(declared)
        # Fallback for leaf.bits artifacts built without a recorded width.
        indices, _ = artifact.params["_materialized"]
        # Cast to a Python int so that bit_length() is available.
        max_index = int(indices.max()) if len(indices) > 0 else 0
        return max(1, max_index.bit_length())
    if op == "leaf.quantum":
        dim = len(artifact.params["_amplitudes"])
        return (dim - 1).bit_length()
    if op == "tensor":
        return sum(bit_width(c) for c in artifact.children)
    raise NotImplementedError(f"bit_width not implemented for {artifact.op}")
# ---------------------------------------------------------------------
# Factory functions
# ---------------------------------------------------------------------
def bits(
    bitstring: str,
    *,
    sid_hasher: StructureHashStrategy = DEFAULT_SID_HASHER,
    content_hasher: HashStrategy = DEFAULT_CONTENT_HASHER,
) -> Artifact:
    """
    Create a ``leaf.bits`` artifact from a string of ``'0'``/``'1'`` characters.

    The artifact stores a single sparse entry: value ``1.0`` at index
    ``int(bitstring, 2)``. The SID is computed from the bit string itself,
    and the string's length is recorded as ``params["n_bits"]`` so that
    ``bit_width`` does not lose leading zeros.

    Raises:
        ValueError: *bitstring* is empty or contains anything other than
            ``'0'`` and ``'1'``.
    """
    # int(s, 2) alone also accepts signs, "0b" prefixes, underscores and
    # surrounding whitespace, for which len(s) is not the bit count.
    if not bitstring or not set(bitstring) <= {"0", "1"}:
        raise ValueError(
            f"bitstring must be a non-empty string of '0'/'1' characters, got {bitstring!r}"
        )
    n = len(bitstring)
    index = int(bitstring, 2)
    indices = np.array([index], dtype=np.int64)
    values = np.array([1.0], dtype=np.complex128)
    sid = compute_sid(
        op="leaf.bits",
        child_sids=[],
        params={"bits": bitstring},
        hasher=sid_hasher,
    )
    art = Artifact(
        op="leaf.bits",
        params={"_materialized": (indices, values), "n_bits": n},
        children=[],
        sid=sid,
        materializer=materialize_artifact,
        content_hasher=content_hasher,
    )
    return art
def tensor(left: Artifact, right: Artifact, *, sid_hasher: StructureHashStrategy = DEFAULT_SID_HASHER) -> Artifact:
    """
    Build a lazy ``tensor`` node over *left* and *right*.

    The SID is derived from the two child SIDs (order-sensitive) with empty
    params. The bit width of *right* is stored as ``params["right_bits"]``
    for use at materialization time, and the content hasher is taken from
    *left*.
    """
    right_width = bit_width(right)
    structural_id = compute_sid(
        op="tensor",
        child_sids=[left.sid, right.sid],
        params={},
        hasher=sid_hasher,
        ordered_children=True,
    )
    node = Artifact(
        op="tensor",
        params={"right_bits": right_width},
        children=[left, right],
        sid=structural_id,
        materializer=materialize_artifact,
        content_hasher=left._content_hasher,
    )
    return node
# ---------------------------------------------------------------------
# DAG utilities
# ---------------------------------------------------------------------
def dag_node_count(a: Artifact, seen=None) -> int:
    """
    Count the nodes reachable from *a*, counting each SID once.

    *seen* is an optional set of SIDs to treat as already counted; it is
    updated in place with every SID visited. Nodes whose SID is already in
    *seen* contribute 0 and are not descended into.
    """
    visited = set() if seen is None else seen
    total = 0
    pending = [a]
    while pending:
        node = pending.pop()
        if node.sid in visited:
            continue
        visited.add(node.sid)
        total += 1
        pending.extend(node.children)
    return total
def dag_depth(a: Artifact) -> int:
    """
    Return the number of nodes on the longest path from *a* down to a leaf.

    A node without children has depth 1. Depths are memoized per node
    object, so a child shared by several parents is only walked once
    instead of once per path leading to it.
    """
    depth_of = {}

    def _depth(node) -> int:
        key = id(node)
        known = depth_of.get(key)
        if known is not None:
            return known
        kids = node.children
        if kids:
            result = 1 + max(_depth(child) for child in kids)
        else:
            result = 1
        depth_of[key] = result
        return result

    return _depth(a)
# ---------------------------------------------------------------------
# Quantum leaf factory
# ---------------------------------------------------------------------
def quantum_leaf(
    amplitudes: np.ndarray,
    *,
    sid: Optional[str] = None,
    sid_hasher: Optional[StructureHashStrategy] = DEFAULT_SID_HASHER,
    content_hasher: HashStrategy = DEFAULT_CONTENT_HASHER,
) -> Artifact:
    """
    Create a lazy quantum leaf.

    amplitudes: 1D array-like of complex amplitudes whose length is a
        power of 2; it is converted to a ``complex128`` array.
    sid: optional explicit SID. When omitted, the SID is computed from the
        amplitude values with *sid_hasher*.
    content_hasher: hasher used later to compute the CID.

    The returned artifact only stores the amplitudes (``params["_amplitudes"]``);
    the sparse (indices, values) form is produced on materialization.

    Raises:
        ValueError: *amplitudes* is not one-dimensional, is empty, or its
            length is not a power of 2.
    """
    amplitudes = np.asarray(amplitudes, dtype=np.complex128)
    if amplitudes.ndim != 1:
        raise ValueError("amplitudes must be a 1D array")
    dim = amplitudes.shape[0]
    # Integer power-of-two test: exact, and rejects the empty array with a
    # ValueError instead of overflowing on log2(0).
    if dim == 0 or dim & (dim - 1):
        raise ValueError("Length of amplitudes must be a power of 2")
    # Default SID: computed from amplitudes (structural identity)
    if sid is None:
        sid = compute_sid(
            op="leaf.quantum",
            child_sids=[],
            params={"amplitudes": amplitudes.tolist()},
            hasher=sid_hasher,
        )
    # Lazy _materialized: store amplitudes but not indices yet;
    # indices will be generated on materialization.
    params = {"_amplitudes": amplitudes}
    return Artifact(
        op="leaf.quantum",
        params=params,
        children=[],
        sid=sid,
        materializer=_materialize_quantum_leaf,
        content_hasher=content_hasher,
    )
# ---------------------------------------------------------------------
# Materializer for quantum leaves
# ---------------------------------------------------------------------
def _materialize_quantum_leaf(artifact: Artifact, cache: ArtifactCache) -> str:
    """
    Expand a quantum leaf into sparse (indices, values) form and return its CID.

    A cache entry for the artifact's SID is used as-is (and memoized on the
    artifact). Otherwise the indices ``0 .. len(amplitudes) - 1`` are paired
    with a copy of the stored amplitudes, the pair is saved under
    ``params["_materialized"]``, and its hash becomes the CID, which is
    memoized on the artifact and stored in *cache*.
    """
    hit = cache.get(artifact.sid)
    if hit is not None:
        artifact._cid = hit
        return hit

    state = artifact.params["_amplitudes"]
    sparse_indices = np.arange(len(state), dtype=np.int64)
    sparse_values = state.copy()
    artifact.params["_materialized"] = (sparse_indices, sparse_values)

    digest = _compute_cid_from_sparse(sparse_indices, sparse_values, artifact._content_hasher)
    artifact._cid = digest
    cache.put(artifact.sid, digest)
    return digest
# ---------------------------------------------------------------------
# DAG helper: recursively tensor a list of artifacts (cache-aware)
# ---------------------------------------------------------------------
def tensor_all(artifacts: List[Artifact], sid_hasher: Optional[StructureHashStrategy] = None) -> Artifact:
    """
    Recursively tensor a list of artifacts into a balanced binary DAG.

    A single-element list returns that element unchanged. Longer lists are
    split in the middle, each half is combined recursively, and the two
    results are joined with ``tensor``. When *sid_hasher* is not given,
    ``DEFAULT_SID_HASHER`` is used.

    Raises:
        ValueError: *artifacts* is empty (there is nothing to combine).
    """
    # Without this guard an empty list would keep splitting into empty
    # halves and recurse until the interpreter's recursion limit.
    if not artifacts:
        raise ValueError("tensor_all requires at least one artifact")
    if len(artifacts) == 1:
        return artifacts[0]
    mid = len(artifacts) // 2
    left = tensor_all(artifacts[:mid], sid_hasher)
    right = tensor_all(artifacts[mid:], sid_hasher)
    return tensor(left, right, sid_hasher=sid_hasher or DEFAULT_SID_HASHER)
# ---------------------------------------------------------------------
# DAG materialization with cache
# ---------------------------------------------------------------------
def materialize_dag(root: Artifact, cache: Optional[ArtifactCache] = None) -> str:
    """
    Request the CID of `root` and return it.

    When no cache is supplied, a fresh ``ArtifactCache`` is created for the
    call; the cache is handed to ``root.cid``.
    """
    active_cache = ArtifactCache() if cache is None else cache
    root_cid = root.cid(active_cache)
    return root_cid
# ---------------------------------------------------------------------
# DAG metrics (cache-aware)
# ---------------------------------------------------------------------
def dag_node_count_cached(a: Artifact, cache: Optional[ArtifactCache] = None) -> int:
    """
    Count the distinct-SID nodes reachable from `a`, requesting each one's CID.

    Every node counted has ``cid(cache)`` called on it, parent before
    children and children in list order. A fresh ``ArtifactCache`` is used
    when none is supplied.
    """
    active_cache = ArtifactCache() if cache is None else cache
    visited = set()
    total = 0
    pending = [a]
    while pending:
        node = pending.pop()
        if node.sid in visited:
            continue
        visited.add(node.sid)
        # Materialize node with cache
        node.cid(active_cache)
        total += 1
        # Reversed so the first child is popped (and visited) first.
        pending.extend(reversed(node.children))
    return total
def dag_depth_cached(a: Artifact, cache: Optional[ArtifactCache] = None) -> int:
    """
    Compute the depth of the DAG under `a`, requesting every node's CID.

    Depth counts nodes on the longest path from `a` to a leaf (a leaf has
    depth 1). Each node object has ``cid(cache)`` called once and its depth
    memoized, so a child shared by several parents is not re-walked once per
    path. A fresh ``ArtifactCache`` is used when none is supplied.
    """
    if cache is None:
        cache = ArtifactCache()
    depth_of = {}

    def _depth(node: Artifact) -> int:
        key = id(node)
        known = depth_of.get(key)
        if known is not None:
            return known
        node.cid(cache)
        kids = node.children
        if kids:
            result = 1 + max(_depth(child) for child in kids)
        else:
            result = 1
        depth_of[key] = result
        return result

    return _depth(a)