# artifact.py (cache-enabled update) from __future__ import annotations from typing import Callable, Optional, Dict, List, Any import numpy as np from canonical import canonicalize_sparse from hashers import SHA256Hash, HashStrategy from sid import compute_sid from sid_hashers import SHA256SIDHash, StructureHashStrategy # --------------------------------------------------------------------- # Defaults # --------------------------------------------------------------------- DEFAULT_CONTENT_HASHER: HashStrategy = SHA256Hash() DEFAULT_SID_HASHER: StructureHashStrategy = SHA256SIDHash() # --------------------------------------------------------------------- # Redundant cache # --------------------------------------------------------------------- class ArtifactCache: """Redundant SID -> CID cache.""" def __init__(self): self._cache: Dict[str, str] = {} def get(self, sid: str) -> Optional[str]: return self._cache.get(sid) def put(self, sid: str, cid: str): self._cache[sid] = cid def has(self, sid: str) -> bool: return sid in self._cache # --------------------------------------------------------------------- # Artifact class # --------------------------------------------------------------------- class Artifact: """ Lazy, DAG-based artifact. 
Invariants: - SID is always available - CID is computed lazily, on demand - Structure (SID) and content (CID) are orthogonal """ def __init__( self, *, op: str, params: Dict[str, Any], children: List["Artifact"], sid: str, materializer: Optional[Callable[["Artifact", ArtifactCache], str]] = None, content_hasher: HashStrategy = DEFAULT_CONTENT_HASHER, ): self.op = op self.params = params self.children = children self.sid = sid # structural identity self._cid: Optional[str] = None # semantic identity (lazy) self._materializer = materializer self._content_hasher = content_hasher # ----------------------------------------------------------------- # Lazy CID access (requires cache) # ----------------------------------------------------------------- def cid(self, cache: ArtifactCache) -> str: if self._cid is not None: return self._cid if self._materializer is None: raise RuntimeError( f"Artifact with SID {self.sid} is not materializable" ) self._cid = self._materializer(self, cache) return self._cid @property def is_materialized(self) -> bool: return self._cid is not None def __repr__(self) -> str: return ( f"Artifact(op={self.op!r}, " f"sid={self.sid[:8]}…, " f"cid={'set' if self._cid else 'lazy'})" ) # --------------------------------------------------------------------- # Materialization helpers (cache-aware) # --------------------------------------------------------------------- def _compute_cid_from_sparse(indices: np.ndarray, values: np.ndarray, hasher: HashStrategy) -> str: ci, cv = canonicalize_sparse(indices, values) return hasher.hash_sparse(ci, cv) def _materialize_tensor_lazy(left: Artifact, right: Artifact, artifact: Artifact, cache: ArtifactCache) -> str: """ Lazily materialize tensor by combining children indices/values. Avoids building full dense arrays until necessary. 
""" # Materialize children first (still cached) left_cid = left.cid(cache) right_cid = right.cid(cache) left_indices, left_values = left.params["_materialized"] right_indices, right_values = right.params["_materialized"] shift = artifact.params.get("right_bits") if shift is None: raise RuntimeError("tensor right_bits not set") # Lazy generator for new indices and values def kron_sparse_gen(): for i, vi in zip(left_indices, left_values): for j, vj in zip(right_indices, right_values): yield (i << shift) | j, vi * vj # Materialize as arrays only when CID is computed idx_list, val_list = zip(*kron_sparse_gen()) if left_indices.size * right_indices.size > 0 else ([], []) new_indices = np.array(idx_list, dtype=np.int64) new_values = np.array(val_list, dtype=np.complex128) artifact.params["_materialized"] = (new_indices, new_values) cid = _compute_cid_from_sparse(new_indices, new_values, artifact._content_hasher) artifact._cid = cid cache.put(artifact.sid, cid) return cid def materialize_artifact(artifact: Artifact, cache: ArtifactCache) -> str: cached = cache.get(artifact.sid) if cached is not None: artifact._cid = cached return cached op = artifact.op if op == "leaf.bits": indices, values = artifact.params["_materialized"] cid = _compute_cid_from_sparse(indices, values, artifact._content_hasher) elif op == "leaf.quantum": return _materialize_quantum_leaf(artifact, cache) elif op == "tensor": left, right = artifact.children return _materialize_tensor_lazy(left, right, artifact, cache) else: raise NotImplementedError(f"Materialization not implemented for op={op!r}") artifact._cid = cid cache.put(artifact.sid, cid) return cid # --------------------------------------------------------------------- # Utility: compute bit-width # --------------------------------------------------------------------- def bit_width(artifact: Artifact) -> int: """ Compute the number of bits represented by an artifact. 
""" if artifact.op == "leaf.bits": indices, _ = artifact.params["_materialized"] max_index = int(indices.max()) if len(indices) > 0 else 0 # <-- cast to Python int return max(1, max_index.bit_length()) elif artifact.op == "tensor": return sum(bit_width(c) for c in artifact.children) else: raise NotImplementedError(f"bit_width not implemented for {artifact.op}") # --------------------------------------------------------------------- # Factory functions # --------------------------------------------------------------------- def bits( bitstring: str, *, sid_hasher: StructureHashStrategy = DEFAULT_SID_HASHER, content_hasher: HashStrategy = DEFAULT_CONTENT_HASHER, ) -> Artifact: n = len(bitstring) index = int(bitstring, 2) indices = np.array([index], dtype=np.int64) values = np.array([1.0], dtype=np.complex128) sid = compute_sid( op="leaf.bits", child_sids=[], params={"bits": bitstring}, hasher=sid_hasher, ) art = Artifact( op="leaf.bits", params={"_materialized": (indices, values)}, children=[], sid=sid, materializer=materialize_artifact, content_hasher=content_hasher, ) return art def tensor(left: Artifact, right: Artifact, *, sid_hasher: StructureHashStrategy = DEFAULT_SID_HASHER) -> Artifact: shift = bit_width(right) sid = compute_sid( op="tensor", child_sids=[left.sid, right.sid], params={}, hasher=sid_hasher, ordered_children=True ) return Artifact( op="tensor", params={"right_bits": shift}, children=[left, right], sid=sid, materializer=materialize_artifact, content_hasher=left._content_hasher, ) # --------------------------------------------------------------------- # DAG utilities # --------------------------------------------------------------------- def dag_node_count(a: Artifact, seen=None) -> int: if seen is None: seen = set() if a.sid in seen: return 0 seen.add(a.sid) return 1 + sum(dag_node_count(c, seen) for c in a.children) def dag_depth(a: Artifact) -> int: if not a.children: return 1 return 1 + max(dag_depth(c) for c in a.children) # 
# ---------------------------------------------------------------------
# Quantum leaf factory
# ---------------------------------------------------------------------
def quantum_leaf(
    amplitudes: np.ndarray,
    *,
    sid: Optional[str] = None,
    sid_hasher: Optional[StructureHashStrategy] = DEFAULT_SID_HASHER,
    content_hasher: HashStrategy = DEFAULT_CONTENT_HASHER,
) -> Artifact:
    """
    Create a lazy quantum leaf.

    amplitudes: 1D numpy array of complex amplitudes

    When ``sid`` is not given it is computed from the amplitudes with
    ``sid_hasher``.

    Raises:
        ValueError: if ``amplitudes`` is not one-dimensional, is empty, or
            its length is not a power of 2.
    """
    # np.array copies, so a later in-place change to the caller's array
    # cannot alter the amplitudes the SID was computed from.
    amplitudes = np.array(amplitudes, dtype=np.complex128)
    if amplitudes.ndim != 1:
        raise ValueError("amplitudes must be a 1D array")

    # Integer power-of-two test; the previous log2-based check raised
    # OverflowError (not ValueError) on empty input.
    dim = amplitudes.shape[0]
    if dim == 0 or dim & (dim - 1):
        raise ValueError("Length of amplitudes must be a power of 2")

    # Default SID: computed from amplitudes (structural identity)
    if sid is None:
        sid = compute_sid(
            op="leaf.quantum",
            child_sids=[],
            params={"amplitudes": amplitudes.tolist()},
            hasher=sid_hasher,
        )

    # Lazy _materialized: store amplitudes but not indices yet
    # indices will be generated on materialization
    return Artifact(
        op="leaf.quantum",
        params={"_amplitudes": amplitudes},
        children=[],
        sid=sid,
        materializer=_materialize_quantum_leaf,
        content_hasher=content_hasher,
    )


# ---------------------------------------------------------------------
# Materializer for quantum leaves
# ---------------------------------------------------------------------
def _materialize_quantum_leaf(artifact: Artifact, cache: ArtifactCache) -> str:
    """
    Convert quantum leaf to full sparse representation (indices, values)
    and compute CID.

    The sparse pair is stored under ``artifact.params["_materialized"]``
    even when the CID itself comes from ``cache``: a parent tensor reads
    that entry right after resolving the child's CID, and skipping it on a
    cache hit left the parent with a ``KeyError``. Only the hashing is
    avoided on a hit.
    """
    amplitudes = artifact.params["_amplitudes"]
    indices = np.arange(len(amplitudes), dtype=np.int64)
    values = amplitudes.copy()
    artifact.params["_materialized"] = (indices, values)

    cached = cache.get(artifact.sid)
    if cached is not None:
        artifact._cid = cached
        return cached

    cid = _compute_cid_from_sparse(indices, values, artifact._content_hasher)
    artifact._cid = cid
    cache.put(artifact.sid, cid)
    return cid


# ---------------------------------------------------------------------
# DAG helper: recursively tensor a list of artifacts (cache-aware)
# ---------------------------------------------------------------------
def tensor_all(artifacts: List[Artifact], sid_hasher: Optional[StructureHashStrategy] = None) -> Artifact:
    """
    Recursively tensors a list of artifacts into a balanced binary DAG.

    A single-element list returns that element unchanged. ``sid_hasher``
    defaults to ``DEFAULT_SID_HASHER`` when ``None``.

    NOTE(review): every right-hand operand goes through ``tensor``, which
    calls ``bit_width`` on it; quantum leaves therefore work here only if
    ``bit_width`` handles ``leaf.quantum`` -- confirm.

    Raises:
        ValueError: if ``artifacts`` is empty (this used to recurse until
            ``RecursionError``).
    """
    if not artifacts:
        raise ValueError("tensor_all requires at least one artifact")

    if len(artifacts) == 1:
        return artifacts[0]

    # Explicit None test: a hasher object must not be replaced just
    # because it happens to evaluate as falsy.
    hasher = DEFAULT_SID_HASHER if sid_hasher is None else sid_hasher

    mid = len(artifacts) // 2
    left = tensor_all(artifacts[:mid], hasher)
    right = tensor_all(artifacts[mid:], hasher)
    return tensor(left, right, sid_hasher=hasher)


# ---------------------------------------------------------------------
# DAG materialization with cache
# ---------------------------------------------------------------------
def materialize_dag(root: Artifact, cache: Optional[ArtifactCache] = None) -> str:
    """
    Recursively materialize a DAG starting from `root`, filling the cache.
    Returns the root CID.

    A fresh ``ArtifactCache`` is created when ``cache`` is ``None``.
    """
    if cache is None:
        cache = ArtifactCache()
    return root.cid(cache)


# ---------------------------------------------------------------------
# DAG metrics (cache-aware)
# ---------------------------------------------------------------------
def dag_node_count_cached(a: Artifact, cache: Optional[ArtifactCache] = None) -> int:
    """
    Counts nodes reachable from `a`, materializing and caching them.

    Each SID is counted (and materialized) once. A fresh ``ArtifactCache``
    is created when ``cache`` is ``None``.
    """
    if cache is None:
        cache = ArtifactCache()

    seen = set()

    def _count(node: Artifact) -> int:
        if node.sid in seen:
            return 0
        seen.add(node.sid)
        # Materialize node with cache
        node.cid(cache)
        return 1 + sum(_count(c) for c in node.children)

    return _count(a)


def dag_depth_cached(a: Artifact, cache: Optional[ArtifactCache] = None) -> int:
    """
    Computes depth of DAG from `a`, ensuring all nodes materialized in cache.

    Depths are memoized per node object, so a sub-DAG shared by several
    parents is walked once instead of once per path. A fresh
    ``ArtifactCache`` is created when ``cache`` is ``None``.
    """
    if cache is None:
        cache = ArtifactCache()

    depth_of: Dict[int, int] = {}

    def _depth(node: Artifact) -> int:
        key = id(node)
        known = depth_of.get(key)
        if known is not None:
            return known
        node.cid(cache)
        if not node.children:
            result = 1
        else:
            result = 1 + max(_depth(c) for c in node.children)
        depth_of[key] = result
        return result

    return _depth(a)