364 lines
12 KiB
Python
364 lines
12 KiB
Python
|
|
# artifact.py (cache-enabled update)
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
from typing import Callable, Optional, Dict, List, Any
|
||
|
|
import numpy as np
|
||
|
|
|
||
|
|
from canonical import canonicalize_sparse
|
||
|
|
from hashers import SHA256Hash, HashStrategy
|
||
|
|
from sid import compute_sid
|
||
|
|
from sid_hashers import SHA256SIDHash, StructureHashStrategy
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# Defaults
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# Content hasher used whenever a caller does not pass ``content_hasher``.
DEFAULT_CONTENT_HASHER: HashStrategy = SHA256Hash()
|
||
|
|
# Structure (SID) hasher used whenever a caller does not pass ``sid_hasher``.
DEFAULT_SID_HASHER: StructureHashStrategy = SHA256SIDHash()
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# Redundant cache
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
class ArtifactCache:
    """In-memory table mapping a structural ID (SID) to a content ID (CID)."""

    def __init__(self):
        # Plain dict keyed by SID; values are whatever CID was last put().
        self._cache: Dict[str, str] = {}

    def get(self, sid: str) -> Optional[str]:
        """Return the CID recorded for ``sid``, or ``None`` if there is none."""
        try:
            return self._cache[sid]
        except KeyError:
            return None

    def put(self, sid: str, cid: str):
        """Record ``cid`` under ``sid``, replacing any earlier entry."""
        self._cache.update({sid: cid})

    def has(self, sid: str) -> bool:
        """Tell whether an entry exists for ``sid``."""
        return sid in self._cache
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# Artifact class
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
class Artifact:
    """
    Node of a lazily evaluated artifact DAG.

    Invariants:
    - the structural ID (SID) is supplied at construction and always available
    - the content ID (CID) is only produced when ``cid()`` is first called
    - structure (SID) and content (CID) are tracked independently
    """

    def __init__(
        self,
        *,
        op: str,
        params: Dict[str, Any],
        children: List["Artifact"],
        sid: str,
        materializer: Optional[Callable[["Artifact", ArtifactCache], str]] = None,
        content_hasher: HashStrategy = DEFAULT_CONTENT_HASHER,
    ):
        # Structural description of the node.
        self.sid = sid  # structural identity
        self.op = op
        self.params = params
        self.children = children
        # Machinery used to produce the CID on demand.
        self._materializer = materializer
        self._content_hasher = content_hasher
        self._cid: Optional[str] = None  # content identity, filled in lazily

    # -----------------------------------------------------------------
    # Lazy CID access (requires cache)
    # -----------------------------------------------------------------
    def cid(self, cache: ArtifactCache) -> str:
        """Return this node's CID, invoking the materializer on first use.

        The materializer is called as ``materializer(self, cache)`` and its
        return value is stored, so later calls return it directly.

        Raises:
            RuntimeError: if no CID is stored yet and no materializer was given.
        """
        if self._cid is None:
            if self._materializer is None:
                raise RuntimeError(
                    f"Artifact with SID {self.sid} is not materializable"
                )
            self._cid = self._materializer(self, cache)
        return self._cid

    @property
    def is_materialized(self) -> bool:
        """``True`` once a CID has been stored on this node."""
        return self._cid is not None

    def __repr__(self) -> str:
        cid_state = "set" if self._cid else "lazy"
        return f"Artifact(op={self.op!r}, sid={self.sid[:8]}…, cid={cid_state})"
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# Materialization helpers (cache-aware)
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
def _compute_cid_from_sparse(indices: np.ndarray, values: np.ndarray, hasher: HashStrategy) -> str:
    """Hash a sparse (indices, values) pair after canonicalisation.

    The pair is passed through ``canonicalize_sparse``; the two arrays it
    returns are handed to ``hasher.hash_sparse`` and that result is returned.
    """
    canonical_indices, canonical_values = canonicalize_sparse(indices, values)
    digest = hasher.hash_sparse(canonical_indices, canonical_values)
    return digest
|
||
|
|
|
||
|
|
def _kron_sparse(left_indices, left_values, right_indices, right_values, shift):
    """Combine two sparse vectors into their tensor product (left-major order)."""
    left_indices = np.asarray(left_indices, dtype=np.int64)
    right_indices = np.asarray(right_indices, dtype=np.int64)
    if left_indices.size == 0 or right_indices.size == 0:
        return np.array([], dtype=np.int64), np.array([], dtype=np.complex128)

    # int64 shifts wrap silently, so refuse widths that cannot be represented.
    if int(left_indices.max()).bit_length() + int(shift) > 63:
        raise OverflowError(
            f"tensor index needs more than 63 bits (right_bits={shift})"
        )

    # Row-major ravel reproduces the "for i in left: for j in right" ordering.
    new_indices = ((left_indices[:, None] << shift) | right_indices[None, :]).ravel()
    # Products are still taken scalar-by-scalar, exactly as the element-wise
    # loop did, so the values that end up hashed are unchanged.
    new_values = np.array(
        [vi * vj for vi in left_values for vj in right_values],
        dtype=np.complex128,
    )
    return new_indices, new_values


def _sparse_data(node, cache):
    """Return ``node``'s sparse (indices, values), rebuilding them if absent."""
    node.cid(cache)  # keeps the original side effects: CID set, cache filled
    data = node.params.get("_materialized")
    if data is not None:
        return data

    # A cache hit assigns the CID without computing the arrays, so a node
    # whose SID was already cached can arrive here with no sparse data.
    if node.op == "leaf.quantum":
        amplitudes = node.params["_amplitudes"]
        data = (np.arange(len(amplitudes), dtype=np.int64), amplitudes.copy())
    elif node.op == "tensor":
        inner_left, inner_right = node.children
        inner_shift = node.params.get("right_bits")
        if inner_shift is None:
            raise RuntimeError("tensor right_bits not set")
        data = _kron_sparse(
            *_sparse_data(inner_left, cache),
            *_sparse_data(inner_right, cache),
            inner_shift,
        )
    else:
        raise RuntimeError(
            f"Artifact with SID {node.sid} has no sparse data to combine"
        )
    node.params["_materialized"] = data
    return data


def _materialize_tensor_lazy(left: Artifact, right: Artifact, artifact: Artifact, cache: ArtifactCache) -> str:
    """
    Materialize a tensor node from the sparse data of its two children.

    Both children are materialized first (through ``cid(cache)``). Their
    sparse ``(indices, values)`` pairs are then combined: every left index is
    shifted left by ``artifact.params["right_bits"]`` and OR-ed with every
    right index, and the matching values are multiplied. The result is stored
    in ``artifact.params["_materialized"]``, hashed into the CID, recorded on
    ``artifact`` and in ``cache``, and returned.

    A child whose CID was served from the cache carries no
    ``"_materialized"`` entry; its sparse data is rebuilt on the fly instead
    of failing with ``KeyError``.

    Raises:
        RuntimeError: if ``right_bits`` is missing, or a child has no sparse
            data and cannot be rebuilt.
        OverflowError: if the combined index would not fit in 63 bits.
    """
    left_indices, left_values = _sparse_data(left, cache)
    right_indices, right_values = _sparse_data(right, cache)

    shift = artifact.params.get("right_bits")
    if shift is None:
        raise RuntimeError("tensor right_bits not set")

    new_indices, new_values = _kron_sparse(
        left_indices, left_values, right_indices, right_values, shift
    )
    artifact.params["_materialized"] = (new_indices, new_values)

    cid = _compute_cid_from_sparse(new_indices, new_values, artifact._content_hasher)
    artifact._cid = cid
    cache.put(artifact.sid, cid)
    return cid
|
||
|
|
|
||
|
|
|
||
|
|
def materialize_artifact(artifact: Artifact, cache: ArtifactCache) -> str:
    """Produce the CID of ``artifact``, consulting ``cache`` by SID first.

    On a cache hit the cached CID is stored on the artifact and returned.
    Otherwise the work is dispatched on ``artifact.op``:

    - ``"tensor"`` and ``"leaf.quantum"`` are delegated to their own
      materializers, which record the CID themselves;
    - ``"leaf.bits"`` is hashed here from ``params["_materialized"]`` and the
      CID is stored on the artifact and in the cache.

    Raises:
        NotImplementedError: for any other ``op``.
    """
    hit = cache.get(artifact.sid)
    if hit is not None:
        artifact._cid = hit
        return hit

    kind = artifact.op
    if kind == "tensor":
        lhs, rhs = artifact.children
        return _materialize_tensor_lazy(lhs, rhs, artifact, cache)
    if kind == "leaf.quantum":
        return _materialize_quantum_leaf(artifact, cache)
    if kind != "leaf.bits":
        raise NotImplementedError(f"Materialization not implemented for op={kind!r}")

    # leaf.bits: the sparse pair was stored on the node when it was created.
    leaf_indices, leaf_values = artifact.params["_materialized"]
    digest = _compute_cid_from_sparse(leaf_indices, leaf_values, artifact._content_hasher)
    artifact._cid = digest
    cache.put(artifact.sid, digest)
    return digest
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# Utility: compute bit-width
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
def bit_width(artifact: Artifact) -> int:
    """
    Compute the number of bits represented by an artifact.

    - ``leaf.bits``: the width recorded under ``params["n_bits"]`` when it is
      present, so leading zeros count. Without it, falls back to the bit
      length of the largest stored index, with a minimum of 1.
    - ``leaf.quantum``: log2 of the amplitude-vector length.
    - ``tensor``: the sum of the children's widths.

    Raises:
        NotImplementedError: for any other ``op``.
    """
    op = artifact.op
    if op == "leaf.bits":
        declared = artifact.params.get("n_bits")
        if declared is not None:
            return int(declared)
        indices, _ = artifact.params["_materialized"]
        # Cast to a Python int so that bit_length() is available.
        max_index = int(indices.max()) if len(indices) > 0 else 0
        return max(1, max_index.bit_length())
    if op == "leaf.quantum":
        # quantum_leaf() only accepts power-of-two lengths, so this is an
        # exact integer log2 (a length-1 vector has width 0).
        return (len(artifact.params["_amplitudes"]) - 1).bit_length()
    if op == "tensor":
        return sum(bit_width(c) for c in artifact.children)
    raise NotImplementedError(f"bit_width not implemented for {artifact.op}")


# ---------------------------------------------------------------------
# Factory functions
# ---------------------------------------------------------------------
def bits(
    bitstring: str,
    *,
    sid_hasher: StructureHashStrategy = DEFAULT_SID_HASHER,
    content_hasher: HashStrategy = DEFAULT_CONTENT_HASHER,
) -> Artifact:
    """
    Create a ``leaf.bits`` artifact for a classical bit string.

    ``bitstring`` is read as a base-2 number (first character most
    significant). The leaf stores a single sparse entry: that number as the
    index, with value 1.0. The length of the string is kept under
    ``params["n_bits"]`` so that ``bit_width`` does not lose leading zeros.
    The SID is computed from the bit string itself.

    Raises:
        ValueError: if ``bitstring`` is empty or contains anything other than
            the characters ``0`` and ``1``.
    """
    # int(s, 2) alone would also accept "0b1", "-1", " 1 " or "1_0", whose
    # length does not match the number of bits they encode.
    if not bitstring or set(bitstring) - {"0", "1"}:
        raise ValueError(
            f"bitstring must be a non-empty string of '0'/'1', got {bitstring!r}"
        )

    n = len(bitstring)
    index = int(bitstring, 2)

    indices = np.array([index], dtype=np.int64)
    values = np.array([1.0], dtype=np.complex128)

    sid = compute_sid(
        op="leaf.bits",
        child_sids=[],
        params={"bits": bitstring},
        hasher=sid_hasher,
    )

    return Artifact(
        op="leaf.bits",
        params={"_materialized": (indices, values), "n_bits": n},
        children=[],
        sid=sid,
        materializer=materialize_artifact,
        content_hasher=content_hasher,
    )
|
||
|
|
|
||
|
|
def tensor(left: Artifact, right: Artifact, *, sid_hasher: StructureHashStrategy = DEFAULT_SID_HASHER) -> Artifact:
    """Build a lazy ``tensor`` node with children ``left`` then ``right``.

    The bit width of ``right`` is stored as ``params["right_bits"]``. The SID
    is computed from the two child SIDs with ``ordered_children=True``. The
    new node reuses ``left``'s content hasher.
    """
    right_bits = bit_width(right)
    operands = [left, right]
    structural_id = compute_sid(
        op="tensor",
        child_sids=[node.sid for node in operands],
        params={},
        hasher=sid_hasher,
        ordered_children=True,
    )
    return Artifact(
        op="tensor",
        params={"right_bits": right_bits},
        children=operands,
        sid=structural_id,
        materializer=materialize_artifact,
        content_hasher=left._content_hasher,
    )
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# DAG utilities
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
def dag_node_count(a: Artifact, seen=None) -> int:
    """Count the nodes reachable from ``a``, one per distinct SID.

    ``seen`` is an optional set of SIDs to treat as already counted; it is
    updated in place with every SID counted here. A node whose SID is already
    in the set is skipped together with its children.
    """
    visited = set() if seen is None else seen
    total = 0
    pending = [a]
    while pending:
        node = pending.pop()
        if node.sid in visited:
            continue
        visited.add(node.sid)
        total += 1
        # Reversed push keeps the left-to-right, depth-first visiting order.
        pending.extend(reversed(node.children))
    return total
|
||
|
|
|
||
|
|
def dag_depth(a: Artifact) -> int:
    """Return the number of nodes on the longest path from ``a`` to a leaf.

    A node without children has depth 1. Depths are memoised per node object,
    so a sub-DAG shared by several parents is walked once rather than once
    per path leading to it.
    """
    depth_of = {}  # id(node) -> depth; nodes stay alive through ``a``

    def _depth(node):
        key = id(node)
        known = depth_of.get(key)
        if known is None:
            kids = node.children
            known = (1 + max(_depth(c) for c in kids)) if kids else 1
            depth_of[key] = known
        return known

    return _depth(a)
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# Quantum leaf factory
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
def quantum_leaf(
    amplitudes: np.ndarray,
    *,
    sid: Optional[str] = None,
    sid_hasher: Optional[StructureHashStrategy] = DEFAULT_SID_HASHER,
    content_hasher: HashStrategy = DEFAULT_CONTENT_HASHER,
) -> Artifact:
    """
    Create a lazy quantum leaf.

    amplitudes: 1D array-like of complex amplitudes; it is converted to
        ``complex128`` and its length must be a power of 2.
    sid: structural ID to use; when omitted it is computed from the
        amplitudes with ``sid_hasher``.
    content_hasher: hasher stored on the artifact for the later CID.

    Only the amplitudes are stored (``params["_amplitudes"]``); the sparse
    indices are generated when the leaf is materialized.

    Raises:
        ValueError: if ``amplitudes`` is not one-dimensional, is empty, or
            its length is not a power of 2.
    """
    amplitudes = np.asarray(amplitudes, dtype=np.complex128)
    if amplitudes.ndim != 1:
        raise ValueError("amplitudes must be a 1D array")

    dim = len(amplitudes)
    # Integer power-of-two test: avoids float log2 and rejects the empty
    # vector with ValueError (log2(0) is -inf, which int() cannot convert).
    if dim == 0 or dim & (dim - 1):
        raise ValueError("Length of amplitudes must be a power of 2")

    # Default SID: computed from amplitudes (structural identity)
    if sid is None:
        sid = compute_sid(
            op="leaf.quantum",
            child_sids=[],
            params={"amplitudes": amplitudes.tolist()},
            hasher=sid_hasher,
        )

    # Lazy _materialized: store amplitudes but not indices yet;
    # indices will be generated on materialization.
    params = {"_amplitudes": amplitudes}

    return Artifact(
        op="leaf.quantum",
        params=params,
        children=[],
        sid=sid,
        materializer=_materialize_quantum_leaf,
        content_hasher=content_hasher,
    )
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# Materializer for quantum leaves
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
def _materialize_quantum_leaf(artifact: Artifact, cache: ArtifactCache) -> str:
    """
    Expand a quantum leaf into (indices, values) form and compute its CID.

    If ``cache`` already holds a CID for the artifact's SID, that CID is
    stored on the artifact and returned without building any arrays.
    Otherwise indices ``0 .. len(amplitudes) - 1`` are paired with a copy of
    the amplitudes, the pair is stored under ``params["_materialized"]``, and
    the resulting CID is recorded on the artifact and in the cache.
    """
    known_cid = cache.get(artifact.sid)
    if known_cid is not None:
        artifact._cid = known_cid
        return known_cid

    amps = artifact.params["_amplitudes"]
    sparse_pair = (np.arange(len(amps), dtype=np.int64), amps.copy())
    artifact.params["_materialized"] = sparse_pair

    digest = _compute_cid_from_sparse(*sparse_pair, artifact._content_hasher)
    artifact._cid = digest
    cache.put(artifact.sid, digest)
    return digest
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# DAG helper: recursively tensor a list of artifacts (cache-aware)
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
def tensor_all(artifacts: List[Artifact], sid_hasher: Optional[StructureHashStrategy] = None) -> Artifact:
    """
    Recursively tensors a list of artifacts into a balanced binary DAG.

    The list is split in half, each half is reduced recursively, and the two
    results are joined with ``tensor``. A single-element list returns that
    element unchanged. When ``sid_hasher`` is ``None`` the module default is
    used for every ``tensor`` call.

    Raises:
        ValueError: if ``artifacts`` is empty (there is nothing to return,
            and splitting an empty list would recurse without end).
    """
    count = len(artifacts)
    if count == 0:
        raise ValueError("tensor_all requires at least one artifact")
    if count == 1:
        return artifacts[0]

    mid = count // 2
    left = tensor_all(artifacts[:mid], sid_hasher)
    right = tensor_all(artifacts[mid:], sid_hasher)
    hasher = DEFAULT_SID_HASHER if sid_hasher is None else sid_hasher
    return tensor(left, right, sid_hasher=hasher)
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# DAG materialization with cache
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
def materialize_dag(root: Artifact, cache: Optional[ArtifactCache] = None) -> str:
    """
    Materialize the DAG rooted at `root` and return the root's CID.

    The work is done by `root.cid(cache)`. When no cache is supplied, a
    fresh `ArtifactCache` is created for this call.
    """
    active_cache = ArtifactCache() if cache is None else cache
    return root.cid(active_cache)
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# DAG metrics (cache-aware)
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
def dag_node_count_cached(a: Artifact, cache: Optional[ArtifactCache] = None) -> int:
    """
    Count the nodes reachable from `a` (one per distinct SID), calling
    `cid(cache)` on each node that is counted.

    A node whose SID was already counted is skipped together with its
    children. When no cache is supplied, a fresh `ArtifactCache` is used.
    """
    if cache is None:
        cache = ArtifactCache()

    visited = set()
    total = 0
    stack = [a]
    while stack:
        current = stack.pop()
        if current.sid in visited:
            continue
        visited.add(current.sid)
        # Materialize node with cache
        current.cid(cache)
        total += 1
        # Reversed push keeps the left-to-right, depth-first visiting order.
        stack.extend(reversed(current.children))
    return total
|
||
|
|
|
||
|
|
def dag_depth_cached(a: Artifact, cache: Optional[ArtifactCache] = None) -> int:
    """
    Computes depth of DAG from `a`, calling `cid(cache)` on every node.

    A node without children has depth 1. Results are memoised per node
    object, so a sub-DAG shared by several parents is walked (and its
    `cid` requested) once rather than once per path leading to it. When no
    cache is supplied, a fresh `ArtifactCache` is used.
    """
    if cache is None:
        cache = ArtifactCache()

    depth_of = {}  # id(node) -> depth; nodes stay alive through ``a``

    def _depth(node: Artifact):
        key = id(node)
        if key in depth_of:
            return depth_of[key]
        node.cid(cache)
        kids = node.children
        result = (1 + max(_depth(c) for c in kids)) if kids else 1
        depth_of[key] = result
        return result

    return _depth(a)
|
||
|
|
|
||
|
|
|