93 lines
2.6 KiB
Python
93 lines
2.6 KiB
Python
|
|
# hashers.py
|
||
|
|
|
||
|
|
from __future__ import annotations

import hashlib
import struct
from abc import ABC, abstractmethod
from typing import Tuple

import numpy as np
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# Content Hash Strategy Interface
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
|
||
|
|
class HashStrategy(ABC):
    """
    Contract that every content-hashing backend has to fulfil.

    The class cannot be instantiated directly; a concrete strategy must
    override both abstract methods below, each of which returns the
    digest as a string.
    """

    @abstractmethod
    def hash_bytes(self, data: bytes) -> str:
        """
        Return the hex digest of an arbitrary byte string.
        """

    @abstractmethod
    def hash_sparse(self, indices: np.ndarray, values: np.ndarray) -> str:
        """
        Return the digest of amplitudes supplied in sparse form.

        ``indices`` and ``values`` are the two arrays that together make
        up the sparse representation.
        """
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# Default SHA-256 Implementation
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
|
||
|
|
class SHA256Hash(HashStrategy):
    """
    SHA-256 hash strategy for content-addressed artifacts.

    Both methods return the lowercase hexadecimal SHA-256 digest.
    """

    name = "sha256.content.v1"

    # One amplitude is serialised as (real, imag): two IEEE 754 doubles.
    # The byte order is pinned to little-endian instead of being left at the
    # host's native order, so the digest is the same on every platform. On
    # little-endian hosts this yields exactly the bytes that a native-order
    # dump of a float64 yields.
    _AMPLITUDE = struct.Struct("<dd")

    def hash_bytes(self, data: bytes) -> str:
        """
        Hash arbitrary bytes deterministically.

        Args:
            data: Bytes-like payload to digest.

        Returns:
            The SHA-256 hex digest of ``data``.
        """
        return hashlib.sha256(data).hexdigest()

    def hash_sparse(self, indices: np.ndarray, values: np.ndarray) -> str:
        """
        Hash a sparse set of indices and amplitudes.

        The arrays are serialised into one byte string which is then passed
        to :meth:`hash_bytes`:

        * the number of entries, 8 bytes, unsigned big-endian;
        * then, for each ``(index, value)`` pair in the order given:
          the index as 8 bytes unsigned big-endian, followed by the real
          and imaginary parts of the value as little-endian IEEE 754
          doubles.

        Entries are not sorted, so the same pairs in a different order
        produce a different digest.

        Args:
            indices: 1-D array of non-negative integer positions.
            values: 1-D array of amplitudes, same length as ``indices``.

        Returns:
            The hex digest of the serialised entries.

        Raises:
            ValueError: If either array is not 1-D or the lengths differ.
            OverflowError: If an index is negative or does not fit in
                8 bytes.
        """
        if indices.ndim != 1 or values.ndim != 1:
            raise ValueError("indices and values must be 1-D arrays")

        if len(indices) != len(values):
            raise ValueError("indices and values must have the same length")

        pack_amplitude = self._AMPLITUDE.pack

        # Collect the pieces and join once: repeated ``bytes +=`` would copy
        # the whole buffer on every iteration.
        chunks = [len(indices).to_bytes(8, "big")]
        for idx, val in zip(indices, values):
            chunks.append(int(idx).to_bytes(8, "big", signed=False))
            chunks.append(pack_amplitude(float(val.real), float(val.imag)))

        return self.hash_bytes(b"".join(chunks))
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# Utility / Helpers
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
|
||
|
|
def hash_bytes_sha256(data: bytes) -> str:
    """
    Digest ``data`` with a freshly created :class:`SHA256Hash`.

    Shortcut for callers that do not want to build the strategy object
    themselves; the result is whatever ``SHA256Hash.hash_bytes`` returns.
    """
    hasher = SHA256Hash()
    return hasher.hash_bytes(data)
|
||
|
|
|
||
|
|
|
||
|
|
def hash_sparse_sha256(indices: np.ndarray, values: np.ndarray) -> str:
    """
    Digest a sparse index/value pair with a freshly created
    :class:`SHA256Hash`.

    Both arguments are forwarded unchanged to ``SHA256Hash.hash_sparse``,
    and its result is returned as-is.
    """
    hasher = SHA256Hash()
    return hasher.hash_sparse(indices, values)
|
||
|
|
|