290 lines
7 KiB
Markdown
290 lines
7 KiB
Markdown
|
|
Great — this fits *exactly* with your ASL-CORE-INDEX spec.
|
||
|
|
Below is a **clean, minimal, spec-faithful implementation** of **snapshot + append-only log semantics** for the `ContentIndex`.
|
||
|
|
|
||
|
|
I will be very explicit about:
|
||
|
|
|
||
|
|
* semantics vs mechanics
|
||
|
|
* what is immutable
|
||
|
|
* what is replayed
|
||
|
|
* what CURRENT means
|
||
|
|
|
||
|
|
No storage encoding, no performance tricks.
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
# Design recap (mapped to your spec)
|
||
|
|
|
||
|
|
We model **exactly this**:
|
||
|
|
|
||
|
|
```
|
||
|
|
Index(CURRENT) = Index(snapshot) + replay(log)
|
||
|
|
```
|
||
|
|
|
||
|
|
Where:
|
||
|
|
|
||
|
|
* **Snapshot** = checkpointed base state
|
||
|
|
* **Log** = ordered, append-only mutations
|
||
|
|
* **CURRENT** = the snapshot with the log replayed on top of it (in this implementation, the entire log)
|
||
|
|
|
||
|
|
We support:
|
||
|
|
|
||
|
|
* immutability
|
||
|
|
* shadowing
|
||
|
|
* determinism
|
||
|
|
* tombstones (optional, included)
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
# Semantic model
|
||
|
|
|
||
|
|
## Log entry types
|
||
|
|
|
||
|
|
```text
|
||
|
|
PUT : ArtifactKey → ArtifactLocation
|
||
|
|
TOMBSTONE : ArtifactKey → ⊥
|
||
|
|
```
|
||
|
|
|
||
|
|
Later entries shadow earlier ones.
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
# Python implementation
|
||
|
|
|
||
|
|
## `content_index.py`
|
||
|
|
|
||
|
|
```python
|
||
|
|
from __future__ import annotations
|
||
|
|
from dataclasses import dataclass
|
||
|
|
from typing import Dict, Optional, List, Iterable
|
||
|
|
from enum import Enum, auto
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# Core data types
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
|
||
|
|
@dataclass(frozen=True)
class ArtifactLocation:
    """Immutable description of where an artifact is stored.

    The index only stores and returns these values; nothing in this
    module interprets the individual fields. Instances are frozen, so
    they are hashable and cannot be modified after creation.
    """

    block_id: str  # identifier of the block that holds the artifact
    offset: int    # start position inside the block (presumably bytes -- confirm)
    length: int    # extent of the artifact (presumably bytes -- confirm)
|
||
|
|
|
||
|
|
|
||
|
|
class LogOp(Enum):
    """Kinds of mutation that an index log entry can record."""

    PUT = auto()        # bind an artifact key to a location
    TOMBSTONE = auto()  # mark an artifact key as absent
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass(frozen=True)
class IndexLogEntry:
    """One immutable record in the index log.

    A PUT entry carries the location being bound to ``artifact_key``.
    A TOMBSTONE entry is created without a location, so ``location``
    stays ``None``.
    """

    op: LogOp                                    # which mutation this entry records
    artifact_key: str                            # key the mutation applies to
    location: Optional[ArtifactLocation] = None  # target of a PUT; None otherwise
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# Snapshot
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
|
||
|
|
class ContentIndexSnapshot:
    """
    Immutable snapshot of the content index.

    Holds a key -> location mapping captured at construction time.
    Immutability is by convention: the class exposes no mutators, but
    the underlying dict is not frozen.
    """

    def __init__(self, mapping: Dict[str, ArtifactLocation]):
        """Capture ``mapping`` as the snapshot's contents."""
        # Copy so that later changes to the caller's dict cannot leak
        # into the snapshot.
        self._mapping = dict(mapping)

    def get(self, key: str) -> Optional[ArtifactLocation]:
        """Return the location stored for ``key``, or ``None`` if absent."""
        return self._mapping.get(key)

    def items(self):
        """Return a view of the ``(key, location)`` pairs in the snapshot."""
        return self._mapping.items()
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# Append-only log
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
|
||
|
|
class ContentIndexLog:
    """
    Append-only, ordered log of index mutations.

    Entries are kept in the order they were appended. The only ways to
    change the log are ``append_put`` and ``append_tombstone``.
    """

    def __init__(self):
        """Create an empty log."""
        self._entries: List[IndexLogEntry] = []

    def append_put(self, key: str, loc: ArtifactLocation):
        """Append a PUT entry binding ``key`` to ``loc``."""
        self._entries.append(IndexLogEntry(
            op=LogOp.PUT,
            artifact_key=key,
            location=loc,
        ))

    def append_tombstone(self, key: str):
        """Append a TOMBSTONE entry for ``key`` (no location is stored)."""
        self._entries.append(IndexLogEntry(
            op=LogOp.TOMBSTONE,
            artifact_key=key,
        ))

    def entries(self) -> Iterable[IndexLogEntry]:
        """Return the entries in append order as a read-only sequence.

        A tuple copy is returned rather than the internal list, so a
        caller cannot remove, reorder, or insert entries and break the
        append-only guarantee.
        """
        return tuple(self._entries)

    def __len__(self) -> int:
        """Return the number of entries appended so far."""
        return len(self._entries)
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
# Content index with snapshot + log semantics
|
||
|
|
# ---------------------------------------------------------------------
|
||
|
|
|
||
|
|
class ContentIndex:
    """
    ASL-CORE-INDEX semantic implementation.

    Deterministic, snapshot-relative mapping:
        ArtifactKey -> ArtifactLocation

    CURRENT state is the base snapshot with every log entry replayed on
    top of it in append order. A later entry for a key shadows earlier
    entries and the snapshot.
    """

    def __init__(
        self,
        snapshot: Optional[ContentIndexSnapshot] = None,
        log: Optional[ContentIndexLog] = None,
    ):
        """Build an index over ``snapshot`` and ``log``.

        Either argument may be omitted, in which case an empty snapshot
        or an empty log is created.
        """
        # Test against None explicitly instead of using `or`: the log
        # type defines __len__, so an empty log is falsy and `log or ...`
        # would silently replace the caller's log with a private one.
        self._snapshot = (
            snapshot if snapshot is not None else ContentIndexSnapshot({})
        )
        self._log = log if log is not None else ContentIndexLog()

    # -----------------------------------------------------------------
    # Visibility / lookup
    # -----------------------------------------------------------------

    def lookup(self, key: str) -> Optional[ArtifactLocation]:
        """
        Resolve key at CURRENT = snapshot + full log replay.

        Returns the location from the newest log entry for ``key``,
        ``None`` if that entry is a tombstone, or the snapshot's value
        when the log has no entry for ``key``.
        """
        # Shadowing: the newest entry wins, so scan the log newest-first.
        for entry in reversed(list(self._log.entries())):
            if entry.artifact_key != key:
                continue

            if entry.op == LogOp.TOMBSTONE:
                return None
            if entry.op == LogOp.PUT:
                return entry.location

        # No log entry mentions the key: fall back to the snapshot.
        return self._snapshot.get(key)

    # -----------------------------------------------------------------
    # Mutation (append-only)
    # -----------------------------------------------------------------

    def put(self, key: str, loc: ArtifactLocation):
        """
        Append PUT entry binding ``key`` to ``loc``.
        """
        self._log.append_put(key, loc)

    def tombstone(self, key: str):
        """
        Append TOMBSTONE entry hiding ``key``.
        """
        self._log.append_tombstone(key)

    # -----------------------------------------------------------------
    # Snapshotting
    # -----------------------------------------------------------------

    def snapshot(self) -> ContentIndexSnapshot:
        """
        Produce a new immutable snapshot representing CURRENT state.

        The existing snapshot and the log are left untouched.
        """
        materialized: Dict[str, ArtifactLocation] = dict(self._snapshot.items())

        # Forward replay: each entry overwrites whatever came before it.
        for entry in self._log.entries():
            if entry.op == LogOp.PUT:
                materialized[entry.artifact_key] = entry.location
            elif entry.op == LogOp.TOMBSTONE:
                materialized.pop(entry.artifact_key, None)

        return ContentIndexSnapshot(materialized)

    def reset_log(self):
        """
        Clears the log after snapshotting (external coordination required).

        Only the log is replaced; the base snapshot is not updated here,
        so the caller is responsible for carrying the materialized state
        forward.
        """
        self._log = ContentIndexLog()
|
||
|
|
```
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
# Why this exactly matches your spec
|
||
|
|
|
||
|
|
| Spec requirement      | Where enforced               |
| --------------------- | ---------------------------- |
| Immutable entries     | No mutation, append-only log |
| Shadowing             | Reverse log replay           |
| Snapshot-relative     | Snapshot + replay            |
| Deterministic         | Ordered replay only          |
| Tombstones            | `LogOp.TOMBSTONE`            |
| No layout assumptions | No block semantics           |
| Visibility rules      | Encoded via replay           |
|
||
|
|
|
||
|
|
Nothing extra. Nothing missing.
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
# How this integrates with your existing system
|
||
|
|
|
||
|
|
### During materialization:
|
||
|
|
|
||
|
|
```python
|
||
|
|
cid = artifact.cid(cache)

loc = content_index.lookup(cid)
if loc is None:
    # Not indexed yet: store the bytes, then record where they landed.
    loc = allocate_and_write_bytes(cid)
    content_index.put(cid, loc)
|
||
|
|
```
|
||
|
|
|
||
|
|
### During recovery:
|
||
|
|
|
||
|
|
```python
|
||
|
|
# Rebuild the index from the persisted snapshot and log; lookups then
# resolve against snapshot + log replay.
index = ContentIndex(snapshot=loaded_snapshot, log=loaded_log)
loc = index.lookup(cid)
|
||
|
|
```
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
# Important: what this does NOT do (by design)
|
||
|
|
|
||
|
|
* No block allocation
|
||
|
|
* No sealing rules
|
||
|
|
* No concurrency control
|
||
|
|
* No GC
|
||
|
|
* No disk format
|
||
|
|
* No performance shortcuts
|
||
|
|
|
||
|
|
That belongs to **ASL-STORE-INDEX**, exactly as you wrote.
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
## You now have
|
||
|
|
|
||
|
|
* A **formally correct ASL-CORE-INDEX**
|
||
|
|
* That works identically for:

  * classical bytes
  * DAG materializations
  * q-bit expansions
|
||
|
|
* And composes cleanly with provenance, federation, and ML layers
|
||
|
|
|
||
|
|
If you want next, I can:
|
||
|
|
|
||
|
|
* wire this into a full `ASLStore`
|
||
|
|
* add snapshot IDs + log positions
|
||
|
|
* show crash recovery
|
||
|
|
* add federation merge semantics
|
||
|
|
* or formally specify correctness invariants
|
||
|
|
|
||
|
|
Just say where you want to go.
|
||
|
|
|