Compare commits
10 commits
ce10f2b261
...
b450e65453
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b450e65453 | ||
|
|
2f393701fe | ||
|
|
29f528c2cb | ||
|
|
437c8843df | ||
|
|
7250f03717 | ||
|
|
92aa3b6a3f | ||
|
|
7af01c8a53 | ||
|
|
081225ec07 | ||
|
|
f0e3768412 | ||
|
|
610a3e4848 |
30
README.md
30
README.md
|
|
@ -23,8 +23,10 @@ cp config/env.example config/env.local
|
||||||
```
|
```
|
||||||
`dev_start_daemon.sh` initializes the store for the selected backend and, when
|
`dev_start_daemon.sh` initializes the store for the selected backend and, when
|
||||||
`STORE_BACKEND=index`, runs a quick startup write probe. If index writes are
|
`STORE_BACKEND=index`, runs a quick startup write probe. If index writes are
|
||||||
unhealthy, it automatically falls back to `fs` (configurable via
|
unhealthy, it first tries a one-time index store repair (backup + re-init) and
|
||||||
`INDEX_BACKEND_PROBE` and `INDEX_BACKEND_FALLBACK` in `config/env.local`).
|
then falls back to `fs` if still unhealthy (configurable via
|
||||||
|
`INDEX_BACKEND_PROBE`, `INDEX_BACKEND_REPAIR`, and `INDEX_BACKEND_FALLBACK` in
|
||||||
|
`config/env.local`).
|
||||||
|
|
||||||
3. Run startup checks against the daemon socket:
|
3. Run startup checks against the daemon socket:
|
||||||
|
|
||||||
|
|
@ -50,6 +52,17 @@ Use the integrated v2 app flow wrapper:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
./scripts/v2_app.sh startup-check
|
./scripts/v2_app.sh startup-check
|
||||||
|
./scripts/v2_app.sh ai-vertical-slice
|
||||||
|
./scripts/v2_app.sh ai-vertical-slice --skip-evals
|
||||||
|
./scripts/v2_app.sh ai-vertical-slice --auto-start-daemon
|
||||||
|
./scripts/v2_app.sh ai-agent 'doc-ai-1' 'What domain is doc-ai-1 in?' 'ms.within_domain'
|
||||||
|
./scripts/v2_app.sh ai-agent --json --require-evidence --max-steps 3 'doc-ai-1' 'What domain is doc-ai-1 in?' 'ms.within_domain'
|
||||||
|
./scripts/v2_app.sh ai-check
|
||||||
|
./scripts/v2_app.sh ai-generate 'Summarize retrieval behavior in one sentence.'
|
||||||
|
./scripts/v2_app.sh ai-generate --json 'Summarize retrieval behavior in one sentence.'
|
||||||
|
./scripts/v2_app.sh ai-answer 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain'
|
||||||
|
./scripts/v2_app.sh ai-answer --json 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain'
|
||||||
|
./scripts/v2_app.sh ai-answer --json --require-evidence 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain'
|
||||||
./scripts/v2_app.sh ingest '{"idempotency_key":"k1","mode":"continue_on_error","nodes":[{"name":"doc-1"}]}'
|
./scripts/v2_app.sh ingest '{"idempotency_key":"k1","mode":"continue_on_error","nodes":[{"name":"doc-1"}]}'
|
||||||
./scripts/v2_app.sh sync-once
|
./scripts/v2_app.sh sync-once
|
||||||
./scripts/v2_app.sh consume-changes --once
|
./scripts/v2_app.sh consume-changes --once
|
||||||
|
|
@ -57,6 +70,12 @@ Use the integrated v2 app flow wrapper:
|
||||||
./scripts/v2_app.sh tombstone '<edge_ref>'
|
./scripts/v2_app.sh tombstone '<edge_ref>'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
AI lane notes:
|
||||||
|
|
||||||
|
- Plan and scope guardrails: `docs/ai-plan.md`
|
||||||
|
- Deterministic seed payload: `ai/fixtures/seed_batch.json`
|
||||||
|
- Agent loop checkpoints: `ai/runs/agent-run-*.json` (updated at start, each planner step, and completion with `status` + timestamps)
|
||||||
|
|
||||||
Run integration coverage (requires running `amduatd` + `jq`):
|
Run integration coverage (requires running `amduatd` + `jq`):
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
|
|
@ -70,6 +89,13 @@ Run local cursor/handler semantics checks (no daemon required):
|
||||||
./tests/changes_consumer_handler.sh
|
./tests/changes_consumer_handler.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Run AI connectivity and generation smoke eval (no daemon required):
|
||||||
|
|
||||||
|
```sh
|
||||||
|
./tests/ai_eval.sh
|
||||||
|
./tests/ai_answer_eval.sh
|
||||||
|
```
|
||||||
|
|
||||||
Run a fast end-to-end smoke (startup + ingest + sync + retrieve + tombstone):
|
Run a fast end-to-end smoke (startup + ingest + sync + retrieve + tombstone):
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
|
|
|
||||||
26
ai/fixtures/seed_batch.json
Normal file
26
ai/fixtures/seed_batch.json
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
{
|
||||||
|
"idempotency_key": "ai-slice-seed",
|
||||||
|
"mode": "continue_on_error",
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"name": "doc-ai-1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "topic-ai-alpha"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"edges": [
|
||||||
|
{
|
||||||
|
"subject": "doc-ai-1",
|
||||||
|
"predicate": "ms.within_domain",
|
||||||
|
"object": "topic-ai-alpha",
|
||||||
|
"provenance": {
|
||||||
|
"source_uri": "urn:app:ai-seed",
|
||||||
|
"extractor": "ai-slice-loader",
|
||||||
|
"observed_at": 1,
|
||||||
|
"ingested_at": 2,
|
||||||
|
"trace_id": "trace-ai-seed-1"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
1
ai/runs/agent-run-20260208-071138-238480.json
Normal file
1
ai/runs/agent-run-20260208-071138-238480.json
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
{"run_id":"20260208-071138-238480","input":{"question":"What domain is doc-ai-1 in?","roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","require_evidence":false},"final_query":{"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain"},"stop_reason":"retrieve_failed","steps":[],"final_answer":{"response":"Agent loop ended without answer (retrieve_failed).","done_reason":"agent_stopped"}}
|
||||||
1
ai/runs/agent-run-20260208-071315-239391.json
Normal file
1
ai/runs/agent-run-20260208-071315-239391.json
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
{"run_id":"20260208-071315-239391","input":{"question":"What domain is doc-ai-1 in?","roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","require_evidence":false},"final_query":{"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain"},"stop_reason":"planner_stop","steps":[{"step":1,"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","context":{"nodes":0,"edges":0},"plan":{"action":"refine_query","next_roots_csv":"","next_goals_csv":"","reason":"Insufficient retrieval context to determine the domain of doc-ai-1."}},{"step":2,"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","context":{"nodes":1,"edges":0},"plan":{"action":"stop","next_roots_csv":"","next_goals_csv":"","reason":"Insufficient context to determine the domain of doc-ai-1."}}],"final_answer":{"response":"Agent loop ended without answer (planner_stop).","done_reason":"agent_stopped"}}
|
||||||
1
ai/runs/agent-run-20260208-071727-240673.json
Normal file
1
ai/runs/agent-run-20260208-071727-240673.json
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
{"run_id":"20260208-071727-240673","input":{"question":"What domain is doc-ai-1 in?","roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","require_evidence":false},"final_query":{"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain"},"stop_reason":"answered","steps":[{"step":1,"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","context":{"nodes":2,"edges":1},"plan":{"action":"answer","next_roots_csv":"","next_goals_csv":"","reason":"The current context provides sufficient information to answer the question."}}],"final_answer":{"model":"qwen2.5-coder:7b","created_at":"2026-02-08T06:17:29.568796336Z","response":"topic-ai-alpha","done":true,"done_reason":"stop","context":[151644,8948,198,2610,525,1207,16948,11,3465,553,54364,14817,13,1446,525,264,10950,17847,13,151645,198,151644,872,198,16141,279,3405,1667,26687,279,4771,2266,3685,624,2679,279,2266,374,38313,11,1977,6896,1128,374,7402,624,19434,279,4226,63594,382,11212,2266,510,12288,510,12,4629,12,2143,12,16,198,41122,510,12,4629,12,2143,12,16,1177,1011,18164,258,20111,29052,8544,12,2143,64538,271,12288,510,12,8544,12,2143,64538,198,41122,510,12,4629,12,2143,12,16,1177,1011,18164,258,20111,29052,8544,12,2143,64538,271,14582,510,3838,7947,374,4629,12,2143,12,16,304,30,151645,198,151644,77091,198,16411,12,2143,64538],"total_duration":175968138,"load_duration":92265198,"prompt_eval_count":126,"prompt_eval_duration":13491488,"eval_count":5,"eval_duration":57138095,"evidence":[{"edge_ref":"00012d3c287ce8af9c400519c995041274d14319bccc204303aa97768706d90ddc87","subject":"doc-ai-1","predicate":"ms.within_domain","predicate_name":"ms.within_domain","object":"topic-ai-alpha","predicate_ref":"000140ebd9f62d224d780ebf2da668c3175fd15fc2bcceba9a99df1be4c5184329bb","subject_ref":"0001ccff97484870025da6d1b7b417f4678c9e5e541b2bebe80289ffdc07505b7c26","object_ref":"00018a5ed5c3c89fc445549fd5c917b1ccf1165faef6508ad886776cdd9553f437a7","depth":1,"reasons":["reachable_from_roots","goal_predicate_match"]}],"grounding":{"has_evidence":t
rue,"require_evidence":false}}}
|
||||||
|
|
@ -9,6 +9,8 @@ STORE_BACKEND="index"
|
||||||
# For index backend, run a startup write probe and fall back to fs if broken.
|
# For index backend, run a startup write probe and fall back to fs if broken.
|
||||||
INDEX_BACKEND_PROBE="1"
|
INDEX_BACKEND_PROBE="1"
|
||||||
INDEX_BACKEND_FALLBACK="fs"
|
INDEX_BACKEND_FALLBACK="fs"
|
||||||
|
# Attempt one-time index root repair (backup + index init --force) before fs fallback.
|
||||||
|
INDEX_BACKEND_REPAIR="1"
|
||||||
FS_FALLBACK_STORE_ROOT=".amduat-asl-fs"
|
FS_FALLBACK_STORE_ROOT=".amduat-asl-fs"
|
||||||
# AMDUATD_BIN="/path/to/amduatd"
|
# AMDUATD_BIN="/path/to/amduatd"
|
||||||
# ASL_BIN="/path/to/amduat-asl"
|
# ASL_BIN="/path/to/amduat-asl"
|
||||||
|
|
@ -26,3 +28,10 @@ RETRY_MAX_MS="2000"
|
||||||
# Curl timeouts
|
# Curl timeouts
|
||||||
CURL_CONNECT_TIMEOUT_SECONDS="2"
|
CURL_CONNECT_TIMEOUT_SECONDS="2"
|
||||||
CURL_MAX_TIME_SECONDS="30"
|
CURL_MAX_TIME_SECONDS="30"
|
||||||
|
|
||||||
|
# Optional Ollama AI settings
|
||||||
|
OLLAMA_HOST="http://127.0.0.1:11434"
|
||||||
|
OLLAMA_MODEL="qwen2.5-coder:7b"
|
||||||
|
OLLAMA_TIMEOUT_SECONDS="60"
|
||||||
|
AI_CONTEXT_MAX_CHARS="12000"
|
||||||
|
AI_EVIDENCE_MAX_EDGES="5"
|
||||||
|
|
|
||||||
45
docs/ai-plan.md
Normal file
45
docs/ai-plan.md
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
# AI v2 Plan
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Ship one reliable AI vertical slice on top of the v2 graph API:
|
||||||
|
|
||||||
|
1. ingest deterministic graph facts,
|
||||||
|
2. retrieve graph context for a root,
|
||||||
|
3. answer with grounding evidence,
|
||||||
|
4. execute a minimal planner loop with persisted run state.
|
||||||
|
|
||||||
|
## Scope Rules
|
||||||
|
|
||||||
|
- Prioritize app-level AI workflow work in this repo.
|
||||||
|
- Treat backend fault investigation as out-of-scope unless it blocks the vertical slice.
|
||||||
|
- Keep `vendor/amduat-api` pinned while iterating on prompts/evals.
|
||||||
|
|
||||||
|
## Working Lane
|
||||||
|
|
||||||
|
- Use branch: `feat/ai-v2-experiments`.
|
||||||
|
- Keep core command stable: `./scripts/v2_app.sh ai-vertical-slice`.
|
||||||
|
- Track prompt/eval tweaks under `ai/`.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- `./scripts/v2_app.sh ai-vertical-slice` passes on a running daemon with Ollama.
|
||||||
|
- Output contains non-empty answer text with `grounding.has_evidence == true`.
|
||||||
|
- `tests/ai_eval.sh` and `tests/ai_answer_eval.sh` pass in the same environment.
|
||||||
|
- `./scripts/v2_app.sh ai-agent --json 'doc-ai-1' 'What domain is doc-ai-1 in?' 'ms.within_domain'` writes checkpoint state under `ai/runs/`.
|
||||||
|
|
||||||
|
## Quick Run Sequence
|
||||||
|
|
||||||
|
1. Start daemon (or let the vertical slice auto-start it):
|
||||||
|
`./scripts/dev_start_daemon.sh`
|
||||||
|
2. Run AI vertical slice:
|
||||||
|
`./scripts/v2_app.sh ai-vertical-slice`
|
||||||
|
3. If daemon may not be running, use:
|
||||||
|
`./scripts/v2_app.sh ai-vertical-slice --auto-start-daemon`
|
||||||
|
4. Run minimal agent loop:
|
||||||
|
`./scripts/v2_app.sh ai-agent --json --auto-start-daemon 'doc-ai-1' 'What domain is doc-ai-1 in?' 'ms.within_domain'`
|
||||||
|
|
||||||
|
## Stop Conditions
|
||||||
|
|
||||||
|
- If startup, ingest, or retrieve fails due to backend regression, log the failure and pause AI iteration until fixed.
|
||||||
|
- Do not switch scope to broad backend cleanup without an explicit decision.
|
||||||
65
docs/index-backend-repro.md
Normal file
65
docs/index-backend-repro.md
Normal file
|
|
@ -0,0 +1,65 @@
|
||||||
|
# Index Backend Verification (Isolated)
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
The previously reported isolated index-backend write failure is not reproducing on
|
||||||
|
current builds.
|
||||||
|
|
||||||
|
Verified on **2026-02-08** with a fresh store root:
|
||||||
|
|
||||||
|
- `POST /v2/graph/nodes` (node 1): HTTP 200
|
||||||
|
- `POST /v2/graph/nodes` (node 2): HTTP 200
|
||||||
|
- `POST /v2/graph/edges` (edge doc->topic): HTTP 200
|
||||||
|
- Control run on `--store-backend fs`: same sequence HTTP 200
|
||||||
|
- `./scripts/smoke_v2.sh` on index: PASS
|
||||||
|
- `./tests/integration_v2.sh` on index: PASS
|
||||||
|
|
||||||
|
## Re-Verification Procedure
|
||||||
|
|
||||||
|
### 1) Start index backend (isolated root + socket)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
STORE_BACKEND=index INDEX_BACKEND_PROBE=0 \
|
||||||
|
STORE_ROOT=/tmp/amduat-asl-index-iso \
|
||||||
|
SOCK=/tmp/amduatd-index-iso.sock SPACE=app1 \
|
||||||
|
./scripts/dev_start_daemon.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2) Execute direct API calls
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --globoff --silent --show-error --unix-socket /tmp/amduatd-index-iso.sock \
|
||||||
|
-H 'Content-Type: application/json' -H 'X-Amduat-Space: app1' \
|
||||||
|
-X POST --data-binary '{"name":"doca1"}' \
|
||||||
|
http://localhost/v2/graph/nodes
|
||||||
|
|
||||||
|
curl --globoff --silent --show-error --unix-socket /tmp/amduatd-index-iso.sock \
|
||||||
|
-H 'Content-Type: application/json' -H 'X-Amduat-Space: app1' \
|
||||||
|
-X POST --data-binary '{"name":"topica1"}' \
|
||||||
|
http://localhost/v2/graph/nodes
|
||||||
|
|
||||||
|
curl --globoff --silent --show-error --unix-socket /tmp/amduatd-index-iso.sock \
|
||||||
|
-H 'Content-Type: application/json' -H 'X-Amduat-Space: app1' \
|
||||||
|
-X POST --data-binary '{"subject":"doca1","predicate":"ms.within_domain","object":"topica1","provenance":{"source_uri":"urn:t","extractor":"x","observed_at":1,"ingested_at":2,"trace_id":"t1"}}' \
|
||||||
|
http://localhost/v2/graph/edges
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: all three requests return HTTP 200.
|
||||||
|
|
||||||
|
### 3) Control check (fs backend)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
STORE_BACKEND=fs STORE_ROOT=/tmp/amduat-asl-fs-iso \
|
||||||
|
SOCK=/tmp/amduatd-fs-iso.sock SPACE=app1 \
|
||||||
|
./scripts/dev_start_daemon.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the same three API calls. Expected: HTTP 200 for all calls.
|
||||||
|
|
||||||
|
## Regression Gate Recommendation
|
||||||
|
|
||||||
|
Use these as quick confidence checks when updating `vendor/amduat-api`:
|
||||||
|
|
||||||
|
- isolated 3-call index write sequence above
|
||||||
|
- `./scripts/smoke_v2.sh` on index backend
|
||||||
|
- `./tests/integration_v2.sh` on index backend
|
||||||
|
|
@ -273,3 +273,19 @@ curl --unix-socket "${SOCK}" -sS -X POST "${BASE}/v2/graph/edges/tombstone" \
|
||||||
-H "X-Amduat-Space: ${SPACE}" \
|
-H "X-Amduat-Space: ${SPACE}" \
|
||||||
-d "{\"edge_ref\":\"${EDGE_REF}\"}"
|
-d "{\"edge_ref\":\"${EDGE_REF}\"}"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## 11) AI Answer Wrapper (Grounded)
|
||||||
|
|
||||||
|
For local app usage via this scaffold:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
./scripts/v2_app.sh ai-answer 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain'
|
||||||
|
./scripts/v2_app.sh ai-answer --json 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain'
|
||||||
|
./scripts/v2_app.sh ai-answer --json --require-evidence 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain'
|
||||||
|
```
|
||||||
|
|
||||||
|
Behavior notes:
|
||||||
|
|
||||||
|
- The command retrieves graph context first (`/v2/graph/retrieve` with `/v2/graph/subgraph` fallback).
|
||||||
|
- JSON output includes an `evidence[]` array with normalized triplets and refs (`predicate_ref` retained, `predicate_name` preferred when resolvable).
|
||||||
|
- `--require-evidence` enforces strict grounding: command exits non-zero when no supporting edges are found.
|
||||||
|
|
|
||||||
344
scripts/ai_agent_loop.sh
Executable file
344
scripts/ai_agent_loop.sh
Executable file
|
|
@ -0,0 +1,344 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||||
|
# shellcheck source=/dev/null
|
||||||
|
source "${ROOT_DIR}/src/app_v2.sh"
|
||||||
|
|
||||||
|
# Print the command synopsis, an outline of the agent loop and the supported
# options on stdout. Callers redirect to stderr when reporting misuse.
usage() {
  # Only the synopsis needs expansion (the script name from $0).
  printf '%s\n' "usage: $0 [--json] [--require-evidence] [--max-steps N] [--state-file PATH] [--auto-start-daemon] ROOTS_CSV QUESTION [GOAL_PREDICATES_CSV]"
  cat <<'USAGE_BODY'

Minimal agent loop v1:
1) retrieve context
2) ask model for next action (answer/refine_query/stop)
3) optionally refine roots/goals and repeat
4) produce grounded answer and persist run checkpoint

Options:
--json print full final JSON payload
--require-evidence fail if no supporting edges are found
--max-steps N planner iterations before answer fallback (default: 3)
--state-file PATH write run state to this path
--auto-start-daemon start daemon if startup checks fail
USAGE_BODY
}
|
||||||
|
|
||||||
|
# Terminate the script with exit status 2 unless a `jq` command resolves.
require_jq() {
  command -v jq >/dev/null 2>&1 && return 0
  echo "ai_agent_loop.sh: jq is required" >&2
  exit 2
}
|
||||||
|
|
||||||
|
# Make sure the daemon passes app_startup_checks, optionally booting it first.
#
# Globals read: auto_start_daemon, ROOT_DIR, SOCK, SPACE, plus the optional
#               AI_DAEMON_STORE_BACKEND / AI_DAEMON_STORE_ROOT /
#               AI_DAEMON_LOG_PATH overrides.
# Returns: 0 as soon as app_startup_checks succeeds, 1 otherwise.
ensure_daemon_ready() {
  app_startup_checks >/dev/null 2>&1 && return 0

  # Without --auto-start-daemon there is nothing more to try.
  if [[ "${auto_start_daemon}" != "1" ]]; then
    echo "ai_agent_loop.sh: daemon unreachable on SOCK=${SOCK}" >&2
    echo "hint: run ./scripts/dev_start_daemon.sh or pass --auto-start-daemon" >&2
    return 1
  fi

  local backend="${AI_DAEMON_STORE_BACKEND:-fs}"
  local store_root="${AI_DAEMON_STORE_ROOT:-/tmp/amduat-asl-ai-agent}"
  local log_path="${AI_DAEMON_LOG_PATH:-/tmp/ai-agent-daemon.log}"

  echo "daemon not reachable; attempting startup via scripts/dev_start_daemon.sh" >&2
  # nohup + disown detach the launcher from this shell's job table.
  STORE_BACKEND="${backend}" STORE_ROOT="${store_root}" SOCK="${SOCK}" SPACE="${SPACE}" \
    nohup "${ROOT_DIR}/scripts/dev_start_daemon.sh" >"${log_path}" 2>&1 &
  local launcher_pid="$!"
  disown "${launcher_pid}" 2>/dev/null || true

  # Poll 80 times with 0.1 s pauses, then probe one last time.
  local attempt
  for (( attempt = 1; attempt <= 80; attempt++ )); do
    app_startup_checks >/dev/null 2>&1 && return 0
    sleep 0.1
  done
  if app_startup_checks >/dev/null 2>&1; then
    return 0
  fi

  echo "ai_agent_loop.sh: daemon still unreachable after startup attempt" >&2
  echo "see ${log_path} for startup logs" >&2
  return 1
}
|
||||||
|
|
||||||
|
# Append one step record (JSON text in $1) to the global `steps_json` array
# and store the compact result back into `steps_json`.
append_step() {
  local new_entry="$1"
  steps_json="$(
    printf '%s\n' "${steps_json}" \
      | jq -c --argjson step "${new_entry}" '. + [$step]'
  )"
}
|
||||||
|
|
||||||
|
# Persist the current run state as a JSON checkpoint and cache it in RUN_JSON.
#
# Globals read:    run_id, started_at, question, initial_roots_csv,
#                  initial_goals_csv, roots_csv, goals_csv, step_no, max_steps,
#                  require_evidence, steps_json, state_file
# Globals written: RUN_JSON
# Arguments:
#   $1 - run status; "completed" additionally stamps completed_at
#   $2 - stop reason, recorded verbatim
#   $3 - final answer as JSON text ("null" while there is none)
# Returns: 0 on success; non-zero when jq cannot build the document or the
#          checkpoint cannot be written. On failure the previous checkpoint
#          (if any) is left untouched.
write_run_state() {
  local status="$1"
  local stop_reason_value="$2"
  local final_answer_value="$3"

  local now_iso
  now_iso="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
  local completed_at=""
  if [[ "${status}" == "completed" ]]; then
    completed_at="${now_iso}"
  fi

  local require_evidence_json="false"
  if [[ "${require_evidence}" == "1" ]]; then
    require_evidence_json="true"
  fi

  # Build the whole document before touching the file so that invalid JSON in
  # steps_json / $3 can never replace a good checkpoint with an empty one.
  local run_json_local
  run_json_local="$(jq -nc \
    --arg run_id "${run_id}" \
    --arg started_at "${started_at}" \
    --arg updated_at "${now_iso}" \
    --arg completed_at "${completed_at}" \
    --arg status "${status}" \
    --arg question "${question}" \
    --arg initial_roots_csv "${initial_roots_csv}" \
    --arg initial_goals_csv "${initial_goals_csv}" \
    --arg final_roots_csv "${roots_csv}" \
    --arg final_goals_csv "${goals_csv}" \
    --arg stop_reason "${stop_reason_value}" \
    --argjson current_step "${step_no}" \
    --argjson max_steps "${max_steps}" \
    --argjson require_evidence "${require_evidence_json}" \
    --argjson steps "${steps_json}" \
    --argjson final_answer "${final_answer_value}" \
    '{
      run_id:$run_id,
      status:$status,
      started_at:$started_at,
      updated_at:$updated_at,
      completed_at:(if $completed_at == "" then null else $completed_at end),
      input:{
        question:$question,
        roots_csv:$initial_roots_csv,
        goals_csv:$initial_goals_csv,
        require_evidence:$require_evidence
      },
      planner:{
        current_step:$current_step,
        max_steps:$max_steps
      },
      final_query:{
        roots_csv:$final_roots_csv,
        goals_csv:$final_goals_csv
      },
      stop_reason:$stop_reason,
      steps:$steps,
      final_answer:$final_answer
    }')" || {
    echo "ai_agent_loop.sh: failed to build run state JSON" >&2
    return 1
  }

  if [[ -L "${state_file}" || ( -e "${state_file}" && ! -f "${state_file}" ) ]]; then
    # Symlinks and special targets (e.g. /dev/stdout, a FIFO) must be written
    # through; replacing them by rename would change what the path is.
    printf '%s\n' "${run_json_local}" > "${state_file}" || return 1
  else
    # Write next to the target and rename, so readers polling the checkpoint
    # never observe a truncated file.
    local tmp_state="${state_file}.tmp.$$"
    if ! printf '%s\n' "${run_json_local}" > "${tmp_state}" \
      || ! mv -f -- "${tmp_state}" "${state_file}"; then
      rm -f -- "${tmp_state}"
      return 1
    fi
  fi

  RUN_JSON="${run_json_local}"
}
|
||||||
|
|
||||||
|
# Turn a raw model reply into exactly one normalised planner decision.
#
# Arguments:
#   $1 - JSON text returned by the model call; the plan is read from its
#        `.response` field.
# Outputs: a single compact JSON object on stdout with the keys
#          action (answer|refine_query|stop), next_roots_csv, next_goals_csv
#          and reason. Unusable replies degrade to action "answer" with a
#          reason of planner_empty, planner_non_object or planner_parse_error.
extract_plan_json() {
  local model_out="$1"

  # An unreadable envelope is handled like an empty reply instead of
  # propagating an empty plan to the caller.
  local raw_plan
  raw_plan="$(jq -r '.response // ""' <<<"${model_out}")" || raw_plan=""

  # Drop a Markdown code fence when it occupies the first and/or last line.
  local normalized_plan
  normalized_plan="$(printf '%s\n' "${raw_plan}" \
    | sed -e '1s/^```[[:alnum:]_-]*[[:space:]]*$//' -e '$s/^```[[:space:]]*$//')"

  # Slurp so that (a) an empty reply is detectable as [], (b) only the first
  # JSON value is used, and (c) trailing garbage yields one clean fallback
  # rather than a parsed object followed by a second fallback object.
  local parsed_plan
  parsed_plan="$(
    printf '%s' "${normalized_plan}" | jq -cs '
      if length == 0 then {"action":"answer","reason":"planner_empty"}
      elif (.[0] | type) == "object" then .[0]
      else {"action":"answer","reason":"planner_non_object"}
      end
    ' 2>/dev/null
  )" || parsed_plan='{"action":"answer","reason":"planner_parse_error"}'

  # Whitelist the action and collapse whitespace in the free-text fields.
  jq -c '
    def clean_csv(v): (v // "" | tostring | gsub("[\\r\\n\\t]+";" ") | gsub(" +";" ") | sub("^ ";"") | sub(" $";""));
    . as $r
    | {
      action: (
        ($r.action // "answer" | tostring) as $a
        | if ($a == "answer" or $a == "refine_query" or $a == "stop") then $a else "answer" end
      ),
      next_roots_csv: clean_csv($r.next_roots_csv // ""),
      next_goals_csv: clean_csv($r.next_goals_csv // ""),
      reason: clean_csv($r.reason // "")
    }
  ' <<<"${parsed_plan}"
}
|
||||||
|
|
||||||
|
# Ask the model which action the agent loop should take next.
#
# Arguments:
#   $1 - user question
#   $2 - current roots CSV
#   $3 - current goal-predicates CSV
#   $4 - retrieve result as JSON text (only node/edge counts are sent on)
#   $5 - current step number
# Outputs: the normalised plan produced by extract_plan_json on stdout. When
#          the model call itself fails, a well-formed fallback plan with
#          action "answer" and reason "planner_generate_failed" is printed
#          instead, so callers always receive one JSON object.
plan_next_action() {
  local question="$1"
  local roots_csv="$2"
  local goals_csv="$3"
  local retrieve_json="$4"
  local step_no="$5"

  # The planner only sees how much context was retrieved, not the context.
  local context_stats
  context_stats="$(jq -c '{nodes:(.nodes // [] | length), edges:(.edges // [] | length)}' <<<"${retrieve_json}")"

  local prompt
  prompt="$(cat <<PROMPT
You are an execution planner for a graph-grounded QA agent.
Decide the NEXT action only.

Rules:
- Return STRICT JSON object only.
- action must be one of: "answer", "refine_query", "stop".
- Use "refine_query" only if retrieval context is clearly insufficient.
- Keep next_roots_csv / next_goals_csv empty unless refining.
- Do not include markdown or prose outside JSON.

Current step: ${step_no}
Question: ${question}
Current roots_csv: ${roots_csv}
Current goals_csv: ${goals_csv}
Context stats: ${context_stats}

Required JSON schema:
{"action":"answer|refine_query|stop","next_roots_csv":"","next_goals_csv":"","reason":""}
PROMPT
)"

  # This function normally runs inside $(...), where bash clears errexit, so
  # a failed model call must be handled here rather than left to `set -e`.
  local plan_model_out
  if ! plan_model_out="$(app_ai_generate_json "${prompt}")"; then
    printf '%s\n' '{"action":"answer","next_roots_csv":"","next_goals_csv":"","reason":"planner_generate_failed"}'
    return 0
  fi
  extract_plan_json "${plan_model_out}"
}
|
||||||
|
|
||||||
|
# ---- Command-line handling ---------------------------------------------------
# Defaults; each may be overridden by the flags parsed below.
output_mode="text"
require_evidence=0
max_steps=3
auto_start_daemon=0
state_file=""

# Consume leading options; stop at "--" or at the first positional argument.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --json)
      output_mode="json"
      shift
      ;;
    --require-evidence)
      require_evidence=1
      shift
      ;;
    --max-steps)
      [[ $# -ge 2 ]] || { usage >&2; exit 2; }
      max_steps="$2"
      shift 2
      ;;
    --state-file)
      [[ $# -ge 2 ]] || { usage >&2; exit 2; }
      state_file="$2"
      shift 2
      ;;
    --auto-start-daemon)
      auto_start_daemon=1
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    --)
      shift
      break
      ;;
    -*)
      usage >&2
      exit 2
      ;;
    *)
      break
      ;;
  esac
done

# Positional arguments: ROOTS_CSV QUESTION [GOAL_PREDICATES_CSV].
if [[ $# -lt 2 || $# -gt 3 ]]; then
  usage >&2
  exit 2
fi

roots_csv="$1"
question="$2"
goals_csv="${3:-}"

# Accept 1..8 with optional leading zeros. Values such as "08" would
# otherwise be read as invalid octal by bash arithmetic, slip past the range
# check with an error message, and break the planner loop later on.
if ! [[ "${max_steps}" =~ ^0*[1-8]$ ]]; then
  echo "ai_agent_loop.sh: --max-steps must be integer in [1,8]" >&2
  exit 2
fi
# Normalise to plain decimal ("03" -> 3) for arithmetic and for jq --argjson.
max_steps="$(( 10#${max_steps} ))"
|
||||||
|
|
||||||
|
# ---- Agent run -----------------------------------------------------------------
# Flow: verify prerequisites, checkpoint the initial state, iterate
# retrieve -> plan -> (refine | stop | answer) up to max_steps times, then
# write the final checkpoint and print the result.
#
# Every JSON value coming back from a helper is validated before it reaches
# `jq --argjson`. Otherwise malformed output aborts the script under `set -e`
# and leaves the checkpoint stuck at status "running".
require_jq
app_init
ensure_daemon_ready

run_id="$(date +%Y%m%d-%H%M%S)-$$"
started_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
if [[ -z "${state_file}" ]]; then
  mkdir -p "${ROOT_DIR}/ai/runs"
  state_file="${ROOT_DIR}/ai/runs/agent-run-${run_id}.json"
fi

steps_json="[]"
final_answer_json=""
stop_reason="max_steps_reached"
initial_roots_csv="${roots_csv}"
initial_goals_csv="${goals_csv}"
RUN_JSON=""

step_no=1
write_run_state "running" "${stop_reason}" "null"
while (( step_no <= max_steps )); do
  retrieve_out="$(app_retrieve_with_fallback "${roots_csv}" "${goals_csv}")" || {
    stop_reason="retrieve_failed"
    write_run_state "running" "${stop_reason}" "null"
    break
  }

  # A retrieve payload jq cannot summarise is treated as a failed retrieve.
  context_stats="$(jq -c '{nodes:(.nodes // [] | length), edges:(.edges // [] | length)}' <<<"${retrieve_out}" 2>/dev/null)" || context_stats=""
  if [[ -z "${context_stats}" ]]; then
    stop_reason="retrieve_failed"
    write_run_state "running" "${stop_reason}" "null"
    break
  fi

  # The planner must hand back exactly one JSON object; anything else
  # degrades to the same "answer" fallback the planner uses internally.
  plan_json="$(plan_next_action "${question}" "${roots_csv}" "${goals_csv}" "${retrieve_out}" "${step_no}")" || plan_json=""
  if ! jq -es 'length == 1 and (.[0] | type) == "object"' <<<"${plan_json}" >/dev/null 2>&1; then
    plan_json='{"action":"answer","next_roots_csv":"","next_goals_csv":"","reason":"planner_invalid_output"}'
  fi
  plan_action="$(jq -r '.action' <<<"${plan_json}")"
  next_roots="$(jq -r '.next_roots_csv // ""' <<<"${plan_json}")"
  next_goals="$(jq -r '.next_goals_csv // ""' <<<"${plan_json}")"

  step_record="$(jq -nc \
    --argjson step "${step_no}" \
    --arg roots_csv "${roots_csv}" \
    --arg goals_csv "${goals_csv}" \
    --argjson context "${context_stats}" \
    --argjson plan "${plan_json}" \
    '{step:$step,roots_csv:$roots_csv,goals_csv:$goals_csv,context:$context,plan:$plan}')"
  append_step "${step_record}"
  write_run_state "running" "${stop_reason}" "null"

  if [[ "${plan_action}" == "refine_query" ]]; then
    # Empty suggestions keep the current query unchanged.
    if [[ -n "${next_roots}" ]]; then
      roots_csv="${next_roots}"
    fi
    if [[ -n "${next_goals}" ]]; then
      goals_csv="${next_goals}"
    fi
    step_no=$(( step_no + 1 ))
    continue
  fi

  if [[ "${plan_action}" == "stop" ]]; then
    stop_reason="planner_stop"
    write_run_state "running" "${stop_reason}" "null"
    break
  fi

  # plan_action == "answer"
  answer_ok=1
  final_answer_json="$(app_ai_answer_json "${roots_csv}" "${question}" "${goals_csv}" "${require_evidence}")" || answer_ok=0
  # Discard stdout that is not a single JSON value (e.g. partial output from
  # a failed call) so it can never be embedded in the checkpoint.
  if ! jq -es 'length == 1' <<<"${final_answer_json}" >/dev/null 2>&1; then
    final_answer_json=""
    answer_ok=0
  fi
  if (( answer_ok )); then
    stop_reason="answered"
    write_run_state "running" "${stop_reason}" "${final_answer_json}"
    break
  fi

  stop_reason="answer_failed"
  write_run_state "running" "${stop_reason}" "null"
  break
done

# NOTE(review): when every step chose refine_query the loop ends with
# stop_reason "max_steps_reached" and no answer attempt, although the usage
# text mentions an "answer fallback" -- confirm the intended behaviour.
if [[ -z "${final_answer_json}" ]]; then
  final_answer_json="$(jq -nc --arg msg "Agent loop ended without answer (${stop_reason})." '{response:$msg,done_reason:"agent_stopped"}')"
fi

write_run_state "completed" "${stop_reason}" "${final_answer_json}"
run_json="${RUN_JSON}"

if [[ "${output_mode}" == "json" ]]; then
  printf '%s\n' "${run_json}"
else
  jq -r '.final_answer.response // "No response"' <<<"${run_json}"
  echo
  echo "state_file=${state_file}"
fi
|
||||||
141
scripts/ai_vertical_slice.sh
Executable file
141
scripts/ai_vertical_slice.sh
Executable file
|
|
@ -0,0 +1,141 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||||
|
# shellcheck source=/dev/null
|
||||||
|
source "${ROOT_DIR}/src/app_v2.sh"
|
||||||
|
|
||||||
|
# Print the synopsis, the slice stages and the supported environment
# overrides on stdout. Callers redirect to stderr when reporting misuse.
usage() {
  # Only the synopsis needs expansion (the script name from $0).
  printf '%s\n' "usage: $0 [--skip-evals] [--auto-start-daemon]"
  cat <<'USAGE_BODY'

Runs the AI vertical slice:
1) startup checks
2) seed graph ingest from ai/fixtures/seed_batch.json
3) retrieve grounding context
4) generate grounded answer (require evidence)
5) optional eval scripts

Environment overrides:
AI_SLICE_FIXTURE_PATH (default: ai/fixtures/seed_batch.json)
AI_SLICE_ROOTS_CSV (default: doc-ai-1)
AI_SLICE_GOALS_CSV (default: ms.within_domain)
AI_SLICE_QUESTION (default: What domain is doc-ai-1 in?)
AI_SLICE_SKIP_EVALS (default: 0)
AI_SLICE_AUTO_START_DAEMON (default: 0)
USAGE_BODY
}
|
||||||
|
|
||||||
|
# Terminate the script with exit status 2 unless a `jq` command resolves.
require_jq() {
  if command -v jq >/dev/null 2>&1; then
    return 0
  fi
  echo "ai_vertical_slice.sh: jq is required" >&2
  exit 2
}
|
||||||
|
|
||||||
|
# ---- Command-line and environment handling -----------------------------------
# Environment variables provide the defaults; flags override them.
skip_evals="${AI_SLICE_SKIP_EVALS:-0}"
auto_start_daemon="${AI_SLICE_AUTO_START_DAEMON:-0}"

# Only flags are accepted; anything unrecognised is a usage error.
until [[ $# -eq 0 ]]; do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    --skip-evals)
      skip_evals=1
      shift
      ;;
    --auto-start-daemon)
      auto_start_daemon=1
      shift
      ;;
    *)
      usage >&2
      exit 2
      ;;
  esac
done

# Slice inputs, each overridable through the environment.
fixture_path="${AI_SLICE_FIXTURE_PATH:-${ROOT_DIR}/ai/fixtures/seed_batch.json}"
roots_csv="${AI_SLICE_ROOTS_CSV:-doc-ai-1}"
goals_csv="${AI_SLICE_GOALS_CSV:-ms.within_domain}"
question="${AI_SLICE_QUESTION:-What domain is doc-ai-1 in?}"

# Fail early when the seed fixture is missing.
if [[ ! -f "${fixture_path}" ]]; then
  echo "ai_vertical_slice.sh: fixture not found: ${fixture_path}" >&2
  exit 2
fi

require_jq
app_init
|
||||||
|
|
||||||
|
# Make sure the daemon passes app_startup_checks, optionally booting it first.
#
# Globals read: auto_start_daemon, ROOT_DIR, SOCK, SPACE, plus the optional
#               AI_DAEMON_STORE_BACKEND / AI_DAEMON_STORE_ROOT /
#               AI_DAEMON_LOG_PATH overrides.
# Returns: 0 as soon as app_startup_checks succeeds, 1 otherwise.
ensure_daemon_ready() {
  app_startup_checks >/dev/null 2>&1 && return 0

  # Without --auto-start-daemon there is nothing more to try.
  if [[ "${auto_start_daemon}" != "1" ]]; then
    echo "ai_vertical_slice.sh: daemon unreachable on SOCK=${SOCK}" >&2
    echo "hint: run ./scripts/dev_start_daemon.sh or pass --auto-start-daemon" >&2
    return 1
  fi

  local backend="${AI_DAEMON_STORE_BACKEND:-fs}"
  local store_root="${AI_DAEMON_STORE_ROOT:-/tmp/amduat-asl-ai-slice}"
  local log_path="${AI_DAEMON_LOG_PATH:-/tmp/ai-vertical-slice-daemon.log}"

  echo "daemon not reachable; attempting startup via scripts/dev_start_daemon.sh" >&2
  # nohup + disown detach the launcher from this shell's job table.
  STORE_BACKEND="${backend}" STORE_ROOT="${store_root}" SOCK="${SOCK}" SPACE="${SPACE}" \
    nohup "${ROOT_DIR}/scripts/dev_start_daemon.sh" >"${log_path}" 2>&1 &
  local launcher_pid="$!"
  disown "${launcher_pid}" 2>/dev/null || true

  # Poll 80 times with 0.1 s pauses, then probe one last time.
  local attempt
  for (( attempt = 1; attempt <= 80; attempt++ )); do
    app_startup_checks >/dev/null 2>&1 && return 0
    sleep 0.1
  done
  if app_startup_checks >/dev/null 2>&1; then
    return 0
  fi

  echo "ai_vertical_slice.sh: daemon still unreachable after startup attempt" >&2
  echo "see ${log_path} for startup logs" >&2
  return 1
}
|
||||||
|
|
||||||
|
# ---- Vertical slice run --------------------------------------------------------
# Stages: daemon readiness, startup check, fixture ingest, context retrieve,
# grounded answer, optional evals. Any failing stage aborts the script.
ensure_daemon_ready

echo "== startup-check =="
app_startup_checks | jq .

echo "== ingest fixture =="
# Stamp the fixture with a per-run idempotency key (epoch seconds).
idempotency_key="ai-slice-$(date +%s)"
payload="$(jq -c --arg k "${idempotency_key}" '.idempotency_key = $k' "${fixture_path}")"
ingest_out="$(app_ingest_batch "${payload}")"
jq . <<<"${ingest_out}"

echo "== retrieve context =="
retrieve_out="$(app_retrieve_with_fallback "${roots_csv}" "${goals_csv}")"
jq . <<<"${retrieve_out}"
# The slice needs at least one node and one edge to ground an answer.
if ! jq -e '((.nodes // []) | length) > 0 and ((.edges // []) | length) > 0' <<<"${retrieve_out}" >/dev/null; then
  echo "ai_vertical_slice.sh: retrieve produced no graph context" >&2
  exit 1
fi

echo "== grounded answer =="
# The trailing "1" asks the helper to require evidence.
answer_out="$(app_ai_answer_json "${roots_csv}" "${question}" "${goals_csv}" "1")"
jq . <<<"${answer_out}"
if ! jq -e '.grounding.has_evidence == true and (.response | type == "string" and length > 0)' <<<"${answer_out}" >/dev/null; then
  echo "ai_vertical_slice.sh: answer was not grounded with evidence" >&2
  exit 1
fi

if [[ "${skip_evals}" == "1" ]]; then
  echo "== evals skipped =="
else
  echo "== evals =="
  "${ROOT_DIR}/tests/ai_eval.sh"
  "${ROOT_DIR}/tests/ai_answer_eval.sh"
fi

echo "ai_vertical_slice.sh: PASS"
|
||||||
|
|
@ -8,6 +8,10 @@ override_space="${SPACE:-}"
|
||||||
override_sock="${SOCK:-}"
|
override_sock="${SOCK:-}"
|
||||||
override_amduatd_bin="${AMDUATD_BIN:-}"
|
override_amduatd_bin="${AMDUATD_BIN:-}"
|
||||||
override_asl_bin="${ASL_BIN:-}"
|
override_asl_bin="${ASL_BIN:-}"
|
||||||
|
override_index_backend_probe="${INDEX_BACKEND_PROBE:-}"
|
||||||
|
override_index_backend_fallback="${INDEX_BACKEND_FALLBACK:-}"
|
||||||
|
override_index_backend_repair="${INDEX_BACKEND_REPAIR:-}"
|
||||||
|
override_fs_fallback_store_root="${FS_FALLBACK_STORE_ROOT:-}"
|
||||||
|
|
||||||
ENV_FILE="${ROOT_DIR}/config/env.local"
|
ENV_FILE="${ROOT_DIR}/config/env.local"
|
||||||
if [[ ! -f "${ENV_FILE}" ]]; then
|
if [[ ! -f "${ENV_FILE}" ]]; then
|
||||||
|
|
@ -22,6 +26,10 @@ if [[ -n "${override_space}" ]]; then SPACE="${override_space}"; fi
|
||||||
if [[ -n "${override_sock}" ]]; then SOCK="${override_sock}"; fi
|
if [[ -n "${override_sock}" ]]; then SOCK="${override_sock}"; fi
|
||||||
if [[ -n "${override_amduatd_bin}" ]]; then AMDUATD_BIN="${override_amduatd_bin}"; fi
|
if [[ -n "${override_amduatd_bin}" ]]; then AMDUATD_BIN="${override_amduatd_bin}"; fi
|
||||||
if [[ -n "${override_asl_bin}" ]]; then ASL_BIN="${override_asl_bin}"; fi
|
if [[ -n "${override_asl_bin}" ]]; then ASL_BIN="${override_asl_bin}"; fi
|
||||||
|
if [[ -n "${override_index_backend_probe}" ]]; then INDEX_BACKEND_PROBE="${override_index_backend_probe}"; fi
|
||||||
|
if [[ -n "${override_index_backend_fallback}" ]]; then INDEX_BACKEND_FALLBACK="${override_index_backend_fallback}"; fi
|
||||||
|
if [[ -n "${override_index_backend_repair}" ]]; then INDEX_BACKEND_REPAIR="${override_index_backend_repair}"; fi
|
||||||
|
if [[ -n "${override_fs_fallback_store_root}" ]]; then FS_FALLBACK_STORE_ROOT="${override_fs_fallback_store_root}"; fi
|
||||||
|
|
||||||
STORE_ROOT="${STORE_ROOT:-${ROOT_DIR}/.amduat-asl}"
|
STORE_ROOT="${STORE_ROOT:-${ROOT_DIR}/.amduat-asl}"
|
||||||
STORE_BACKEND="${STORE_BACKEND:-index}"
|
STORE_BACKEND="${STORE_BACKEND:-index}"
|
||||||
|
|
@ -29,6 +37,7 @@ SPACE="${SPACE:-app1}"
|
||||||
SOCK="${SOCK:-${ROOT_DIR}/amduatd.sock}"
|
SOCK="${SOCK:-${ROOT_DIR}/amduatd.sock}"
|
||||||
INDEX_BACKEND_PROBE="${INDEX_BACKEND_PROBE:-1}"
|
INDEX_BACKEND_PROBE="${INDEX_BACKEND_PROBE:-1}"
|
||||||
INDEX_BACKEND_FALLBACK="${INDEX_BACKEND_FALLBACK:-fs}"
|
INDEX_BACKEND_FALLBACK="${INDEX_BACKEND_FALLBACK:-fs}"
|
||||||
|
INDEX_BACKEND_REPAIR="${INDEX_BACKEND_REPAIR:-1}"
|
||||||
FS_FALLBACK_STORE_ROOT="${FS_FALLBACK_STORE_ROOT:-${STORE_ROOT}-fs}"
|
FS_FALLBACK_STORE_ROOT="${FS_FALLBACK_STORE_ROOT:-${STORE_ROOT}-fs}"
|
||||||
|
|
||||||
if [[ "${STORE_ROOT}" != /* ]]; then STORE_ROOT="${ROOT_DIR}/${STORE_ROOT}"; fi
|
if [[ "${STORE_ROOT}" != /* ]]; then STORE_ROOT="${ROOT_DIR}/${STORE_ROOT}"; fi
|
||||||
|
|
@ -106,26 +115,95 @@ wait_ready() {
|
||||||
probe_index_write_path() {
|
probe_index_write_path() {
|
||||||
local sock="$1"
|
local sock="$1"
|
||||||
local space="$2"
|
local space="$2"
|
||||||
|
local probe_idx
|
||||||
|
for probe_idx in 1 2 3 4 5; do
|
||||||
local run_id
|
local run_id
|
||||||
run_id="$(date +%s)"
|
run_id="$(date +%s%N)${RANDOM}${probe_idx}"
|
||||||
local doc="probe-doc-${run_id}"
|
# Keep probe names conservative (alnum only) across backend/name-policy variants.
|
||||||
local topic="probe-topic-${run_id}"
|
local doc="probe${run_id}a"
|
||||||
|
local topic="probe${run_id}b"
|
||||||
local payload
|
local payload
|
||||||
payload="$(cat <<JSON
|
payload="$(cat <<JSON
|
||||||
{"idempotency_key":"probe-${run_id}","mode":"continue_on_error","nodes":[{"name":"${doc}"},{"name":"${topic}"}],"edges":[{"subject":"${doc}","predicate":"ms.within_domain","object":"${topic}","provenance":{"source_uri":"urn:probe","extractor":"dev-start","observed_at":1,"ingested_at":2,"trace_id":"probe-${run_id}"}}]}
|
{
|
||||||
|
"idempotency_key":"probe-${run_id}",
|
||||||
|
"mode":"continue_on_error",
|
||||||
|
"nodes":[{"name":"${doc}"},{"name":"${topic}"}],
|
||||||
|
"edges":[
|
||||||
|
{
|
||||||
|
"subject":"${doc}",
|
||||||
|
"predicate":"ms.within_domain",
|
||||||
|
"object":"${topic}",
|
||||||
|
"provenance":{
|
||||||
|
"source_uri":"urn:probe:index",
|
||||||
|
"extractor":"dev-start-probe",
|
||||||
|
"observed_at":1,
|
||||||
|
"ingested_at":2,
|
||||||
|
"trace_id":"probe-${run_id}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
JSON
|
JSON
|
||||||
)"
|
)"
|
||||||
local out
|
local raw out code
|
||||||
out="$(curl --globoff --silent --show-error --unix-socket "${sock}" \
|
raw="$(curl --globoff --silent --show-error --unix-socket "${sock}" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-H "X-Amduat-Space: ${space}" \
|
-H "X-Amduat-Space: ${space}" \
|
||||||
-X POST --data-binary "${payload}" \
|
-X POST --data-binary "${payload}" \
|
||||||
|
-w $'\n%{http_code}' \
|
||||||
"http://localhost/v2/graph/batch")" || return 1
|
"http://localhost/v2/graph/batch")" || return 1
|
||||||
if [[ "${out}" == *'"ok":true'* ]]; then
|
code="${raw##*$'\n'}"
|
||||||
return 0
|
out="${raw%$'\n'*}"
|
||||||
fi
|
|
||||||
echo "index probe response: ${out}" >&2
|
if [[ "${code}" != "200" ]]; then
|
||||||
|
echo "index probe HTTP ${code}: ${out}" >&2
|
||||||
return 1
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# A successful backend health check is "node+edge write path is healthy for repeated valid payloads".
|
||||||
|
if command -v jq >/dev/null 2>&1; then
|
||||||
|
if ! printf '%s' "${out}" | jq -e '.' >/dev/null 2>&1; then
|
||||||
|
echo "index probe returned non-JSON payload: ${out}" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
if printf '%s' "${out}" | jq -e 'any((.results // [] )[]?; ((.code // 0) | tonumber) >= 500)' >/dev/null 2>&1; then
|
||||||
|
echo "index probe saw server error result: ${out}" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
if printf '%s' "${out}" | jq -e 'any((.results // [] )[]?; (.status == "error") or (((.code // 0) | tonumber) >= 400))' >/dev/null 2>&1; then
|
||||||
|
echo "index probe saw non-success result: ${out}" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
if ! printf '%s' "${out}" | jq -e '([(.results // [] )[]? | select(.kind == "node" and .status == "applied" and ((.code // 0) | tonumber) < 300)] | length) >= 2' >/dev/null 2>&1; then
|
||||||
|
echo "index probe missing applied node writes: ${out}" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
if ! printf '%s' "${out}" | jq -e 'any((.results // [] )[]?; .kind == "edge" and .status == "applied" and ((.code // 0) | tonumber) < 300)' >/dev/null 2>&1; then
|
||||||
|
echo "index probe missing applied edge write: ${out}" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
if ! printf '%s' "${out}" | jq -e '.ok == true' >/dev/null 2>&1; then
|
||||||
|
echo "index probe non-ok payload: ${out}" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "${out}" == *'"ok":true'* ]]; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
if [[ "${out}" == *'"code":5'* ]]; then
|
||||||
|
echo "index probe saw 5xx result: ${out}" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
if [[ "${out}" == *'"ok":false'* || "${out}" == *'"status":"error"'* ]]; then
|
||||||
|
echo "index probe non-ok payload: ${out}" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
echo "index probe unexpected payload: ${out}" >&2
|
||||||
|
return 1
|
||||||
|
done
|
||||||
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
run_daemon_foreground() {
|
run_daemon_foreground() {
|
||||||
|
|
@ -135,19 +213,41 @@ run_daemon_foreground() {
|
||||||
exec "${AMDUATD_BIN}" --root "${root}" --sock "${SOCK}" --store-backend "${backend}" --space "${SPACE}"
|
exec "${AMDUATD_BIN}" --root "${root}" --sock "${SOCK}" --store-backend "${backend}" --space "${SPACE}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Launch amduatd in the background for the startup write probe.
# Globals read:    AMDUATD_BIN, STORE_ROOT, SOCK, STORE_BACKEND, SPACE
# Globals written: daemon_pid (PID of the background daemon)
# Side effects:    (re)defines cleanup_probe and installs it as the EXIT trap
#                  so the probe daemon is killed if the script exits early.
start_probe_daemon() {
  printf '%s\n' "starting amduatd (probe mode): root=${STORE_ROOT} sock=${SOCK} backend=${STORE_BACKEND} space=${SPACE}" >&2

  "${AMDUATD_BIN}" \
    --root "${STORE_ROOT}" \
    --sock "${SOCK}" \
    --store-backend "${STORE_BACKEND}" \
    --space "${SPACE}" &
  daemon_pid=$!

  # Best-effort kill; failures (e.g. daemon already gone) are ignored.
  cleanup_probe() { kill "${daemon_pid}" >/dev/null 2>&1 || true; }
  trap cleanup_probe EXIT
}
|
||||||
|
|
||||||
|
# Stop the background probe daemon recorded in daemon_pid, reap it, and
# remove the EXIT trap. Both kill and wait are best-effort and silent.
stop_probe_daemon() {
  {
    kill "${daemon_pid}" || true
    wait "${daemon_pid}" || true
  } >/dev/null 2>&1
  trap - EXIT
}
|
||||||
|
|
||||||
|
# Back up an index store root and re-initialise it from scratch.
# Arguments: $1 - store root directory (required, non-empty)
# Globals:   ASL_BIN (read) - invoked as "index init --root ROOT --force"
# Outputs:   progress messages on stderr
# Returns:   1 if the root is empty or the backup/mkdir step fails; otherwise
#            the exit status of the ASL_BIN init command.
repair_index_store() {
  local root="${1:-}"
  if [[ -z "${root}" ]]; then
    echo "repair_index_store: store root is required" >&2
    return 1
  fi

  # Declaration and assignment are split so a failing date is not masked.
  local stamp backup_root
  local suffix=0
  stamp="$(date +%Y%m%d-%H%M%S)" || return 1
  backup_root="${root}.bak-${stamp}"
  # If a backup with this timestamp already exists, mv would move the store
  # *inside* it; pick the next free name instead.
  while [[ -e "${backup_root}" ]]; do
    suffix=$((suffix + 1))
    backup_root="${root}.bak-${stamp}-${suffix}"
  done

  if [[ -d "${root}" ]]; then
    echo "backing up index store root to ${backup_root}" >&2
    mv -- "${root}" "${backup_root}" || return 1
  fi
  mkdir -p -- "${root}" || return 1
  echo "reinitializing index-backed ASL store at ${root}" >&2
  "${ASL_BIN}" index init --root "${root}" --force
}
|
||||||
|
|
||||||
init_store "${STORE_BACKEND}" "${STORE_ROOT}"
|
init_store "${STORE_BACKEND}" "${STORE_ROOT}"
|
||||||
|
|
||||||
if [[ "${STORE_BACKEND}" != "index" || "${INDEX_BACKEND_PROBE}" != "1" ]]; then
|
if [[ "${STORE_BACKEND}" != "index" || "${INDEX_BACKEND_PROBE}" != "1" ]]; then
|
||||||
run_daemon_foreground "${STORE_BACKEND}" "${STORE_ROOT}"
|
run_daemon_foreground "${STORE_BACKEND}" "${STORE_ROOT}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "starting amduatd (probe mode): root=${STORE_ROOT} sock=${SOCK} backend=${STORE_BACKEND} space=${SPACE}" >&2
|
start_probe_daemon
|
||||||
"${AMDUATD_BIN}" --root "${STORE_ROOT}" --sock "${SOCK}" --store-backend "${STORE_BACKEND}" --space "${SPACE}" &
|
|
||||||
daemon_pid=$!
|
|
||||||
cleanup_probe() {
|
|
||||||
kill "${daemon_pid}" >/dev/null 2>&1 || true
|
|
||||||
}
|
|
||||||
trap cleanup_probe EXIT
|
|
||||||
|
|
||||||
fallback_to_fs=0
|
fallback_to_fs=0
|
||||||
if ! wait_ready "${SOCK}"; then
|
if ! wait_ready "${SOCK}"; then
|
||||||
|
|
@ -164,9 +264,27 @@ if [[ "${fallback_to_fs}" == "0" ]]; then
|
||||||
exit $?
|
exit $?
|
||||||
fi
|
fi
|
||||||
|
|
||||||
kill "${daemon_pid}" >/dev/null 2>&1 || true
|
stop_probe_daemon
|
||||||
wait "${daemon_pid}" >/dev/null 2>&1 || true
|
|
||||||
trap - EXIT
|
if [[ "${INDEX_BACKEND_REPAIR}" == "1" ]]; then
|
||||||
|
echo "attempting index store repair before fs fallback" >&2
|
||||||
|
repair_index_store "${STORE_ROOT}"
|
||||||
|
start_probe_daemon
|
||||||
|
|
||||||
|
repaired_ok=0
|
||||||
|
if wait_ready "${SOCK}" && probe_index_write_path "${SOCK}" "${SPACE}"; then
|
||||||
|
repaired_ok=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "${repaired_ok}" == "1" ]]; then
|
||||||
|
trap - EXIT
|
||||||
|
wait "${daemon_pid}"
|
||||||
|
exit $?
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "index backend repair failed" >&2
|
||||||
|
stop_probe_daemon
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ "${INDEX_BACKEND_FALLBACK}" != "fs" ]]; then
|
if [[ "${INDEX_BACKEND_FALLBACK}" != "fs" ]]; then
|
||||||
echo "set INDEX_BACKEND_FALLBACK=fs to auto-fallback, or INDEX_BACKEND_PROBE=0 to disable probe" >&2
|
echo "set INDEX_BACKEND_FALLBACK=fs to auto-fallback, or INDEX_BACKEND_PROBE=0 to disable probe" >&2
|
||||||
|
|
|
||||||
|
|
@ -24,17 +24,51 @@ step() {
|
||||||
}
|
}
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
|
if [[ "${smoke_started_daemon:-0}" == "1" && -n "${smoke_daemon_pid:-}" ]]; then
|
||||||
|
kill "${smoke_daemon_pid}" >/dev/null 2>&1 || true
|
||||||
|
wait "${smoke_daemon_pid}" >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
rm -f "${CURSOR_FILE}" >/dev/null 2>&1 || true
|
rm -f "${CURSOR_FILE}" >/dev/null 2>&1 || true
|
||||||
|
if [[ "${SMOKE_USE_EXISTING_DAEMON:-0}" != "1" ]]; then
|
||||||
|
rm -f "${smoke_sock:-}" >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
||||||
require_jq
|
require_jq
|
||||||
|
SMOKE_USE_EXISTING_DAEMON="${SMOKE_USE_EXISTING_DAEMON:-0}"
|
||||||
|
smoke_started_daemon=0
|
||||||
|
smoke_daemon_pid=""
|
||||||
|
smoke_root="${SMOKE_STORE_ROOT:-/tmp/amduat-asl-smoke-${USER:-user}}"
|
||||||
|
smoke_sock="${SMOKE_SOCK:-/tmp/amduatd-smoke-${USER:-user}.sock}"
|
||||||
|
smoke_backend="${SMOKE_STORE_BACKEND:-fs}"
|
||||||
|
smoke_log="${SMOKE_DAEMON_LOG_PATH:-/tmp/smoke-v2-daemon.log}"
|
||||||
|
|
||||||
|
if [[ "${SMOKE_USE_EXISTING_DAEMON}" != "1" ]]; then
|
||||||
|
rm -f "${smoke_sock}" >/dev/null 2>&1 || true
|
||||||
|
export SOCK="${smoke_sock}"
|
||||||
|
STORE_BACKEND="${smoke_backend}" STORE_ROOT="${smoke_root}" SOCK="${smoke_sock}" SPACE="${SPACE:-app1}" \
|
||||||
|
nohup "${ROOT_DIR}/scripts/dev_start_daemon.sh" >"${smoke_log}" 2>&1 &
|
||||||
|
smoke_daemon_pid="$!"
|
||||||
|
smoke_started_daemon=1
|
||||||
|
|
||||||
|
ready=0
|
||||||
|
for _ in $(seq 1 120); do
|
||||||
|
if curl --globoff --silent --show-error --unix-socket "${smoke_sock}" "http://localhost/v2/readyz" >/dev/null 2>&1; then
|
||||||
|
ready=1
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 0.1
|
||||||
|
done
|
||||||
|
[[ "${ready}" == "1" ]] || fail "isolated daemon did not become ready (log: ${smoke_log})"
|
||||||
|
fi
|
||||||
|
|
||||||
app_init
|
app_init
|
||||||
|
|
||||||
run_id="$(date +%s)"
|
run_id="$(date +%s)"
|
||||||
idempotency_key="smoke-seed-${run_id}"
|
idempotency_key="smoke-seed-${run_id}"
|
||||||
doc_name="smoke-doc-${run_id}"
|
doc_name="smokedoc${run_id}"
|
||||||
topic_name="smoke-topic-${run_id}"
|
topic_name="smoketopic${run_id}"
|
||||||
goal_pred="ms.within_domain"
|
goal_pred="ms.within_domain"
|
||||||
|
|
||||||
step "startup"
|
step "startup"
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,11 @@ usage: $0 COMMAND [args]
|
||||||
|
|
||||||
commands:
|
commands:
|
||||||
startup-check
|
startup-check
|
||||||
|
ai-vertical-slice [--skip-evals] [--auto-start-daemon]
|
||||||
|
ai-agent [--json] [--require-evidence] [--max-steps N] [--state-file PATH] [--auto-start-daemon] ROOTS_CSV QUESTION [GOAL_PREDICATES_CSV]
|
||||||
|
ai-check
|
||||||
|
ai-generate [--json] PROMPT
|
||||||
|
ai-answer [--json] [--require-evidence] ROOTS_CSV QUESTION [GOAL_PREDICATES_CSV]
|
||||||
ingest PAYLOAD_JSON
|
ingest PAYLOAD_JSON
|
||||||
sync-once
|
sync-once
|
||||||
consume-changes [--once]
|
consume-changes [--once]
|
||||||
|
|
@ -34,6 +39,62 @@ case "${cmd}" in
|
||||||
startup-check)
|
startup-check)
|
||||||
app_startup_checks
|
app_startup_checks
|
||||||
;;
|
;;
|
||||||
|
ai-vertical-slice)
|
||||||
|
"${ROOT_DIR}/scripts/ai_vertical_slice.sh" "$@"
|
||||||
|
;;
|
||||||
|
ai-agent)
|
||||||
|
"${ROOT_DIR}/scripts/ai_agent_loop.sh" "$@"
|
||||||
|
;;
|
||||||
|
ai-check)
|
||||||
|
app_ai_check
|
||||||
|
;;
|
||||||
|
ai-generate)
|
||||||
|
output_mode="text"
|
||||||
|
if [[ $# -gt 0 && "$1" == "--json" ]]; then
|
||||||
|
output_mode="json"
|
||||||
|
shift
|
||||||
|
fi
|
||||||
|
if [[ $# -lt 1 ]]; then
|
||||||
|
echo "usage: $0 ai-generate [--json] PROMPT" >&2
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
|
if [[ "${output_mode}" == "json" ]]; then
|
||||||
|
app_ai_generate_json "$*"
|
||||||
|
else
|
||||||
|
app_ai_generate_text "$*"
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
ai-answer)
|
||||||
|
output_mode="text"
|
||||||
|
require_evidence=0
|
||||||
|
while [[ $# -gt 0 ]]; do
|
||||||
|
case "$1" in
|
||||||
|
--json)
|
||||||
|
output_mode="json"
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
--require-evidence)
|
||||||
|
require_evidence=1
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
break
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
if [[ $# -lt 2 || $# -gt 3 ]]; then
|
||||||
|
echo "usage: $0 ai-answer [--json] [--require-evidence] ROOTS_CSV QUESTION [GOAL_PREDICATES_CSV]" >&2
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
|
roots_csv="$1"
|
||||||
|
question="$2"
|
||||||
|
goals_csv="${3:-}"
|
||||||
|
if [[ "${output_mode}" == "json" ]]; then
|
||||||
|
app_ai_answer_json "${roots_csv}" "${question}" "${goals_csv}" "${require_evidence}"
|
||||||
|
else
|
||||||
|
app_ai_answer_text "${roots_csv}" "${question}" "${goals_csv}" "${require_evidence}"
|
||||||
|
fi
|
||||||
|
;;
|
||||||
ingest)
|
ingest)
|
||||||
if [[ $# -ne 1 ]]; then
|
if [[ $# -ne 1 ]]; then
|
||||||
echo "usage: $0 ingest PAYLOAD_JSON" >&2
|
echo "usage: $0 ingest PAYLOAD_JSON" >&2
|
||||||
|
|
|
||||||
305
src/app_v2.sh
305
src/app_v2.sh
|
|
@ -78,6 +78,7 @@ app_retrieve_with_fallback() {
|
||||||
printf '%s\n' "${AMDUAT_LAST_BODY}"
|
printf '%s\n' "${AMDUAT_LAST_BODY}"
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
|
local retrieve_status="${AMDUAT_LAST_STATUS}"
|
||||||
|
|
||||||
local first_root
|
local first_root
|
||||||
first_root="$(printf '%s' "${roots_csv}" | awk -F',' '{gsub(/^ +| +$/, "", $1); printf "%s", $1}')"
|
first_root="$(printf '%s' "${roots_csv}" | awk -F',' '{gsub(/^ +| +$/, "", $1); printf "%s", $1}')"
|
||||||
|
|
@ -90,8 +91,19 @@ app_retrieve_with_fallback() {
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
amduat_api_call GET "${fallback_path}"
|
if amduat_api_call GET "${fallback_path}"; then
|
||||||
printf '%s\n' "${AMDUAT_LAST_BODY}"
|
printf '%s\n' "${AMDUAT_LAST_BODY}"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
local fallback_status="${AMDUAT_LAST_STATUS}"
|
||||||
|
|
||||||
|
if [[ "${retrieve_status}" == "404" && "${fallback_status}" == "404" ]]; then
|
||||||
|
# Return an explicit empty graph so callers can handle no-context deterministically.
|
||||||
|
printf '%s\n' '{"nodes":[],"edges":[],"stats":{"reason":"not_found"}}'
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
app_tombstone_edge() {
|
app_tombstone_edge() {
|
||||||
|
|
@ -101,3 +113,294 @@ app_tombstone_edge() {
|
||||||
amduat_api_call POST "/v2/graph/edges/tombstone" "${payload}"
|
amduat_api_call POST "/v2/graph/edges/tombstone" "${payload}"
|
||||||
printf '%s\n' "${AMDUAT_LAST_BODY}"
|
printf '%s\n' "${AMDUAT_LAST_BODY}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Query the Ollama tags endpoint and report whether the configured model is
# listed.
# Globals: CURL_CONNECT_TIMEOUT_SECONDS, OLLAMA_TIMEOUT_SECONDS, OLLAMA_HOST,
#          OLLAMA_MODEL (all read)
# Outputs: the raw tags response on stdout; a warning on stderr when the
#          configured model name is not found in it.
# Returns: 1 if the endpoint cannot be reached, 0 otherwise (a missing model
#          is only a warning).
app_ai_check() {
  local tags
  tags="$(
    curl --globoff --silent --show-error \
      --connect-timeout "${CURL_CONNECT_TIMEOUT_SECONDS}" \
      --max-time "${OLLAMA_TIMEOUT_SECONDS}" \
      "${OLLAMA_HOST}/api/tags"
  )" || {
    echo "failed to reach ollama at ${OLLAMA_HOST}" >&2
    return 1
  }

  printf '%s\n' "${tags}"

  # Compare on whitespace-stripped copies so that `"name": "x"` (pretty or
  # spaced JSON) matches as well as the compact `"name":"x"` form.
  local compact_tags="${tags//[[:space:]]/}"
  local compact_model="${OLLAMA_MODEL//[[:space:]]/}"
  if [[ "${compact_tags}" != *"\"name\":\"${compact_model}\""* ]]; then
    echo "warning: configured model not found in tags: ${OLLAMA_MODEL}" >&2
  fi
}
|
||||||
|
|
||||||
|
# Send a non-streaming generate request to Ollama and print the raw JSON body.
# Arguments: $1 - prompt text
# Globals:   OLLAMA_MODEL, OLLAMA_HOST, OLLAMA_TIMEOUT_SECONDS,
#            CURL_CONNECT_TIMEOUT_SECONDS (all read)
# Outputs:   response body on stdout; diagnostics on stderr
# Returns:   2 if jq is missing; 1 if the request fails at transport level or
#            the server answers with HTTP status >= 400; 0 otherwise.
app_ai_generate_json() {
  if ! command -v jq >/dev/null 2>&1; then
    echo "jq is required for ai-generate" >&2
    return 2
  fi

  local prompt="${1:-}"
  local payload
  payload="$(jq -nc --arg model "${OLLAMA_MODEL}" --arg prompt "${prompt}" \
    '{model:$model,prompt:$prompt,stream:false}')" || return 1

  # The HTTP status is appended on its own last line so it can be split off.
  local raw
  raw="$(
    curl --globoff --silent --show-error \
      --connect-timeout "${CURL_CONNECT_TIMEOUT_SECONDS}" \
      --max-time "${OLLAMA_TIMEOUT_SECONDS}" \
      -H "Content-Type: application/json" \
      -X POST \
      --data-binary "${payload}" \
      -w $'\n%{http_code}' \
      "${OLLAMA_HOST}/api/generate"
  )" || {
    echo "failed to call ollama generate at ${OLLAMA_HOST}" >&2
    return 1
  }

  local status="${raw##*$'\n'}"
  local out="${raw}"
  if [[ "${status}" =~ ^[0-9]{3}$ ]]; then
    out="${raw%$'\n'*}"
    # Mirror what a plain command substitution would do to the body.
    while [[ "${out}" == *$'\n' ]]; do
      out="${out%$'\n'}"
    done
    # Without this check an error body (e.g. unknown model) would be handed
    # to callers as if it were a model response.
    if (( 10#${status} >= 400 )); then
      echo "ollama generate returned HTTP ${status}: ${out}" >&2
      return 1
    fi
  fi

  printf '%s\n' "${out}"
}
|
||||||
|
|
||||||
|
# Alias for app_ai_generate_json: forwards the prompt ($1) unchanged and
# returns whatever that function prints and returns.
app_ai_generate() {
  app_ai_generate_json "$1"
}
|
||||||
|
|
||||||
|
# Print a JSON object mapping predicate refs to readable predicate names,
# derived from GET /v2/graph/schema/predicates.
# Globals: APP_AI_PREDICATE_MAP_INIT / APP_AI_PREDICATE_MAP_CACHE (read and
#          written) memoise the result in the current shell;
#          AMDUAT_LAST_BODY (read) holds the body after amduat_api_call.
# Outputs: the map on stdout; "{}" when the schema call or parsing fails.
# Returns: 2 if jq is missing, 0 otherwise.
# NOTE(review): when invoked inside $(...), the cache variables are set in a
# subshell and will not be visible to the caller's shell - confirm whether
# the memoisation is expected to take effect there.
app_ai_predicate_map_json() {
  if ! command -v jq >/dev/null 2>&1; then
    echo "jq is required for ai-answer" >&2
    return 2
  fi

  # Serve the memoised value (or an empty map) once initialised.
  if [[ "${APP_AI_PREDICATE_MAP_INIT:-0}" == "1" ]]; then
    local cached="${APP_AI_PREDICATE_MAP_CACHE:-}"
    if [[ -z "${cached}" ]]; then
      cached='{}'
    fi
    printf '%s\n' "${cached}"
    return 0
  fi

  local schema_json
  if amduat_api_call GET "/v2/graph/schema/predicates"; then
    schema_json="${AMDUAT_LAST_BODY}"
  else
    schema_json="{}"
  fi

  # Accepts either a bare array or an object wrapping the list under
  # predicates/items/data/results; entries lacking a ref or name are dropped.
  local map_filter='
    def clean(v): (v // "" | tostring | gsub("[\\r\\n\\t]+";" ") | gsub(" +";" ") | sub("^ ";"") | sub(" $";""));
    def entry_from(o):
      {ref: clean(o.predicate_ref // o.ref // o.predicate // o.id // ""), name: clean(o.alias // o.name // o.predicate // o.label // "")};
    def pred_list(root):
      if (root | type) == "array" then root
      elif (root | type) == "object" then (root.predicates // root.items // root.data // root.results // [])
      else []
      end;

    (pred_list(.) | map(entry_from(.)) | map(select(.ref != "" and .name != "")))
    | reduce .[] as $e ({}; .[$e.ref] = $e.name)
  '

  APP_AI_PREDICATE_MAP_CACHE="$(
    jq -c "${map_filter}" <<<"${schema_json}" 2>/dev/null || printf '{}'
  )"
  APP_AI_PREDICATE_MAP_INIT=1
  printf '%s\n' "${APP_AI_PREDICATE_MAP_CACHE}"
}
|
||||||
|
|
||||||
|
# Render a retrieval result as a plain-text "Nodes:/Edges:" listing for use
# as LLM prompt context.
# Arguments: $1 - retrieval JSON (object with optional .nodes / .edges)
#            $2 - predicate map JSON (ref -> name), optional, default "{}"
#            $3 - comma-separated goal predicates, optional
# Outputs:   one text document on stdout: "Nodes:" followed by up to 50
#            unique node names, then "Edges:" followed by up to 100
#            "s --p--> o" lines; "- (none)" stands in for an empty section.
# Returns:   2 if jq is missing, 1 if the goals list cannot be encoded,
#            otherwise jq's exit status.
app_ai_build_context() {
  if ! command -v jq >/dev/null 2>&1; then
    echo "jq is required for ai-answer" >&2
    return 2
  fi

  local retrieve_json="$1"
  local predicate_map_json="${2:-}"
  local goals_csv="${3:-}"
  if [[ -z "${predicate_map_json}" ]]; then
    predicate_map_json='{}'
  fi

  # Let jq build the goals array so quotes/backslashes in a goal are escaped
  # instead of producing invalid JSON for --argjson.
  local goals_json="[]"
  if [[ -n "${goals_csv}" ]]; then
    goals_json="$(printf '%s' "${goals_csv}" \
      | jq -Rsc '[split("\n")[] | split(",")[] | sub("^ +"; "") | sub(" +$"; "") | select(length > 0)]')" || return 1
  fi

  printf '%s' "${retrieve_json}" | jq -r --arg predicate_map_json "${predicate_map_json}" --argjson goals "${goals_json}" '
    def clean(v): (v // "" | tostring | gsub("[\\r\\n\\t]+";" ") | gsub(" +";" ") | sub("^ ";"") | sub(" $";""));
    def node_name(n): clean(n.name // n.node_ref // n.id // "");
    ($predicate_map_json | fromjson? // {}) as $predicate_map
    | ($goals | map(clean(.)) | map(select(length > 0))) as $goal_set
    | (if ($goal_set | length) == 1 then $goal_set[0] else "" end) as $goal_hint
    | . as $root
    | (($root.nodes // []) | reduce .[] as $n ({};
        if (($n.concept_ref // "") | tostring | length) > 0
        then .[$n.concept_ref] = (node_name($n))
        else .
        end
      )) as $node_map
    | def edge_triplet(e):
        {
          # `// ""` keeps a missing *_ref from aborting the object lookup.
          s: clean(e.subject // e.s // ($node_map[(e.subject_ref // "")] // e.subject_ref // "")),
          p: clean($predicate_map[(e.predicate_ref // "")] // e.predicate // e.p // (if $goal_hint != "" then $goal_hint else empty end) // e.predicate_ref // ""),
          o: clean(e.object // e.o // ($node_map[(e.object_ref // "")] // e.object_ref // ""))
        };

      {
        nodes: (($root.nodes // []) | map(node_name(.)) | map(select(length > 0)) | unique | .[0:50]),
        edges: (($root.edges // []) | map(edge_triplet(.)) | map(select(.s != "" and .p != "" and .o != "")) | .[0:100])
      }
    # Join each list into ONE string: adding generators (.nodes[] / .edges[])
    # would emit a separate document for every node x edge combination.
    | "Nodes:\n"
      + (if (.nodes | length) == 0 then "- (none)\n" else (.nodes | map("- " + . + "\n") | join("")) end)
      + "Edges:\n"
      + (if (.edges | length) == 0 then "- (none)\n" else (.edges | map("- " + .s + " --" + .p + "--> " + .o + "\n") | join("")) end)
  '
}
|
||||||
|
|
||||||
|
# Extract a bounded list of evidence edges from a retrieval result.
# Arguments: $1 - retrieval JSON (optional .nodes, .edges, .explanations)
#            $2 - comma-separated goal predicates, optional
#            $3 - predicate map JSON (ref -> name), optional, default "{}"
# Globals:   AI_EVIDENCE_MAX_EDGES (read; 5 is used when unset)
# Outputs:   compact JSON array on stdout. Each element carries edge_ref,
#            subject, predicate, predicate_name, object, the three *_ref
#            fields, plus depth/reasons from the matching explanation.
#            When goals are given, only edges whose predicate or
#            predicate_ref is a goal are kept; if none match, all edges are
#            used instead.
# Returns:   2 if jq is missing, 1 if the goals list cannot be encoded,
#            otherwise jq's exit status.
app_ai_extract_evidence_json() {
  if ! command -v jq >/dev/null 2>&1; then
    echo "jq is required for ai-answer" >&2
    return 2
  fi

  local retrieve_json="$1"
  local goals_csv="${2:-}"
  local predicate_map_json="${3:-}"
  if [[ -z "${predicate_map_json}" ]]; then
    predicate_map_json='{}'
  fi

  # Let jq build the goals array so quotes/backslashes in a goal are escaped
  # instead of producing invalid JSON for --argjson.
  local goals_json="[]"
  if [[ -n "${goals_csv}" ]]; then
    goals_json="$(printf '%s' "${goals_csv}" \
      | jq -Rsc '[split("\n")[] | split(",")[] | sub("^ +"; "") | sub(" +$"; "") | select(length > 0)]')" || return 1
  fi

  printf '%s' "${retrieve_json}" | jq -c --argjson goals "${goals_json}" --arg predicate_map_json "${predicate_map_json}" --argjson max_edges "${AI_EVIDENCE_MAX_EDGES:-5}" '
    def clean(v): (v // "" | tostring | gsub("[\\r\\n\\t]+";" ") | gsub(" +";" ") | sub("^ ";"") | sub(" $";""));
    ($predicate_map_json | fromjson? // {}) as $predicate_map
    | . as $root
    | (($root.nodes // []) | reduce .[] as $n ({};
        if (($n.concept_ref // "") | tostring | length) > 0
        then .[$n.concept_ref] = clean($n.name // $n.node_ref // $n.id // $n.concept_ref)
        else .
        end
      )) as $node_map
    # Explanations without a string edge_ref cannot be used as object keys.
    | (($root.explanations // [])
        | reduce (.[] | select((.edge_ref | type) == "string")) as $x ({};
            .[$x.edge_ref] = {depth: ($x.depth // null), reasons: ($x.reasons // [])}
          )) as $exp_map
    | ($goals | map(clean(.)) | map(select(length > 0))) as $goal_set
    | (($root.edges // [])
        | map({
            edge_ref: clean(.edge_ref // ""),
            # `// ""` keeps a missing *_ref from aborting the object lookup.
            subject: clean(.subject // .s // ($node_map[(.subject_ref // "")] // .subject_ref // "")),
            predicate: clean($predicate_map[(.predicate_ref // "")] // .predicate // .p // .predicate_ref // ""),
            predicate_name: clean($predicate_map[(.predicate_ref // "")] // .predicate // .p // ""),
            object: clean(.object // .o // ($node_map[(.object_ref // "")] // .object_ref // "")),
            predicate_ref: clean(.predicate_ref // ""),
            subject_ref: clean(.subject_ref // ""),
            object_ref: clean(.object_ref // "")
          })
        | map(select(.subject != "" and .predicate != "" and .object != ""))
        | map(. + ($exp_map[.edge_ref] // {depth:null,reasons:[]}))) as $all_edges
    | ($all_edges | map(
        if ($goal_set | length) == 0 then .
        else select((.predicate as $p | $goal_set | index($p)) != null or (.predicate_ref as $pr | $goal_set | index($pr)) != null)
        end
      )) as $filtered_edges
    | (if ($goal_set | length) > 0 and ($filtered_edges | length) == 0 then $all_edges else $filtered_edges end) as $selected_edges
    | (if ($goal_set | length) == 1 then $goal_set[0] else "" end) as $goal_hint
    | ($selected_edges | map(
        if .predicate_name == "" and $goal_hint != "" then . + {predicate_name:$goal_hint, predicate:$goal_hint}
        else .
        end
      ))
    | .[0:$max_edges]
  '
}
|
||||||
|
|
||||||
|
# Answer a question from graph context and print a JSON result that carries
# the model output plus evidence/grounding metadata.
# Arguments: $1 - comma-separated roots
#            $2 - question text
#            $3 - comma-separated goal predicates, optional
#            $4 - "1" to require evidence, optional (default 0)
# Globals:   OLLAMA_MODEL (read), AI_CONTEXT_MAX_CHARS (read; 12000 if unset)
# Outputs:   one JSON object on stdout. done_reason is "no_context" when the
#            retrieval has neither nodes nor edges, "no_evidence" when
#            evidence is required but none was extracted.
# Returns:   2 if jq is missing or AI_CONTEXT_MAX_CHARS is not a number;
#            1 if retrieval fails or required evidence is absent; otherwise
#            the status of the helper / jq step that ran last.
app_ai_answer_json() {
  if ! command -v jq >/dev/null 2>&1; then
    echo "jq is required for ai-answer" >&2
    return 2
  fi

  local roots_csv="$1"
  local question="$2"
  local goals_csv="${3:-}"
  local require_evidence="${4:-0}"

  local max_chars="${AI_CONTEXT_MAX_CHARS:-12000}"
  if [[ ! "${max_chars}" =~ ^[0-9]+$ ]]; then
    echo "AI_CONTEXT_MAX_CHARS must be a non-negative integer: ${max_chars}" >&2
    return 2
  fi

  local require_json="false"
  if [[ "${require_evidence}" == "1" ]]; then
    require_json="true"
  fi

  local retrieve_out
  retrieve_out="$(app_retrieve_with_fallback "${roots_csv}" "${goals_csv}")" || return 1

  local predicate_map_json
  predicate_map_json="$(app_ai_predicate_map_json)" || return $?

  local has_context
  has_context="$(printf '%s' "${retrieve_out}" | jq -r '((.nodes // []) | length) + ((.edges // []) | length) > 0')"
  local evidence
  evidence="$(app_ai_extract_evidence_json "${retrieve_out}" "${goals_csv}" "${predicate_map_json}")" || return $?
  local evidence_count
  evidence_count="$(printf '%s' "${evidence}" | jq -r 'length')"
  local has_evidence="false"
  if [[ "${evidence_count}" -gt 0 ]]; then
    has_evidence="true"
  fi

  if [[ "${has_context}" != "true" ]]; then
    jq -nc --arg model "${OLLAMA_MODEL}" \
      --arg response "Insufficient graph context for the requested roots/predicates. Ingest or reference existing nodes/edges first." \
      --argjson evidence "${evidence}" \
      --argjson require_evidence "${require_json}" \
      '{model:$model,response:$response,done:true,done_reason:"no_context",evidence:$evidence,grounding:{has_evidence:false,require_evidence:$require_evidence}}'
    return 0
  fi

  if [[ "${require_evidence}" == "1" && "${has_evidence}" != "true" ]]; then
    jq -nc --arg model "${OLLAMA_MODEL}" \
      --arg response "No supporting graph evidence found for the requested roots/predicates." \
      --argjson evidence "${evidence}" \
      '{model:$model,response:$response,done:true,done_reason:"no_evidence",evidence:$evidence,grounding:{has_evidence:false,require_evidence:true}}'
    return 1
  fi

  local context
  context="$(app_ai_build_context "${retrieve_out}" "${predicate_map_json}" "${goals_csv}")" || return $?
  # Truncate in-shell: this counts characters in the current locale (so a
  # multi-byte character is never split) and avoids a `printf | head -c`
  # pipeline, which can fail under `set -o pipefail` when the writer is cut
  # off after head exits on a large context.
  context="${context:0:$((10#${max_chars}))}"

  local prompt
  prompt="$(cat <<PROMPT
Answer the question using ONLY the graph context below.
If the context is insufficient, say exactly what is missing.
Keep the answer concise.

Graph context:
${context}

Question:
${question}
PROMPT
)"

  local model_out
  model_out="$(app_ai_generate_json "${prompt}")" || return $?
  printf '%s' "${model_out}" | jq -c --argjson evidence "${evidence}" --argjson has_evidence "${has_evidence}" --argjson require_evidence "${require_json}" '. + {evidence:$evidence, grounding:{has_evidence:$has_evidence, require_evidence:$require_evidence}}'
}
|
||||||
|
|
||||||
|
# Text-mode wrapper around app_ai_answer_json: prints only the answer text.
# Arguments: $1 - comma-separated roots
#            $2 - question text
#            $3 - comma-separated goal predicates, optional
#            $4 - "1" to require evidence, optional (default 0)
# Outputs:   on success, the .response text followed by a blank line on
#            stdout. On failure nothing is written to stdout; if the JSON
#            helper produced an explanatory .response, it is echoed to stderr.
# Returns:   the exit status of app_ai_answer_json.
app_ai_answer_text() {
  local roots_csv="$1"
  local question="$2"
  local goals_csv="${3:-}"
  local require_evidence="${4:-0}"
  local out
  local rc=0
  out="$(app_ai_answer_json "${roots_csv}" "${question}" "${goals_csv}" "${require_evidence}")" || rc=$?

  if (( rc != 0 )); then
    # Surface the reason (e.g. missing evidence) instead of failing silently.
    # Best-effort: a body that is not JSON is simply not echoed.
    if [[ -n "${out}" ]] && command -v jq >/dev/null 2>&1; then
      printf '%s' "${out}" | jq -r '.response // empty' >&2 2>/dev/null || true
    fi
    return "${rc}"
  fi

  if ! command -v jq >/dev/null 2>&1; then
    echo "${out}"
    return 0
  fi
  printf '%s' "${out}" | jq -r '.response // empty'
  printf '\n'
}
|
||||||
|
|
||||||
|
# Text-mode wrapper around app_ai_generate_json.
# Arguments: $1 - prompt text
# Outputs:   the .response field followed by a blank line on stdout; the raw
#            body is echoed instead when jq is not available.
# Returns:   the status of app_ai_generate_json when it fails.
app_ai_generate_text() {
  local generated
  generated="$(app_ai_generate_json "$1")" || return $?

  if command -v jq >/dev/null 2>&1; then
    jq -r '.response // empty' <<<"${generated}"
    printf '\n'
  else
    echo "${generated}"
  fi
}
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,11 @@ amduat_config_load() {
|
||||||
local override_retry_max_ms="${RETRY_MAX_MS:-}"
|
local override_retry_max_ms="${RETRY_MAX_MS:-}"
|
||||||
local override_connect_timeout="${CURL_CONNECT_TIMEOUT_SECONDS:-}"
|
local override_connect_timeout="${CURL_CONNECT_TIMEOUT_SECONDS:-}"
|
||||||
local override_max_time="${CURL_MAX_TIME_SECONDS:-}"
|
local override_max_time="${CURL_MAX_TIME_SECONDS:-}"
|
||||||
|
local override_ollama_host="${OLLAMA_HOST:-}"
|
||||||
|
local override_ollama_model="${OLLAMA_MODEL:-}"
|
||||||
|
local override_ollama_timeout="${OLLAMA_TIMEOUT_SECONDS:-}"
|
||||||
|
local override_ai_context_max_chars="${AI_CONTEXT_MAX_CHARS:-}"
|
||||||
|
local override_ai_evidence_max_edges="${AI_EVIDENCE_MAX_EDGES:-}"
|
||||||
|
|
||||||
local env_file="${root_dir}/config/env.local"
|
local env_file="${root_dir}/config/env.local"
|
||||||
if [[ ! -f "${env_file}" ]]; then
|
if [[ ! -f "${env_file}" ]]; then
|
||||||
|
|
@ -34,6 +39,11 @@ amduat_config_load() {
|
||||||
if [[ -n "${override_retry_max_ms}" ]]; then RETRY_MAX_MS="${override_retry_max_ms}"; fi
|
if [[ -n "${override_retry_max_ms}" ]]; then RETRY_MAX_MS="${override_retry_max_ms}"; fi
|
||||||
if [[ -n "${override_connect_timeout}" ]]; then CURL_CONNECT_TIMEOUT_SECONDS="${override_connect_timeout}"; fi
|
if [[ -n "${override_connect_timeout}" ]]; then CURL_CONNECT_TIMEOUT_SECONDS="${override_connect_timeout}"; fi
|
||||||
if [[ -n "${override_max_time}" ]]; then CURL_MAX_TIME_SECONDS="${override_max_time}"; fi
|
if [[ -n "${override_max_time}" ]]; then CURL_MAX_TIME_SECONDS="${override_max_time}"; fi
|
||||||
|
if [[ -n "${override_ollama_host}" ]]; then OLLAMA_HOST="${override_ollama_host}"; fi
|
||||||
|
if [[ -n "${override_ollama_model}" ]]; then OLLAMA_MODEL="${override_ollama_model}"; fi
|
||||||
|
if [[ -n "${override_ollama_timeout}" ]]; then OLLAMA_TIMEOUT_SECONDS="${override_ollama_timeout}"; fi
|
||||||
|
if [[ -n "${override_ai_context_max_chars}" ]]; then AI_CONTEXT_MAX_CHARS="${override_ai_context_max_chars}"; fi
|
||||||
|
if [[ -n "${override_ai_evidence_max_edges}" ]]; then AI_EVIDENCE_MAX_EDGES="${override_ai_evidence_max_edges}"; fi
|
||||||
|
|
||||||
SOCK="${SOCK:-amduatd.sock}"
|
SOCK="${SOCK:-amduatd.sock}"
|
||||||
BASE="${BASE:-http://localhost}"
|
BASE="${BASE:-http://localhost}"
|
||||||
|
|
@ -52,4 +62,10 @@ amduat_config_load() {
|
||||||
|
|
||||||
CURL_CONNECT_TIMEOUT_SECONDS="${CURL_CONNECT_TIMEOUT_SECONDS:-2}"
|
CURL_CONNECT_TIMEOUT_SECONDS="${CURL_CONNECT_TIMEOUT_SECONDS:-2}"
|
||||||
CURL_MAX_TIME_SECONDS="${CURL_MAX_TIME_SECONDS:-30}"
|
CURL_MAX_TIME_SECONDS="${CURL_MAX_TIME_SECONDS:-30}"
|
||||||
|
|
||||||
|
OLLAMA_HOST="${OLLAMA_HOST:-http://127.0.0.1:11434}"
|
||||||
|
OLLAMA_MODEL="${OLLAMA_MODEL:-qwen2.5-coder:7b}"
|
||||||
|
OLLAMA_TIMEOUT_SECONDS="${OLLAMA_TIMEOUT_SECONDS:-60}"
|
||||||
|
AI_CONTEXT_MAX_CHARS="${AI_CONTEXT_MAX_CHARS:-12000}"
|
||||||
|
AI_EVIDENCE_MAX_EDGES="${AI_EVIDENCE_MAX_EDGES:-5}"
|
||||||
}
|
}
|
||||||
|
|
|
||||||
118
tests/ai_answer_eval.sh
Executable file
118
tests/ai_answer_eval.sh
Executable file
|
|
@ -0,0 +1,118 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||||
|
# shellcheck source=/dev/null
|
||||||
|
source "${ROOT_DIR}/src/app_v2.sh"
|
||||||
|
|
||||||
|
# Abort the script with exit status 2 and a diagnostic on stderr when no
# `jq` command can be resolved; return normally otherwise.
require_jq() {
  if ! command -v jq >/dev/null 2>&1; then
    printf '%s\n' "ai_answer_eval.sh: jq is required" >&2
    exit 2
  fi
}
|
||||||
|
|
||||||
|
# Report a failed check on stderr and terminate the script with status 1.
#
# Arguments:
#   $1 - human-readable description of the failed expectation
fail() {
  # ${1:-} keeps the helper usable under `set -u` even without a message.
  printf '%s\n' "ai_answer_eval.sh: FAIL: ${1:-}" >&2
  exit 1
}
|
||||||
|
|
||||||
|
require_jq
|
||||||
|
app_init
|
||||||
|
|
||||||
|
tmp_dir="$(mktemp -d /tmp/ai-answer-eval.XXXXXX)"
|
||||||
|
# Remove the scratch directory named by the global `tmp_dir`.
# Does nothing when `tmp_dir` is unset or empty.
cleanup() {
  if [[ -n "${tmp_dir:-}" ]]; then
    # `--` stops option parsing so an odd path can never be read as a flag.
    rm -rf -- "${tmp_dir}"
  fi
}
|
||||||
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
mock_prompt_file="${tmp_dir}/prompt.txt"
|
||||||
|
mock_retrieve_path_file="${tmp_dir}/retrieve.path"
|
||||||
|
mock_retrieve_payload_file="${tmp_dir}/retrieve.payload"
|
||||||
|
|
||||||
|
# Test double for amduat_api_call: answers two endpoints with canned data and
# rejects everything else.
#
# Arguments:
#   $1 - HTTP method
#   $2 - request path
#   $3 - request body (optional, defaults to empty)
# Globals:
#   AMDUAT_LAST_STATUS / AMDUAT_LAST_BODY   written on every call
#   mock_retrieve_path_file                 receives the path of a retrieve call
#   mock_retrieve_payload_file              receives the body of a retrieve call
#   MOCK_NO_EDGES                           read; "1" makes retrieve return no edges
# Returns:
#   0 for the two mocked endpoints, 1 (with a 404 status/body) otherwise.
amduat_api_call() {
  local method="$1"
  local path="$2"
  local body="${3:-}"

  # Predicate schema lookup: a single predicate with alias ms.within_domain.
  if [[ "${method}" == "GET" && "${path}" == "/v2/graph/schema/predicates" ]]; then
    AMDUAT_LAST_STATUS="200"
    AMDUAT_LAST_BODY='{"predicates":[{"predicate_ref":"ref-ms-within-domain","alias":"ms.within_domain"}]}'
    return 0
  fi

  # Graph retrieval: record what was requested so the test can inspect it.
  if [[ "${method}" == "POST" && "${path}" == "/v2/graph/retrieve" ]]; then
    printf '%s' "${path}" > "${mock_retrieve_path_file}"
    printf '%s' "${body}" > "${mock_retrieve_payload_file}"
    AMDUAT_LAST_STATUS="200"
    if [[ "${MOCK_NO_EDGES:-0}" == "1" ]]; then
      AMDUAT_LAST_BODY='{"nodes":[{"name":"doc:1"},{"name":"topic:alpha"}],"edges":[]}'
    else
      AMDUAT_LAST_BODY='{"nodes":[{"name":"doc:1","concept_ref":"ref-doc1"},{"name":"topic:alpha","concept_ref":"ref-topic-alpha"}],"edges":[{"subject_ref":"ref-doc1","predicate_ref":"ref-ms-within-domain","object_ref":"ref-topic-alpha","edge_ref":"ref-edge-1"}]}'
    fi
    return 0
  fi

  # Anything else is an unexpected call from the code under test.
  AMDUAT_LAST_STATUS="404"
  AMDUAT_LAST_BODY='{"error":"not mocked"}'
  return 1
}
|
||||||
|
|
||||||
|
# Test double that shadows the `curl` command with a shell function.
#
# It scans its arguments for the request body (the value after
# --data-binary) and the target URL (any http:// or https:// argument),
# ignores every other argument, then:
#   * fails the test unless the URL is "${OLLAMA_HOST}/api/generate",
#   * writes the `.prompt` field of the body to ${mock_prompt_file},
#   * prints a fixed JSON reply on stdout.
#
# Globals read: OLLAMA_HOST, mock_prompt_file. Uses `fail` and `jq`.
curl() {
  local body=""
  local endpoint=""
  local prompt # kept local so the mock does not leak state into the test

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --data-binary)
        body="${2:-}"
        # Shift one word at a time: `shift 2` with a single remaining
        # argument fails without shifting, which would spin this loop
        # forever whenever errexit is not in effect.
        shift
        if [[ $# -gt 0 ]]; then
          shift
        fi
        ;;
      http://* | https://*)
        endpoint="$1"
        shift
        ;;
      *)
        shift
        ;;
    esac
  done

  [[ "${endpoint}" == "${OLLAMA_HOST}/api/generate" ]] || fail "unexpected curl endpoint: ${endpoint}"
  prompt="$(printf '%s' "${body}" | jq -r '.prompt')"
  printf '%s' "${prompt}" > "${mock_prompt_file}"
  printf '%s\n' '{"model":"mock-model","response":"Grounded answer from mock model."}'
}
|
||||||
|
|
||||||
|
json_out="$(app_ai_answer_json "doc:1" "What domain is doc:1 in?" "ms.within_domain")" || fail "app_ai_answer_json failed"
|
||||||
|
printf '%s' "${json_out}" | jq -e '.response == "Grounded answer from mock model."' >/dev/null || fail "unexpected response payload"
|
||||||
|
printf '%s' "${json_out}" | jq -e '.evidence | length == 1' >/dev/null || fail "missing evidence"
|
||||||
|
printf '%s' "${json_out}" | jq -e '.grounding.has_evidence == true' >/dev/null || fail "grounding.has_evidence should be true"
|
||||||
|
printf '%s' "${json_out}" | jq -e '.evidence[0].subject == "doc:1" and .evidence[0].predicate == "ms.within_domain" and .evidence[0].object == "topic:alpha"' >/dev/null \
|
||||||
|
|| fail "evidence triplet mismatch"
|
||||||
|
printf '%s' "${json_out}" | jq -e '.evidence[0].predicate_name == "ms.within_domain"' >/dev/null || fail "predicate_name should resolve from schema"
|
||||||
|
|
||||||
|
[[ -f "${mock_retrieve_path_file}" ]] || fail "retrieve call was not made"
|
||||||
|
[[ "$(cat "${mock_retrieve_path_file}")" == "/v2/graph/retrieve" ]] || fail "retrieve path mismatch"
|
||||||
|
|
||||||
|
retrieve_payload="$(cat "${mock_retrieve_payload_file}")"
|
||||||
|
printf '%s' "${retrieve_payload}" | jq -e '.roots == ["doc:1"]' >/dev/null || fail "roots payload mismatch"
|
||||||
|
printf '%s' "${retrieve_payload}" | jq -e '.goal_predicates == ["ms.within_domain"]' >/dev/null || fail "goal_predicates payload mismatch"
|
||||||
|
|
||||||
|
prompt_text="$(cat "${mock_prompt_file}")"
|
||||||
|
[[ "${prompt_text}" == *"Question:"* ]] || fail "prompt missing question label"
|
||||||
|
[[ "${prompt_text}" == *"What domain is doc:1 in?"* ]] || fail "prompt missing question"
|
||||||
|
[[ "${prompt_text}" == *"doc:1 --ms.within_domain--> topic:alpha"* ]] || fail "prompt missing graph edge context"
|
||||||
|
|
||||||
|
text_out="$(app_ai_answer_text "doc:1" "What domain is doc:1 in?" "ms.within_domain")" || fail "app_ai_answer_text failed"
|
||||||
|
[[ "${text_out}" == "Grounded answer from mock model." ]] || fail "text output mismatch"
|
||||||
|
|
||||||
|
# Strict-evidence probe: call app_ai_answer_json with require_evidence="1"
# while the retrieve mock reports no edges. The call is expected to fail, so
# its status is captured with `||` rather than by toggling errexit, and
# MOCK_NO_EDGES is scoped to this single invocation.
strict_rc=0
strict_out="$(MOCK_NO_EDGES=1 app_ai_answer_json "doc:1" "What domain is doc:1 in?" "ms.within_domain" "1")" || strict_rc=$?
|
||||||
|
[[ "${strict_rc}" -ne 0 ]] || fail "expected non-zero for --require-evidence with no supporting edges"
|
||||||
|
printf '%s' "${strict_out}" | jq -e '.done_reason == "no_evidence"' >/dev/null || fail "expected done_reason no_evidence"
|
||||||
|
printf '%s' "${strict_out}" | jq -e '.grounding.require_evidence == true and .grounding.has_evidence == false' >/dev/null \
|
||||||
|
|| fail "expected strict grounding flags"
|
||||||
|
|
||||||
|
echo "ai_answer_eval.sh: PASS"
|
||||||
37
tests/ai_eval.sh
Executable file
37
tests/ai_eval.sh
Executable file
|
|
@ -0,0 +1,37 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||||
|
# shellcheck source=/dev/null
|
||||||
|
source "${ROOT_DIR}/src/app_v2.sh"
|
||||||
|
|
||||||
|
# Abort the script with exit status 2 and a diagnostic on stderr when no
# `jq` command can be resolved; return normally otherwise.
require_jq() {
  if ! command -v jq >/dev/null 2>&1; then
    printf '%s\n' "ai_eval.sh: jq is required" >&2
    exit 2
  fi
}
|
||||||
|
|
||||||
|
# Report a failed check on stderr and terminate the script with status 1.
#
# Arguments:
#   $1 - human-readable description of the failed expectation
fail() {
  # ${1:-} keeps the helper usable under `set -u` even without a message.
  printf '%s\n' "ai_eval.sh: FAIL: ${1:-}" >&2
  exit 1
}
|
||||||
|
|
||||||
|
app_init
|
||||||
|
require_jq
|
||||||
|
|
||||||
|
tags_out="$(app_ai_check)" || fail "ai-check failed"
|
||||||
|
printf '%s' "${tags_out}" | jq -e '.models | type == "array"' >/dev/null || fail "tags response missing models array"
|
||||||
|
printf '%s' "${tags_out}" | jq -e --arg model "${OLLAMA_MODEL}" '.models[] | select(.name == $model)' >/dev/null \
|
||||||
|
|| fail "configured model not present: ${OLLAMA_MODEL}"
|
||||||
|
|
||||||
|
prompt="Return one short sentence describing graph retrieval testing."
|
||||||
|
gen_out="$(app_ai_generate "${prompt}")" || fail "ai-generate failed"
|
||||||
|
|
||||||
|
printf '%s' "${gen_out}" | jq -e '.response | type == "string"' >/dev/null || fail "generate response missing text"
|
||||||
|
printf '%s' "${gen_out}" | jq -e '.model | type == "string"' >/dev/null || fail "generate response missing model"
|
||||||
|
|
||||||
|
response_text="$(printf '%s' "${gen_out}" | jq -r '.response')"
|
||||||
|
[[ -n "${response_text//[[:space:]]/}" ]] || fail "generate response text is empty"
|
||||||
|
|
||||||
|
echo "ai_eval.sh: PASS"
|
||||||
|
|
@ -25,8 +25,49 @@ assert_contains() {
|
||||||
# in the regular integration entrypoint.
|
# in the regular integration entrypoint.
|
||||||
"${ROOT_DIR}/tests/changes_consumer_handler.sh"
|
"${ROOT_DIR}/tests/changes_consumer_handler.sh"
|
||||||
|
|
||||||
app_init
|
|
||||||
require_jq
|
require_jq
|
||||||
|
IT_USE_EXISTING_DAEMON="${IT_USE_EXISTING_DAEMON:-0}"
|
||||||
|
it_started_daemon=0
|
||||||
|
it_daemon_pid=""
|
||||||
|
it_root="${IT_STORE_ROOT:-/tmp/amduat-asl-it-${USER:-user}}"
|
||||||
|
it_sock="${IT_SOCK:-/tmp/amduatd-it-${USER:-user}.sock}"
|
||||||
|
it_backend="${IT_STORE_BACKEND:-fs}"
|
||||||
|
it_log="${IT_DAEMON_LOG_PATH:-/tmp/integration-v2-daemon.log}"
|
||||||
|
|
||||||
|
# EXIT handler for the integration run.
#
# Globals read:
#   it_started_daemon       "1" when this script launched the daemon itself
#   it_daemon_pid           PID of that background job
#   IT_USE_EXISTING_DAEMON  "1" when an externally managed daemon is in use
#   it_sock                 socket path used for the isolated daemon
#
# Every step is best-effort (`|| true`) so cleanup never changes the
# script's exit status.
cleanup() {
  # Only stop a daemon we started; then reap it.
  if [[ "${it_started_daemon}" == "1" && -n "${it_daemon_pid}" ]]; then
    kill "${it_daemon_pid}" >/dev/null 2>&1 || true
    wait "${it_daemon_pid}" >/dev/null 2>&1 || true
  fi

  # Leave the socket alone when it belongs to an existing daemon.
  if [[ "${IT_USE_EXISTING_DAEMON}" != "1" ]]; then
    rm -f -- "${it_sock}" >/dev/null 2>&1 || true
  fi
}
|
||||||
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
# Unless an existing daemon is to be reused, start an isolated one in the
# background on ${it_sock} and wait until its readiness endpoint answers.
if [[ "${IT_USE_EXISTING_DAEMON}" != "1" ]]; then
  # Drop any stale socket left by a previous run before starting.
  rm -f "${it_sock}" >/dev/null 2>&1 || true
  export SOCK="${it_sock}"
  STORE_BACKEND="${it_backend}" STORE_ROOT="${it_root}" SOCK="${it_sock}" SPACE="${SPACE:-app1}" \
    nohup "${ROOT_DIR}/scripts/dev_start_daemon.sh" >"${it_log}" 2>&1 &
  it_daemon_pid="$!"
  it_started_daemon=1

  # Poll /v2/readyz over the unix socket: up to 120 attempts, 0.1 s apart.
  ready=0
  for _ in {1..120}; do
    if curl --globoff --silent --show-error --unix-socket "${it_sock}" "http://localhost/v2/readyz" >/dev/null 2>&1; then
      ready=1
      break
    fi
    sleep 0.1
  done

  if [[ "${ready}" != "1" ]]; then
    echo "integration_v2.sh: FAIL (isolated daemon did not become ready, log: ${it_log})" >&2
    exit 1
  fi
fi
|
||||||
|
|
||||||
|
app_init
|
||||||
if [[ ! -S "${SOCK}" ]]; then
|
if [[ ! -S "${SOCK}" ]]; then
|
||||||
echo "integration_v2.sh: SKIP (socket not found at ${SOCK})"
|
echo "integration_v2.sh: SKIP (socket not found at ${SOCK})"
|
||||||
exit 77
|
exit 77
|
||||||
|
|
@ -40,8 +81,8 @@ assert_contains "${startup_out}" '"ok"'
|
||||||
run_id="$(date +%s)"
|
run_id="$(date +%s)"
|
||||||
trace_id="trace-it-${run_id}"
|
trace_id="trace-it-${run_id}"
|
||||||
idempotency_key="it-seed-${run_id}"
|
idempotency_key="it-seed-${run_id}"
|
||||||
doc_name="doc-it${run_id}"
|
doc_name="docit${run_id}"
|
||||||
topic_name="topic-italpha${run_id}"
|
topic_name="topicitalpha${run_id}"
|
||||||
payload="$(cat <<JSON
|
payload="$(cat <<JSON
|
||||||
{
|
{
|
||||||
"idempotency_key":"${idempotency_key}",
|
"idempotency_key":"${idempotency_key}",
|
||||||
|
|
@ -81,6 +122,23 @@ assert_contains "${sync_out}" '"events"'
|
||||||
retrieve_out="$(app_retrieve_with_fallback "${doc_name}" "ms.within_domain")"
|
retrieve_out="$(app_retrieve_with_fallback "${doc_name}" "ms.within_domain")"
|
||||||
assert_contains "${retrieve_out}" '"edges"'
|
assert_contains "${retrieve_out}" '"edges"'
|
||||||
|
|
||||||
|
# 4b) optional live AI-over-retrieval path (requires reachable Ollama)
|
||||||
|
if [[ "${RUN_AI_RETRIEVE_LIVE:-0}" == "1" ]]; then
|
||||||
|
ai_answer_out="$(app_ai_answer_json "${doc_name}" "Which topic is this document within?" "ms.within_domain")"
|
||||||
|
printf '%s' "${ai_answer_out}" | jq -e '.response | type == "string"' >/dev/null || {
|
||||||
|
echo "expected ai-answer to return JSON with response text" >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
printf '%s' "${ai_answer_out}" | jq -e '.evidence | type == "array"' >/dev/null || {
|
||||||
|
echo "expected ai-answer to include evidence array" >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
app_ai_answer_json "${doc_name}" "Which topic is this document within?" "ms.within_domain" "1" >/dev/null || {
|
||||||
|
echo "expected strict ai-answer to succeed when evidence exists" >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
fi
|
||||||
|
|
||||||
# Capture edge_ref using subgraph surface to avoid format differences.
|
# Capture edge_ref using subgraph surface to avoid format differences.
|
||||||
subgraph_out="$(amduat_api_call GET "/v2/graph/subgraph?roots[]=${doc_name}&max_depth=2&dir=outgoing&limit_nodes=200&limit_edges=400&include_stats=true&max_result_bytes=1048576" && printf '%s' "${AMDUAT_LAST_BODY}")"
|
subgraph_out="$(amduat_api_call GET "/v2/graph/subgraph?roots[]=${doc_name}&max_depth=2&dir=outgoing&limit_nodes=200&limit_edges=400&include_stats=true&max_result_bytes=1048576" && printf '%s' "${AMDUAT_LAST_BODY}")"
|
||||||
edge_ref="$(printf '%s' "${subgraph_out}" | jq -r '.edges[0].edge_ref // empty')"
|
edge_ref="$(printf '%s' "${subgraph_out}" | jq -r '.edges[0].edge_ref // empty')"
|
||||||
|
|
|
||||||
2
vendor/amduat-api
vendored
2
vendor/amduat-api
vendored
|
|
@ -1 +1 @@
|
||||||
Subproject commit b8c0a6e6d0dea9d6adcfe0c1360564f1badd457e
|
Subproject commit 0ae2c8d74a85bbcb633e711fb1cf84d0516bdc3b
|
||||||
Loading…
Reference in a new issue