Update app v2 docs/scripts and add AI eval helpers

Carl Niklas Rydberg 2026-02-08 00:07:35 +01:00
parent 610a3e4848
commit f0e3768412
12 changed files with 823 additions and 36 deletions

View file

@ -23,8 +23,10 @@ cp config/env.example config/env.local
```
`dev_start_daemon.sh` initializes the store for the selected backend and, when
`STORE_BACKEND=index`, runs a quick startup write probe. If index writes are
unhealthy, it automatically falls back to `fs` (configurable via
`INDEX_BACKEND_PROBE` and `INDEX_BACKEND_FALLBACK` in `config/env.local`).
unhealthy, it first tries a one-time index store repair (backup + re-init) and
then falls back to `fs` if still unhealthy (configurable via
`INDEX_BACKEND_PROBE`, `INDEX_BACKEND_REPAIR`, and `INDEX_BACKEND_FALLBACK` in
`config/env.local`).
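The probe/repair/fallback sequence above can be sketched as a small decision function (a simplified illustration only; `select_backend` and its flag arguments are stand-ins for the real env vars and checks in `dev_start_daemon.sh`):

```sh
# Simplified sketch of the backend selection described above.
# Arguments are stand-ins for INDEX_BACKEND_PROBE, the probe result,
# INDEX_BACKEND_REPAIR, the repair result, and INDEX_BACKEND_FALLBACK.
select_backend() {
  local probe_enabled="$1" probe_ok="$2" repair_enabled="$3" repair_ok="$4" fallback="$5"
  if [[ "${probe_enabled}" != "1" || "${probe_ok}" == "1" ]]; then
    echo "index"; return 0          # probe disabled or healthy: stay on index
  fi
  if [[ "${repair_enabled}" == "1" && "${repair_ok}" == "1" ]]; then
    echo "index"; return 0          # repaired in place: stay on index
  fi
  echo "${fallback}"                # otherwise fall back (e.g. fs)
}
select_backend 1 0 1 0 fs           # prints: fs
```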
3. Run startup checks against the daemon socket:
@ -50,6 +52,12 @@ Use the integrated v2 app flow wrapper:
```sh
./scripts/v2_app.sh startup-check
./scripts/v2_app.sh ai-check
./scripts/v2_app.sh ai-generate 'Summarize retrieval behavior in one sentence.'
./scripts/v2_app.sh ai-generate --json 'Summarize retrieval behavior in one sentence.'
./scripts/v2_app.sh ai-answer 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain'
./scripts/v2_app.sh ai-answer --json 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain'
./scripts/v2_app.sh ai-answer --json --require-evidence 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain'
./scripts/v2_app.sh ingest '{"idempotency_key":"k1","mode":"continue_on_error","nodes":[{"name":"doc-1"}]}'
./scripts/v2_app.sh sync-once
./scripts/v2_app.sh consume-changes --once
@ -70,6 +78,13 @@ Run local cursor/handler semantics checks (no daemon required):
./tests/changes_consumer_handler.sh
```
Run AI connectivity, generation, and grounded-answer smoke evals (no daemon required):
```sh
./tests/ai_eval.sh
./tests/ai_answer_eval.sh
```
Run a fast end-to-end smoke (startup + ingest + sync + retrieve + tombstone):
```sh

View file

@ -9,6 +9,8 @@ STORE_BACKEND="index"
# For index backend, run a startup write probe and fall back to fs if broken.
INDEX_BACKEND_PROBE="1"
INDEX_BACKEND_FALLBACK="fs"
# Attempt one-time index root repair (backup + index init --force) before fs fallback.
INDEX_BACKEND_REPAIR="1"
FS_FALLBACK_STORE_ROOT=".amduat-asl-fs"
# AMDUATD_BIN="/path/to/amduatd"
# ASL_BIN="/path/to/amduat-asl"
@ -26,3 +28,10 @@ RETRY_MAX_MS="2000"
# Curl timeouts
CURL_CONNECT_TIMEOUT_SECONDS="2"
CURL_MAX_TIME_SECONDS="30"
# Optional Ollama AI settings
OLLAMA_HOST="http://127.0.0.1:11434"
OLLAMA_MODEL="qwen2.5-coder:7b"
OLLAMA_TIMEOUT_SECONDS="60"
AI_CONTEXT_MAX_CHARS="12000"
AI_EVIDENCE_MAX_EDGES="5"

View file

@ -0,0 +1,85 @@
# Index Backend Repro (Isolated)
## Summary
With a fresh store root, `amduatd` on `--store-backend index` accepts the first
`/v2/graph/nodes` write, then fails on the second node write with `{"error":"store error"}`.
The same call sequence succeeds on `--store-backend fs`.
Observed daemon log on index:
```text
ERROR: edge append failed for space/app1/daemon/edges (err=2)
```
`err=2` maps to `AMDUAT_ASL_COLLECTION_ERR_IO`.
## Minimal Repro
### 1) Start index backend
```bash
STORE_BACKEND=index INDEX_BACKEND_PROBE=0 \
STORE_ROOT=/tmp/amduat-asl-index-iso \
SOCK=/tmp/amduatd-index-iso.sock SPACE=app1 \
./scripts/dev_start_daemon.sh
```
### 2) Execute direct API calls
```bash
curl --globoff --silent --show-error --unix-socket /tmp/amduatd-index-iso.sock \
-H 'Content-Type: application/json' -H 'X-Amduat-Space: app1' \
-X POST --data-binary '{"name":"doca1"}' \
http://localhost/v2/graph/nodes
curl --globoff --silent --show-error --unix-socket /tmp/amduatd-index-iso.sock \
-H 'Content-Type: application/json' -H 'X-Amduat-Space: app1' \
-X POST --data-binary '{"name":"topica1"}' \
http://localhost/v2/graph/nodes
curl --globoff --silent --show-error --unix-socket /tmp/amduatd-index-iso.sock \
-H 'Content-Type: application/json' -H 'X-Amduat-Space: app1' \
-X POST --data-binary '{"subject":"doca1","predicate":"ms.within_domain","object":"topica1","provenance":{"source_uri":"urn:t","extractor":"x","observed_at":1,"ingested_at":2,"trace_id":"t1"}}' \
http://localhost/v2/graph/edges
```
Expected: both node writes and the edge write succeed.
Actual on index: the second node write returns `{"error":"store error"}` and the edge write returns `{"error":"object not found"}`.
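The expected-vs-actual outcome above can be checked mechanically. The helper below is a local illustration only (not part of the scaffold) that classifies a `/v2/graph/nodes` response body:

```bash
# classify_node_response maps a /v2/graph/nodes response body to a verdict,
# so the second write of the repro can be asserted in a script.
# Local illustration only; not part of the scaffold.
classify_node_response() {
  case "$1" in
    *'"error":"store error"'*) echo "store-error" ;;   # the index-backend failure
    *'"error"'*)               echo "other-error" ;;
    *)                         echo "ok" ;;
  esac
}
classify_node_response '{"error":"store error"}'   # prints: store-error
classify_node_response '{"name":"doca1"}'          # prints: ok
```

On a buggy index backend the second node write classifies as `store-error`; on `fs`, all three calls classify as `ok`.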
### 3) Control check (fs backend)
Run the same payloads against fs:
```bash
STORE_BACKEND=fs STORE_ROOT=/tmp/amduat-asl-fs-iso \
SOCK=/tmp/amduatd-fs-iso.sock SPACE=app1 \
./scripts/dev_start_daemon.sh
```
All three calls succeed on `fs`.
## Likely Failure Path
From source:
- `vendor/amduat-api/src/amduatd_concepts.c`
- edge append path calls `amduat_asl_collection_append(...)`
- `vendor/amduat-api/vendor/amduat/src/core/asl_collection.c`
- `AMDUAT_ASL_COLLECTION_ERR_IO` comes from `amduat_asl_log_append(...)` failure
- `vendor/amduat-api/vendor/amduat/src/core/asl_log_store.c`
- failure likely in pointer/log append path (`pointer_get`, `store_get`, or `pointer_cas`)
## Targeted Upstream Patch Path
1. Add temporary diagnostic logging in `amduat_asl_log_append(...)` for:
- `pointer_name`
- `ptr_err` from `amduat_asl_pointer_get`
- `store_err` from `amduat_asl_store_get` / `amduat_asl_store_put`
- `cas_err` and retry exhaustion path
2. Re-run the minimal repro above and capture the first non-OK internal return.
3. Patch the specific failing branch in `asl_log_store.c` (or lower index store path)
rather than adding retries in app code.
4. Restore/commit missing graph index append scripts expected by CTest:
- `vendor/amduat-api/scripts/test_graph_index_append.sh`
- `vendor/amduat-api/scripts/test_graph_index_append_stress.sh`
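Step 2 can be aided by a small log scan. `first_append_failure` below is a hypothetical helper (the real log location depends on how you start `amduatd`; a synthetic log is used here for illustration):

```bash
# Hypothetical helper: surface the first edge-append IO error from a daemon log.
first_append_failure() {
  grep -m1 'edge append failed' "$1" 2>/dev/null \
    || echo "no edge append failure logged"
}
log="$(mktemp)"
printf 'INFO: startup ok\nERROR: edge append failed for space/app1/daemon/edges (err=2)\n' > "${log}"
first_append_failure "${log}"   # prints: ERROR: edge append failed for space/app1/daemon/edges (err=2)
```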

View file

@ -273,3 +273,19 @@ curl --unix-socket "${SOCK}" -sS -X POST "${BASE}/v2/graph/edges/tombstone" \
-H "X-Amduat-Space: ${SPACE}" \
-d "{\"edge_ref\":\"${EDGE_REF}\"}"
```
## 11) AI Answer Wrapper (Grounded)
For local app usage via this scaffold:
```sh
./scripts/v2_app.sh ai-answer 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain'
./scripts/v2_app.sh ai-answer --json 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain'
./scripts/v2_app.sh ai-answer --json --require-evidence 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain'
```
Behavior notes:
- The command retrieves graph context first (`/v2/graph/retrieve` with `/v2/graph/subgraph` fallback).
- JSON output includes an `evidence[]` array with normalized triplets and refs (`predicate_ref` retained, `predicate_name` preferred when resolvable).
- `--require-evidence` enforces strict grounding: command exits non-zero when no supporting edges are found.
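A caller can inspect the JSON output with `jq`. The payload below is a hand-written sample following the field names described above (not captured from a live run):

```sh
# Sample ai-answer --json payload (shape per the notes above) and jq extraction.
out='{"model":"m","response":"doc-1 is within topic-a.","done":true,"evidence":[{"subject":"doc-1","predicate":"ms.within_domain","object":"topic-a","predicate_ref":"ref-p1"}],"grounding":{"has_evidence":true,"require_evidence":false}}'
# Print the answer text, then each evidence triplet in arrow form.
printf '%s' "${out}" | jq -r '.response'
printf '%s' "${out}" | jq -r '.evidence[] | "\(.subject) --\(.predicate)--> \(.object)"'
# Fail loudly when the answer is ungrounded.
printf '%s' "${out}" | jq -e '.grounding.has_evidence == true' >/dev/null || echo "no evidence" >&2
```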

View file

@ -8,6 +8,10 @@ override_space="${SPACE:-}"
override_sock="${SOCK:-}"
override_amduatd_bin="${AMDUATD_BIN:-}"
override_asl_bin="${ASL_BIN:-}"
override_index_backend_probe="${INDEX_BACKEND_PROBE:-}"
override_index_backend_fallback="${INDEX_BACKEND_FALLBACK:-}"
override_index_backend_repair="${INDEX_BACKEND_REPAIR:-}"
override_fs_fallback_store_root="${FS_FALLBACK_STORE_ROOT:-}"
ENV_FILE="${ROOT_DIR}/config/env.local"
if [[ ! -f "${ENV_FILE}" ]]; then
@ -22,6 +26,10 @@ if [[ -n "${override_space}" ]]; then SPACE="${override_space}"; fi
if [[ -n "${override_sock}" ]]; then SOCK="${override_sock}"; fi
if [[ -n "${override_amduatd_bin}" ]]; then AMDUATD_BIN="${override_amduatd_bin}"; fi
if [[ -n "${override_asl_bin}" ]]; then ASL_BIN="${override_asl_bin}"; fi
if [[ -n "${override_index_backend_probe}" ]]; then INDEX_BACKEND_PROBE="${override_index_backend_probe}"; fi
if [[ -n "${override_index_backend_fallback}" ]]; then INDEX_BACKEND_FALLBACK="${override_index_backend_fallback}"; fi
if [[ -n "${override_index_backend_repair}" ]]; then INDEX_BACKEND_REPAIR="${override_index_backend_repair}"; fi
if [[ -n "${override_fs_fallback_store_root}" ]]; then FS_FALLBACK_STORE_ROOT="${override_fs_fallback_store_root}"; fi
STORE_ROOT="${STORE_ROOT:-${ROOT_DIR}/.amduat-asl}"
STORE_BACKEND="${STORE_BACKEND:-index}"
@ -29,6 +37,7 @@ SPACE="${SPACE:-app1}"
SOCK="${SOCK:-${ROOT_DIR}/amduatd.sock}"
INDEX_BACKEND_PROBE="${INDEX_BACKEND_PROBE:-1}"
INDEX_BACKEND_FALLBACK="${INDEX_BACKEND_FALLBACK:-fs}"
INDEX_BACKEND_REPAIR="${INDEX_BACKEND_REPAIR:-1}"
FS_FALLBACK_STORE_ROOT="${FS_FALLBACK_STORE_ROOT:-${STORE_ROOT}-fs}"
if [[ "${STORE_ROOT}" != /* ]]; then STORE_ROOT="${ROOT_DIR}/${STORE_ROOT}"; fi
@ -106,26 +115,95 @@ wait_ready() {
probe_index_write_path() {
local sock="$1"
local space="$2"
local probe_idx
for probe_idx in 1 2 3 4 5; do
local run_id
run_id="$(date +%s)"
local doc="probe-doc-${run_id}"
local topic="probe-topic-${run_id}"
run_id="$(date +%s%N)${RANDOM}${probe_idx}"
# Keep probe names conservative (alnum only) across backend/name-policy variants.
local doc="probe${run_id}a"
local topic="probe${run_id}b"
local payload
payload="$(cat <<JSON
{"idempotency_key":"probe-${run_id}","mode":"continue_on_error","nodes":[{"name":"${doc}"},{"name":"${topic}"}],"edges":[{"subject":"${doc}","predicate":"ms.within_domain","object":"${topic}","provenance":{"source_uri":"urn:probe","extractor":"dev-start","observed_at":1,"ingested_at":2,"trace_id":"probe-${run_id}"}}]}
{
"idempotency_key":"probe-${run_id}",
"mode":"continue_on_error",
"nodes":[{"name":"${doc}"},{"name":"${topic}"}],
"edges":[
{
"subject":"${doc}",
"predicate":"ms.within_domain",
"object":"${topic}",
"provenance":{
"source_uri":"urn:probe:index",
"extractor":"dev-start-probe",
"observed_at":1,
"ingested_at":2,
"trace_id":"probe-${run_id}"
}
}
]
}
JSON
)"
local out
out="$(curl --globoff --silent --show-error --unix-socket "${sock}" \
local raw out code
raw="$(curl --globoff --silent --show-error --unix-socket "${sock}" \
-H "Content-Type: application/json" \
-H "X-Amduat-Space: ${space}" \
-X POST --data-binary "${payload}" \
-w $'\n%{http_code}' \
"http://localhost/v2/graph/batch")" || return 1
if [[ "${out}" == *'"ok":true'* ]]; then
return 0
fi
echo "index probe response: ${out}" >&2
code="${raw##*$'\n'}"
out="${raw%$'\n'*}"
if [[ "${code}" != "200" ]]; then
echo "index probe HTTP ${code}: ${out}" >&2
return 1
fi
  # Healthy means every valid node+edge batch write in this loop is applied successfully.
if command -v jq >/dev/null 2>&1; then
if ! printf '%s' "${out}" | jq -e '.' >/dev/null 2>&1; then
echo "index probe returned non-JSON payload: ${out}" >&2
return 1
fi
if printf '%s' "${out}" | jq -e 'any((.results // [] )[]?; ((.code // 0) | tonumber) >= 500)' >/dev/null 2>&1; then
echo "index probe saw server error result: ${out}" >&2
return 1
fi
if printf '%s' "${out}" | jq -e 'any((.results // [] )[]?; (.status == "error") or (((.code // 0) | tonumber) >= 400))' >/dev/null 2>&1; then
echo "index probe saw non-success result: ${out}" >&2
return 1
fi
if ! printf '%s' "${out}" | jq -e '([(.results // [] )[]? | select(.kind == "node" and .status == "applied" and ((.code // 0) | tonumber) < 300)] | length) >= 2' >/dev/null 2>&1; then
echo "index probe missing applied node writes: ${out}" >&2
return 1
fi
if ! printf '%s' "${out}" | jq -e 'any((.results // [] )[]?; .kind == "edge" and .status == "applied" and ((.code // 0) | tonumber) < 300)' >/dev/null 2>&1; then
echo "index probe missing applied edge write: ${out}" >&2
return 1
fi
if ! printf '%s' "${out}" | jq -e '.ok == true' >/dev/null 2>&1; then
echo "index probe non-ok payload: ${out}" >&2
return 1
fi
continue
fi
if [[ "${out}" == *'"ok":true'* ]]; then
continue
fi
if [[ "${out}" == *'"code":5'* ]]; then
echo "index probe saw 5xx result: ${out}" >&2
return 1
fi
if [[ "${out}" == *'"ok":false'* || "${out}" == *'"status":"error"'* ]]; then
echo "index probe non-ok payload: ${out}" >&2
return 1
fi
echo "index probe unexpected payload: ${out}" >&2
return 1
done
return 0
}
run_daemon_foreground() {
@ -135,19 +213,41 @@ run_daemon_foreground() {
exec "${AMDUATD_BIN}" --root "${root}" --sock "${SOCK}" --store-backend "${backend}" --space "${SPACE}"
}
start_probe_daemon() {
echo "starting amduatd (probe mode): root=${STORE_ROOT} sock=${SOCK} backend=${STORE_BACKEND} space=${SPACE}" >&2
"${AMDUATD_BIN}" --root "${STORE_ROOT}" --sock "${SOCK}" --store-backend "${STORE_BACKEND}" --space "${SPACE}" &
daemon_pid=$!
cleanup_probe() {
kill "${daemon_pid}" >/dev/null 2>&1 || true
}
trap cleanup_probe EXIT
}
stop_probe_daemon() {
kill "${daemon_pid}" >/dev/null 2>&1 || true
wait "${daemon_pid}" >/dev/null 2>&1 || true
trap - EXIT
}
repair_index_store() {
local root="$1"
local backup_root="${root}.bak-$(date +%Y%m%d-%H%M%S)"
if [[ -d "${root}" ]]; then
echo "backing up index store root to ${backup_root}" >&2
mv "${root}" "${backup_root}"
fi
mkdir -p "${root}"
echo "reinitializing index-backed ASL store at ${root}" >&2
"${ASL_BIN}" index init --root "${root}" --force
}
init_store "${STORE_BACKEND}" "${STORE_ROOT}"
if [[ "${STORE_BACKEND}" != "index" || "${INDEX_BACKEND_PROBE}" != "1" ]]; then
run_daemon_foreground "${STORE_BACKEND}" "${STORE_ROOT}"
fi
echo "starting amduatd (probe mode): root=${STORE_ROOT} sock=${SOCK} backend=${STORE_BACKEND} space=${SPACE}" >&2
"${AMDUATD_BIN}" --root "${STORE_ROOT}" --sock "${SOCK}" --store-backend "${STORE_BACKEND}" --space "${SPACE}" &
daemon_pid=$!
cleanup_probe() {
kill "${daemon_pid}" >/dev/null 2>&1 || true
}
trap cleanup_probe EXIT
start_probe_daemon
fallback_to_fs=0
if ! wait_ready "${SOCK}"; then
@ -164,9 +264,27 @@ if [[ "${fallback_to_fs}" == "0" ]]; then
exit $?
fi
kill "${daemon_pid}" >/dev/null 2>&1 || true
wait "${daemon_pid}" >/dev/null 2>&1 || true
trap - EXIT
stop_probe_daemon
if [[ "${INDEX_BACKEND_REPAIR}" == "1" ]]; then
echo "attempting index store repair before fs fallback" >&2
repair_index_store "${STORE_ROOT}"
start_probe_daemon
repaired_ok=0
if wait_ready "${SOCK}" && probe_index_write_path "${SOCK}" "${SPACE}"; then
repaired_ok=1
fi
if [[ "${repaired_ok}" == "1" ]]; then
trap - EXIT
wait "${daemon_pid}"
exit $?
fi
echo "index backend repair failed" >&2
stop_probe_daemon
fi
if [[ "${INDEX_BACKEND_FALLBACK}" != "fs" ]]; then
echo "set INDEX_BACKEND_FALLBACK=fs to auto-fallback, or INDEX_BACKEND_PROBE=0 to disable probe" >&2

View file

@ -33,8 +33,8 @@ app_init
run_id="$(date +%s)"
idempotency_key="smoke-seed-${run_id}"
doc_name="smoke-doc-${run_id}"
topic_name="smoke-topic-${run_id}"
doc_name="smokedoc${run_id}"
topic_name="smoketopic${run_id}"
goal_pred="ms.within_domain"
step "startup"

View file

@ -11,6 +11,9 @@ usage: $0 COMMAND [args]
commands:
startup-check
ai-check
ai-generate [--json] PROMPT
ai-answer [--json] [--require-evidence] ROOTS_CSV QUESTION [GOAL_PREDICATES_CSV]
ingest PAYLOAD_JSON
sync-once
consume-changes [--once]
@ -34,6 +37,56 @@ case "${cmd}" in
startup-check)
app_startup_checks
;;
ai-check)
app_ai_check
;;
ai-generate)
output_mode="text"
if [[ $# -gt 0 && "$1" == "--json" ]]; then
output_mode="json"
shift
fi
if [[ $# -lt 1 ]]; then
echo "usage: $0 ai-generate [--json] PROMPT" >&2
exit 2
fi
if [[ "${output_mode}" == "json" ]]; then
app_ai_generate_json "$*"
else
app_ai_generate_text "$*"
fi
;;
ai-answer)
output_mode="text"
require_evidence=0
while [[ $# -gt 0 ]]; do
case "$1" in
--json)
output_mode="json"
shift
;;
--require-evidence)
require_evidence=1
shift
;;
*)
break
;;
esac
done
if [[ $# -lt 2 || $# -gt 3 ]]; then
echo "usage: $0 ai-answer [--json] [--require-evidence] ROOTS_CSV QUESTION [GOAL_PREDICATES_CSV]" >&2
exit 2
fi
roots_csv="$1"
question="$2"
goals_csv="${3:-}"
if [[ "${output_mode}" == "json" ]]; then
app_ai_answer_json "${roots_csv}" "${question}" "${goals_csv}" "${require_evidence}"
else
app_ai_answer_text "${roots_csv}" "${question}" "${goals_csv}" "${require_evidence}"
fi
;;
ingest)
if [[ $# -ne 1 ]]; then
echo "usage: $0 ingest PAYLOAD_JSON" >&2

View file

@ -78,6 +78,7 @@ app_retrieve_with_fallback() {
printf '%s\n' "${AMDUAT_LAST_BODY}"
return 0
fi
local retrieve_status="${AMDUAT_LAST_STATUS}"
local first_root
first_root="$(printf '%s' "${roots_csv}" | awk -F',' '{gsub(/^ +| +$/, "", $1); printf "%s", $1}')"
@ -90,8 +91,19 @@ app_retrieve_with_fallback() {
fi
fi
amduat_api_call GET "${fallback_path}"
if amduat_api_call GET "${fallback_path}"; then
printf '%s\n' "${AMDUAT_LAST_BODY}"
return 0
fi
local fallback_status="${AMDUAT_LAST_STATUS}"
if [[ "${retrieve_status}" == "404" && "${fallback_status}" == "404" ]]; then
# Return an explicit empty graph so callers can handle no-context deterministically.
printf '%s\n' '{"nodes":[],"edges":[],"stats":{"reason":"not_found"}}'
return 0
fi
return 1
}
app_tombstone_edge() {
@ -101,3 +113,294 @@ app_tombstone_edge() {
amduat_api_call POST "/v2/graph/edges/tombstone" "${payload}"
printf '%s\n' "${AMDUAT_LAST_BODY}"
}
app_ai_check() {
local tags
tags="$(
curl --globoff --silent --show-error \
--connect-timeout "${CURL_CONNECT_TIMEOUT_SECONDS}" \
--max-time "${OLLAMA_TIMEOUT_SECONDS}" \
"${OLLAMA_HOST}/api/tags"
)" || {
echo "failed to reach ollama at ${OLLAMA_HOST}" >&2
return 1
}
printf '%s\n' "${tags}"
if [[ "${tags}" != *"\"name\":\"${OLLAMA_MODEL}\""* ]]; then
echo "warning: configured model not found in tags: ${OLLAMA_MODEL}" >&2
fi
}
app_ai_generate_json() {
if ! command -v jq >/dev/null 2>&1; then
echo "jq is required for ai-generate" >&2
return 2
fi
local prompt="$1"
local payload
payload="$(jq -nc --arg model "${OLLAMA_MODEL}" --arg prompt "${prompt}" \
'{model:$model,prompt:$prompt,stream:false}')"
local out
out="$(
curl --globoff --silent --show-error \
--connect-timeout "${CURL_CONNECT_TIMEOUT_SECONDS}" \
--max-time "${OLLAMA_TIMEOUT_SECONDS}" \
-H "Content-Type: application/json" \
-X POST \
--data-binary "${payload}" \
"${OLLAMA_HOST}/api/generate"
)" || {
echo "failed to call ollama generate at ${OLLAMA_HOST}" >&2
return 1
}
printf '%s\n' "${out}"
}
app_ai_generate() {
local prompt="$1"
app_ai_generate_json "${prompt}"
}
app_ai_predicate_map_json() {
if ! command -v jq >/dev/null 2>&1; then
echo "jq is required for ai-answer" >&2
return 2
fi
if [[ "${APP_AI_PREDICATE_MAP_INIT:-0}" == "1" ]]; then
if [[ -n "${APP_AI_PREDICATE_MAP_CACHE:-}" ]]; then
printf '%s\n' "${APP_AI_PREDICATE_MAP_CACHE}"
else
printf '{}\n'
fi
return 0
fi
local schema_json="{}"
if amduat_api_call GET "/v2/graph/schema/predicates"; then
schema_json="${AMDUAT_LAST_BODY}"
fi
APP_AI_PREDICATE_MAP_CACHE="$(
printf '%s' "${schema_json}" | jq -c '
def clean(v): (v // "" | tostring | gsub("[\\r\\n\\t]+";" ") | gsub(" +";" ") | sub("^ ";"") | sub(" $";""));
def entry_from(o):
{ref: clean(o.predicate_ref // o.ref // o.predicate // o.id // ""), name: clean(o.alias // o.name // o.predicate // o.label // "")};
def pred_list(root):
if (root | type) == "array" then root
elif (root | type) == "object" then (root.predicates // root.items // root.data // root.results // [])
else []
end;
(pred_list(.) | map(entry_from(.)) | map(select(.ref != "" and .name != "")))
| reduce .[] as $e ({}; .[$e.ref] = $e.name)
' 2>/dev/null || printf '{}'
)"
APP_AI_PREDICATE_MAP_INIT=1
printf '%s\n' "${APP_AI_PREDICATE_MAP_CACHE}"
}
app_ai_build_context() {
if ! command -v jq >/dev/null 2>&1; then
echo "jq is required for ai-answer" >&2
return 2
fi
local retrieve_json="$1"
local predicate_map_json="${2:-}"
local goals_csv="${3:-}"
if [[ -z "${predicate_map_json}" ]]; then
predicate_map_json='{}'
fi
local goals_json="[]"
if [[ -n "${goals_csv}" ]]; then
goals_json="$(printf '%s' "${goals_csv}" | awk -F',' 'BEGIN{printf "["} {for(i=1;i<=NF;i++){gsub(/^ +| +$/, "", $i); if (length($i)>0){if (printed) printf ","; printf "\"%s\"", $i; printed=1}}} END{printf "]"}')"
fi
printf '%s' "${retrieve_json}" | jq -r --arg predicate_map_json "${predicate_map_json}" --argjson goals "${goals_json}" '
def clean(v): (v // "" | tostring | gsub("[\\r\\n\\t]+";" ") | gsub(" +";" ") | sub("^ ";"") | sub(" $";""));
def node_name(n): clean(n.name // n.node_ref // n.id // "");
($predicate_map_json | fromjson? // {}) as $predicate_map
| ($goals | map(clean(.)) | map(select(length > 0))) as $goal_set
| (if ($goal_set | length) == 1 then $goal_set[0] else "" end) as $goal_hint
|
. as $root
| (($root.nodes // []) | reduce .[] as $n ({};
if (($n.concept_ref // "") | tostring | length) > 0
then .[$n.concept_ref] = (node_name($n))
else .
end
)) as $node_map
| def edge_triplet(e):
{
s: clean(e.subject // e.s // ($node_map[e.subject_ref] // e.subject_ref // "")),
p: clean($predicate_map[(e.predicate_ref // "")] // e.predicate // e.p // (if $goal_hint != "" then $goal_hint else empty end) // e.predicate_ref // ""),
o: clean(e.object // e.o // ($node_map[e.object_ref] // e.object_ref // ""))
};
{
nodes: (($root.nodes // []) | map(node_name(.)) | map(select(length > 0)) | unique | .[0:50]),
edges: (($root.edges // []) | map(edge_triplet(.)) | map(select(.s != "" and .p != "" and .o != "")) | .[0:100])
}
| "Nodes:\n"
+ (if (.nodes | length) == 0 then "- (none)\n" else ((.nodes[] | "- " + .) + "\n") end)
+ "Edges:\n"
+ (if (.edges | length) == 0 then "- (none)\n" else ((.edges[] | "- " + .s + " --" + .p + "--> " + .o) + "\n") end)
'
}
app_ai_extract_evidence_json() {
if ! command -v jq >/dev/null 2>&1; then
echo "jq is required for ai-answer" >&2
return 2
fi
local retrieve_json="$1"
local goals_csv="${2:-}"
local predicate_map_json="${3:-}"
if [[ -z "${predicate_map_json}" ]]; then
predicate_map_json='{}'
fi
local goals_json="[]"
if [[ -n "${goals_csv}" ]]; then
goals_json="$(printf '%s' "${goals_csv}" | awk -F',' 'BEGIN{printf "["} {for(i=1;i<=NF;i++){gsub(/^ +| +$/, "", $i); if (length($i)>0){if (printed) printf ","; printf "\"%s\"", $i; printed=1}}} END{printf "]"}')"
fi
printf '%s' "${retrieve_json}" | jq -c --argjson goals "${goals_json}" --arg predicate_map_json "${predicate_map_json}" --argjson max_edges "${AI_EVIDENCE_MAX_EDGES}" '
def clean(v): (v // "" | tostring | gsub("[\\r\\n\\t]+";" ") | gsub(" +";" ") | sub("^ ";"") | sub(" $";""));
($predicate_map_json | fromjson? // {}) as $predicate_map
|
. as $root
| (($root.nodes // []) | reduce .[] as $n ({};
if (($n.concept_ref // "") | tostring | length) > 0
then .[$n.concept_ref] = clean($n.name // $n.node_ref // $n.id // $n.concept_ref)
else .
end
)) as $node_map
| (($root.explanations // []) | reduce .[] as $x ({}; .[$x.edge_ref] = {depth: ($x.depth // null), reasons: ($x.reasons // [])})) as $exp_map
| ($goals | map(clean(.)) | map(select(length > 0))) as $goal_set
| (($root.edges // [])
| map({
edge_ref: clean(.edge_ref // ""),
subject: clean(.subject // .s // ($node_map[.subject_ref] // .subject_ref // "")),
predicate: clean($predicate_map[(.predicate_ref // "")] // .predicate // .p // .predicate_ref // ""),
predicate_name: clean($predicate_map[(.predicate_ref // "")] // .predicate // .p // ""),
object: clean(.object // .o // ($node_map[.object_ref] // .object_ref // "")),
predicate_ref: clean(.predicate_ref // ""),
subject_ref: clean(.subject_ref // ""),
object_ref: clean(.object_ref // "")
})
| map(select(.subject != "" and .predicate != "" and .object != ""))
| map(. + ($exp_map[.edge_ref] // {depth:null,reasons:[]}))) as $all_edges
| ($all_edges | map(
if ($goal_set | length) == 0 then .
else select((.predicate as $p | $goal_set | index($p)) != null or (.predicate_ref as $pr | $goal_set | index($pr)) != null)
end
)) as $filtered_edges
| (if ($goal_set | length) > 0 and ($filtered_edges | length) == 0 then $all_edges else $filtered_edges end) as $selected_edges
| (if ($goal_set | length) == 1 then $goal_set[0] else "" end) as $goal_hint
| ($selected_edges | map(
if .predicate_name == "" and $goal_hint != "" then . + {predicate_name:$goal_hint, predicate:$goal_hint}
else .
end
))
| .[0:$max_edges]
'
}
app_ai_answer_json() {
if ! command -v jq >/dev/null 2>&1; then
echo "jq is required for ai-answer" >&2
return 2
fi
local roots_csv="$1"
local question="$2"
local goals_csv="${3:-}"
local require_evidence="${4:-0}"
local retrieve_out
retrieve_out="$(app_retrieve_with_fallback "${roots_csv}" "${goals_csv}")" || return 1
local predicate_map_json
predicate_map_json="$(app_ai_predicate_map_json)" || return $?
local has_context
has_context="$(printf '%s' "${retrieve_out}" | jq -r '((.nodes // []) | length) + ((.edges // []) | length) > 0')"
local evidence
evidence="$(app_ai_extract_evidence_json "${retrieve_out}" "${goals_csv}" "${predicate_map_json}")" || return $?
local evidence_count
evidence_count="$(printf '%s' "${evidence}" | jq -r 'length')"
local has_evidence="false"
if [[ "${evidence_count}" -gt 0 ]]; then
has_evidence="true"
fi
if [[ "${has_context}" != "true" ]]; then
jq -nc --arg model "${OLLAMA_MODEL}" \
--arg response "Insufficient graph context for the requested roots/predicates. Ingest or reference existing nodes/edges first." \
--argjson evidence "${evidence}" \
--argjson require_evidence "$( [[ "${require_evidence}" == "1" ]] && echo true || echo false )" \
'{model:$model,response:$response,done:true,done_reason:"no_context",evidence:$evidence,grounding:{has_evidence:false,require_evidence:$require_evidence}}'
return 0
fi
if [[ "${require_evidence}" == "1" && "${has_evidence}" != "true" ]]; then
jq -nc --arg model "${OLLAMA_MODEL}" \
--arg response "No supporting graph evidence found for the requested roots/predicates." \
--argjson evidence "${evidence}" \
'{model:$model,response:$response,done:true,done_reason:"no_evidence",evidence:$evidence,grounding:{has_evidence:false,require_evidence:true}}'
return 1
fi
local context
context="$(app_ai_build_context "${retrieve_out}" "${predicate_map_json}" "${goals_csv}")" || return $?
context="$(printf '%s' "${context}" | head -c "${AI_CONTEXT_MAX_CHARS}")"
local prompt
prompt="$(cat <<PROMPT
Answer the question using ONLY the graph context below.
If the context is insufficient, say exactly what is missing.
Keep the answer concise.
Graph context:
${context}
Question:
${question}
PROMPT
)"
local model_out
model_out="$(app_ai_generate_json "${prompt}")" || return $?
printf '%s' "${model_out}" | jq -c --argjson evidence "${evidence}" --argjson has_evidence "${has_evidence}" --argjson require_evidence "$( [[ "${require_evidence}" == "1" ]] && echo true || echo false )" '. + {evidence:$evidence, grounding:{has_evidence:$has_evidence, require_evidence:$require_evidence}}'
}
app_ai_answer_text() {
local roots_csv="$1"
local question="$2"
local goals_csv="${3:-}"
local require_evidence="${4:-0}"
local out
out="$(app_ai_answer_json "${roots_csv}" "${question}" "${goals_csv}" "${require_evidence}")" || return $?
if ! command -v jq >/dev/null 2>&1; then
echo "${out}"
return 0
fi
printf '%s' "${out}" | jq -r '.response // empty'
printf '\n'
}
app_ai_generate_text() {
local prompt="$1"
local out
out="$(app_ai_generate_json "${prompt}")" || return $?
if ! command -v jq >/dev/null 2>&1; then
echo "${out}"
return 0
fi
printf '%s' "${out}" | jq -r '.response // empty'
printf '\n'
}

View file

@ -15,6 +15,11 @@ amduat_config_load() {
local override_retry_max_ms="${RETRY_MAX_MS:-}"
local override_connect_timeout="${CURL_CONNECT_TIMEOUT_SECONDS:-}"
local override_max_time="${CURL_MAX_TIME_SECONDS:-}"
local override_ollama_host="${OLLAMA_HOST:-}"
local override_ollama_model="${OLLAMA_MODEL:-}"
local override_ollama_timeout="${OLLAMA_TIMEOUT_SECONDS:-}"
local override_ai_context_max_chars="${AI_CONTEXT_MAX_CHARS:-}"
local override_ai_evidence_max_edges="${AI_EVIDENCE_MAX_EDGES:-}"
local env_file="${root_dir}/config/env.local"
if [[ ! -f "${env_file}" ]]; then
@ -34,6 +39,11 @@ amduat_config_load() {
if [[ -n "${override_retry_max_ms}" ]]; then RETRY_MAX_MS="${override_retry_max_ms}"; fi
if [[ -n "${override_connect_timeout}" ]]; then CURL_CONNECT_TIMEOUT_SECONDS="${override_connect_timeout}"; fi
if [[ -n "${override_max_time}" ]]; then CURL_MAX_TIME_SECONDS="${override_max_time}"; fi
if [[ -n "${override_ollama_host}" ]]; then OLLAMA_HOST="${override_ollama_host}"; fi
if [[ -n "${override_ollama_model}" ]]; then OLLAMA_MODEL="${override_ollama_model}"; fi
if [[ -n "${override_ollama_timeout}" ]]; then OLLAMA_TIMEOUT_SECONDS="${override_ollama_timeout}"; fi
if [[ -n "${override_ai_context_max_chars}" ]]; then AI_CONTEXT_MAX_CHARS="${override_ai_context_max_chars}"; fi
if [[ -n "${override_ai_evidence_max_edges}" ]]; then AI_EVIDENCE_MAX_EDGES="${override_ai_evidence_max_edges}"; fi
SOCK="${SOCK:-amduatd.sock}"
BASE="${BASE:-http://localhost}"
@ -52,4 +62,10 @@ amduat_config_load() {
CURL_CONNECT_TIMEOUT_SECONDS="${CURL_CONNECT_TIMEOUT_SECONDS:-2}"
CURL_MAX_TIME_SECONDS="${CURL_MAX_TIME_SECONDS:-30}"
OLLAMA_HOST="${OLLAMA_HOST:-http://127.0.0.1:11434}"
OLLAMA_MODEL="${OLLAMA_MODEL:-qwen2.5-coder:7b}"
OLLAMA_TIMEOUT_SECONDS="${OLLAMA_TIMEOUT_SECONDS:-60}"
AI_CONTEXT_MAX_CHARS="${AI_CONTEXT_MAX_CHARS:-12000}"
AI_EVIDENCE_MAX_EDGES="${AI_EVIDENCE_MAX_EDGES:-5}"
}

118
tests/ai_answer_eval.sh Executable file
View file

@ -0,0 +1,118 @@
#!/usr/bin/env bash
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# shellcheck source=/dev/null
source "${ROOT_DIR}/src/app_v2.sh"
require_jq() {
if ! command -v jq >/dev/null 2>&1; then
echo "ai_answer_eval.sh: jq is required" >&2
exit 2
fi
}
fail() {
echo "ai_answer_eval.sh: FAIL: $1" >&2
exit 1
}
require_jq
app_init
tmp_dir="$(mktemp -d /tmp/ai-answer-eval.XXXXXX)"
cleanup() {
rm -rf "${tmp_dir}"
}
trap cleanup EXIT
mock_prompt_file="${tmp_dir}/prompt.txt"
mock_retrieve_path_file="${tmp_dir}/retrieve.path"
mock_retrieve_payload_file="${tmp_dir}/retrieve.payload"
amduat_api_call() {
local method="$1"
local path="$2"
local body="${3:-}"
if [[ "${method}" == "GET" && "${path}" == "/v2/graph/schema/predicates" ]]; then
AMDUAT_LAST_STATUS="200"
AMDUAT_LAST_BODY='{"predicates":[{"predicate_ref":"ref-ms-within-domain","alias":"ms.within_domain"}]}'
return 0
fi
if [[ "${method}" == "POST" && "${path}" == "/v2/graph/retrieve" ]]; then
printf '%s' "${path}" > "${mock_retrieve_path_file}"
printf '%s' "${body}" > "${mock_retrieve_payload_file}"
AMDUAT_LAST_STATUS="200"
if [[ "${MOCK_NO_EDGES:-0}" == "1" ]]; then
AMDUAT_LAST_BODY='{"nodes":[{"name":"doc:1"},{"name":"topic:alpha"}],"edges":[]}'
else
AMDUAT_LAST_BODY='{"nodes":[{"name":"doc:1","concept_ref":"ref-doc1"},{"name":"topic:alpha","concept_ref":"ref-topic-alpha"}],"edges":[{"subject_ref":"ref-doc1","predicate_ref":"ref-ms-within-domain","object_ref":"ref-topic-alpha","edge_ref":"ref-edge-1"}]}'
fi
return 0
fi
AMDUAT_LAST_STATUS="404"
AMDUAT_LAST_BODY='{"error":"not mocked"}'
return 1
}
curl() {
local body=""
local endpoint=""
while [[ $# -gt 0 ]]; do
case "$1" in
--data-binary)
body="$2"
shift 2
;;
http://*|https://*)
endpoint="$1"
shift
;;
*)
shift
;;
esac
done
[[ "${endpoint}" == "${OLLAMA_HOST}/api/generate" ]] || fail "unexpected curl endpoint: ${endpoint}"
prompt="$(printf '%s' "${body}" | jq -r '.prompt')"
printf '%s' "${prompt}" > "${mock_prompt_file}"
printf '%s\n' '{"model":"mock-model","response":"Grounded answer from mock model."}'
}
json_out="$(app_ai_answer_json "doc:1" "What domain is doc:1 in?" "ms.within_domain")" || fail "app_ai_answer_json failed"
printf '%s' "${json_out}" | jq -e '.response == "Grounded answer from mock model."' >/dev/null || fail "unexpected response payload"
printf '%s' "${json_out}" | jq -e '.evidence | length == 1' >/dev/null || fail "missing evidence"
printf '%s' "${json_out}" | jq -e '.grounding.has_evidence == true' >/dev/null || fail "grounding.has_evidence should be true"
printf '%s' "${json_out}" | jq -e '.evidence[0].subject == "doc:1" and .evidence[0].predicate == "ms.within_domain" and .evidence[0].object == "topic:alpha"' >/dev/null \
|| fail "evidence triplet mismatch"
printf '%s' "${json_out}" | jq -e '.evidence[0].predicate_name == "ms.within_domain"' >/dev/null || fail "predicate_name should resolve from schema"
[[ -f "${mock_retrieve_path_file}" ]] || fail "retrieve call was not made"
[[ "$(cat "${mock_retrieve_path_file}")" == "/v2/graph/retrieve" ]] || fail "retrieve path mismatch"
retrieve_payload="$(cat "${mock_retrieve_payload_file}")"
printf '%s' "${retrieve_payload}" | jq -e '.roots == ["doc:1"]' >/dev/null || fail "roots payload mismatch"
printf '%s' "${retrieve_payload}" | jq -e '.goal_predicates == ["ms.within_domain"]' >/dev/null || fail "goal_predicates payload mismatch"
prompt_text="$(cat "${mock_prompt_file}")"
[[ "${prompt_text}" == *"Question:"* ]] || fail "prompt missing question label"
[[ "${prompt_text}" == *"What domain is doc:1 in?"* ]] || fail "prompt missing question"
[[ "${prompt_text}" == *"doc:1 --ms.within_domain--> topic:alpha"* ]] || fail "prompt missing graph edge context"
text_out="$(app_ai_answer_text "doc:1" "What domain is doc:1 in?" "ms.within_domain")" || fail "app_ai_answer_text failed"
[[ "${text_out}" == "Grounded answer from mock model." ]] || fail "text output mismatch"
set +e
MOCK_NO_EDGES=1 strict_out="$(app_ai_answer_json "doc:1" "What domain is doc:1 in?" "ms.within_domain" "1")"
strict_rc=$?
set -e
[[ "${strict_rc}" -ne 0 ]] || fail "expected non-zero for --require-evidence with no supporting edges"
printf '%s' "${strict_out}" | jq -e '.done_reason == "no_evidence"' >/dev/null || fail "expected done_reason no_evidence"
printf '%s' "${strict_out}" | jq -e '.grounding.require_evidence == true and .grounding.has_evidence == false' >/dev/null \
|| fail "expected strict grounding flags"
echo "ai_answer_eval.sh: PASS"

37
tests/ai_eval.sh Executable file
View file

@ -0,0 +1,37 @@
#!/usr/bin/env bash
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# shellcheck source=/dev/null
source "${ROOT_DIR}/src/app_v2.sh"
require_jq() {
if ! command -v jq >/dev/null 2>&1; then
echo "ai_eval.sh: jq is required" >&2
exit 2
fi
}
fail() {
echo "ai_eval.sh: FAIL: $1" >&2
exit 1
}
app_init
require_jq
tags_out="$(app_ai_check)" || fail "ai-check failed"
printf '%s' "${tags_out}" | jq -e '.models | type == "array"' >/dev/null || fail "tags response missing models array"
printf '%s' "${tags_out}" | jq -e --arg model "${OLLAMA_MODEL}" '.models[] | select(.name == $model)' >/dev/null \
|| fail "configured model not present: ${OLLAMA_MODEL}"
prompt="Return one short sentence describing graph retrieval testing."
gen_out="$(app_ai_generate "${prompt}")" || fail "ai-generate failed"
printf '%s' "${gen_out}" | jq -e '.response | type == "string"' >/dev/null || fail "generate response missing text"
printf '%s' "${gen_out}" | jq -e '.model | type == "string"' >/dev/null || fail "generate response missing model"
response_text="$(printf '%s' "${gen_out}" | jq -r '.response')"
[[ -n "${response_text//[[:space:]]/}" ]] || fail "generate response text is empty"
echo "ai_eval.sh: PASS"

View file

@ -40,8 +40,8 @@ assert_contains "${startup_out}" '"ok"'
run_id="$(date +%s)"
trace_id="trace-it-${run_id}"
idempotency_key="it-seed-${run_id}"
doc_name="doc-it${run_id}"
topic_name="topic-italpha${run_id}"
doc_name="docit${run_id}"
topic_name="topicitalpha${run_id}"
payload="$(cat <<JSON
{
"idempotency_key":"${idempotency_key}",
@ -81,6 +81,23 @@ assert_contains "${sync_out}" '"events"'
retrieve_out="$(app_retrieve_with_fallback "${doc_name}" "ms.within_domain")"
assert_contains "${retrieve_out}" '"edges"'
# 4b) optional live AI-over-retrieval path (requires reachable Ollama)
if [[ "${RUN_AI_RETRIEVE_LIVE:-0}" == "1" ]]; then
ai_answer_out="$(app_ai_answer_json "${doc_name}" "Which topic is this document within?" "ms.within_domain")"
printf '%s' "${ai_answer_out}" | jq -e '.response | type == "string"' >/dev/null || {
echo "expected ai-answer to return JSON with response text" >&2
exit 1
}
printf '%s' "${ai_answer_out}" | jq -e '.evidence | type == "array"' >/dev/null || {
echo "expected ai-answer to include evidence array" >&2
exit 1
}
app_ai_answer_json "${doc_name}" "Which topic is this document within?" "ms.within_domain" "1" >/dev/null || {
echo "expected strict ai-answer to succeed when evidence exists" >&2
exit 1
}
fi
# Capture edge_ref using subgraph surface to avoid format differences.
subgraph_out="$(amduat_api_call GET "/v2/graph/subgraph?roots[]=${doc_name}&max_depth=2&dir=outgoing&limit_nodes=200&limit_edges=400&include_stats=true&max_result_bytes=1048576" && printf '%s' "${AMDUAT_LAST_BODY}")"
edge_ref="$(printf '%s' "${subgraph_out}" | jq -r '.edges[0].edge_ref // empty')"