Commit local script and AI workflow updates

2026-02-08 07:55:43 +01:00 · 2026-02-08 07:55:43 +01:00 · 92aa3b6a3f
parent 7af01c8a53
commit 92aa3b6a3f
11 changed files with 612 additions and 1 deletions
--- a/README.md
+++ b/README.md
@ -52,6 +52,11 @@ Use the integrated v2 app flow wrapper:
 ```sh
 ./scripts/v2_app.sh startup-check
 ./scripts/v2_app.sh ai-vertical-slice
 ./scripts/v2_app.sh ai-vertical-slice --skip-evals
 ./scripts/v2_app.sh ai-vertical-slice --auto-start-daemon
 ./scripts/v2_app.sh ai-agent 'doc-ai-1' 'What domain is doc-ai-1 in?' 'ms.within_domain'
 ./scripts/v2_app.sh ai-agent --json --require-evidence --max-steps 3 'doc-ai-1' 'What domain is doc-ai-1 in?' 'ms.within_domain'
 ./scripts/v2_app.sh ai-check
 ./scripts/v2_app.sh ai-generate 'Summarize retrieval behavior in one sentence.'
 ./scripts/v2_app.sh ai-generate --json 'Summarize retrieval behavior in one sentence.'
@ -65,6 +70,12 @@ Use the integrated v2 app flow wrapper:
 ./scripts/v2_app.sh tombstone '<edge_ref>'
 ```
 AI lane notes:
 - Plan and scope guardrails: `docs/ai-plan.md`
 - Deterministic seed payload: `ai/fixtures/seed_batch.json`
 - Agent loop checkpoints: `ai/runs/agent-run-*.json`
 Run integration coverage (requires a running `amduatd` daemon and `jq`):
 ```sh
--- a/ai/fixtures/seed_batch.json
+++ b/ai/fixtures/seed_batch.json
@ -0,0 +1,26 @@
 {
  "idempotency_key": "ai-slice-seed",
  "mode": "continue_on_error",
  "nodes": [
    {
      "name": "doc-ai-1"
    },
    {
      "name": "topic-ai-alpha"
    }
  ],
  "edges": [
    {
      "subject": "doc-ai-1",
      "predicate": "ms.within_domain",
      "object": "topic-ai-alpha",
      "provenance": {
        "source_uri": "urn:app:ai-seed",
        "extractor": "ai-slice-loader",
        "observed_at": 1,
        "ingested_at": 2,
        "trace_id": "trace-ai-seed-1"
      }
    }
  ]
 }
--- a/ai/runs/agent-run-20260208-071138-238480.json
+++ b/ai/runs/agent-run-20260208-071138-238480.json
@ -0,0 +1 @@
 {"run_id":"20260208-071138-238480","input":{"question":"What domain is doc-ai-1 in?","roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","require_evidence":false},"final_query":{"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain"},"stop_reason":"retrieve_failed","steps":[],"final_answer":{"response":"Agent loop ended without answer (retrieve_failed).","done_reason":"agent_stopped"}}
--- a/ai/runs/agent-run-20260208-071315-239391.json
+++ b/ai/runs/agent-run-20260208-071315-239391.json
@ -0,0 +1 @@
 {"run_id":"20260208-071315-239391","input":{"question":"What domain is doc-ai-1 in?","roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","require_evidence":false},"final_query":{"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain"},"stop_reason":"planner_stop","steps":[{"step":1,"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","context":{"nodes":0,"edges":0},"plan":{"action":"refine_query","next_roots_csv":"","next_goals_csv":"","reason":"Insufficient retrieval context to determine the domain of doc-ai-1."}},{"step":2,"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","context":{"nodes":1,"edges":0},"plan":{"action":"stop","next_roots_csv":"","next_goals_csv":"","reason":"Insufficient context to determine the domain of doc-ai-1."}}],"final_answer":{"response":"Agent loop ended without answer (planner_stop).","done_reason":"agent_stopped"}}
--- a/ai/runs/agent-run-20260208-071727-240673.json
+++ b/ai/runs/agent-run-20260208-071727-240673.json
@ -0,0 +1 @@
 {"run_id":"20260208-071727-240673","input":{"question":"What domain is doc-ai-1 in?","roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","require_evidence":false},"final_query":{"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain"},"stop_reason":"answered","steps":[{"step":1,"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","context":{"nodes":2,"edges":1},"plan":{"action":"answer","next_roots_csv":"","next_goals_csv":"","reason":"The current context provides sufficient information to answer the question."}}],"final_answer":{"model":"qwen2.5-coder:7b","created_at":"2026-02-08T06:17:29.568796336Z","response":"topic-ai-alpha","done":true,"done_reason":"stop","context":[151644,8948,198,2610,525,1207,16948,11,3465,553,54364,14817,13,1446,525,264,10950,17847,13,151645,198,151644,872,198,16141,279,3405,1667,26687,279,4771,2266,3685,624,2679,279,2266,374,38313,11,1977,6896,1128,374,7402,624,19434,279,4226,63594,382,11212,2266,510,12288,510,12,4629,12,2143,12,16,198,41122,510,12,4629,12,2143,12,16,1177,1011,18164,258,20111,29052,8544,12,2143,64538,271,12288,510,12,8544,12,2143,64538,198,41122,510,12,4629,12,2143,12,16,1177,1011,18164,258,20111,29052,8544,12,2143,64538,271,14582,510,3838,7947,374,4629,12,2143,12,16,304,30,151645,198,151644,77091,198,16411,12,2143,64538],"total_duration":175968138,"load_duration":92265198,"prompt_eval_count":126,"prompt_eval_duration":13491488,"eval_count":5,"eval_duration":57138095,"evidence":[{"edge_ref":"00012d3c287ce8af9c400519c995041274d14319bccc204303aa97768706d90ddc87","subject":"doc-ai-1","predicate":"ms.within_domain","predicate_name":"ms.within_domain","object":"topic-ai-alpha","predicate_ref":"000140ebd9f62d224d780ebf2da668c3175fd15fc2bcceba9a99df1be4c5184329bb","subject_ref":"0001ccff97484870025da6d1b7b417f4678c9e5e541b2bebe80289ffdc07505b7c26","object_ref":"00018a5ed5c3c89fc445549fd5c917b1ccf1165faef6508ad886776cdd9553f437a7","depth":1,"reasons":["reachable_from_roots","goal_predicate_match"]}],"grounding":{"has_evidence":
true,"require_evidence":false}}}
--- a/docs/ai-plan.md
+++ b/docs/ai-plan.md
@ -0,0 +1,45 @@
 # AI v2 Plan
 ## Goal
 Ship one reliable AI vertical slice on top of the v2 graph API:
 1. ingest deterministic graph facts,
 2. retrieve graph context for a root,
 3. answer with grounding evidence,
 4. execute a minimal planner loop with persisted run state.
 ## Scope Rules
 - Prioritize app-level AI workflow work in this repo.
 - Treat backend fault investigation as out-of-scope unless it blocks the vertical slice.
 - Keep `vendor/amduat-api` pinned while iterating on prompts/evals.
 ## Working Lane
 - Use branch: `feat/ai-v2-experiments`.
 - Keep core command stable: `./scripts/v2_app.sh ai-vertical-slice`.
 - Track prompt/eval tweaks under `ai/`.
 ## Acceptance Criteria
 - `./scripts/v2_app.sh ai-vertical-slice` passes on a running daemon with Ollama.
 - Output contains non-empty answer text with `grounding.has_evidence == true`.
 - `tests/ai_eval.sh` and `tests/ai_answer_eval.sh` pass in the same environment.
 - `./scripts/v2_app.sh ai-agent --json 'doc-ai-1' 'What domain is doc-ai-1 in?' 'ms.within_domain'` writes checkpoint state under `ai/runs/`.
 ## Quick Run Sequence
 1. Start the daemon (or let the vertical slice auto-start it):
 `./scripts/dev_start_daemon.sh`
 2. Run AI vertical slice:
 `./scripts/v2_app.sh ai-vertical-slice`
 3. If the daemon may not be running, use:
 `./scripts/v2_app.sh ai-vertical-slice --auto-start-daemon`
 4. Run minimal agent loop:
 `./scripts/v2_app.sh ai-agent --json --auto-start-daemon 'doc-ai-1' 'What domain is doc-ai-1 in?' 'ms.within_domain'`
 ## Stop Conditions
 - If startup, ingest, or retrieve fails due to backend regression, log the failure and pause AI iteration until fixed.
 - Do not switch scope to broad backend cleanup without an explicit decision.
--- a/scripts/ai_agent_loop.sh
+++ b/scripts/ai_agent_loop.sh
@ -0,0 +1,302 @@
 #!/usr/bin/env bash
 set -euo pipefail
 ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
 # shellcheck source=/dev/null
 source "${ROOT_DIR}/src/app_v2.sh"
 # Print the command synopsis, the four-phase loop summary and the option
 # list on stdout. Error paths in this script call it as `usage >&2`.
 usage() {
  # Unquoted heredoc delimiter, so $0 expands to the path the script was run as.
  cat <<USAGE
 usage: $0 [--json] [--require-evidence] [--max-steps N] [--state-file PATH] [--auto-start-daemon] ROOTS_CSV QUESTION [GOAL_PREDICATES_CSV]
 Minimal agent loop v1:
  1) retrieve context
  2) ask model for next action (answer/refine_query/stop)
  3) optionally refine roots/goals and repeat
  4) produce grounded answer and persist run checkpoint
 Options:
  --json                 print full final JSON payload
  --require-evidence     fail if no supporting edges are found
  --max-steps N          planner iterations before answer fallback (default: 3)
  --state-file PATH      write run state to this path
  --auto-start-daemon    start daemon if startup checks fail
 USAGE
 }
 # Guard: terminate the script with status 2 unless `jq` can be resolved
 # on PATH; returns 0 otherwise.
 require_jq() {
  command -v jq >/dev/null 2>&1 && return 0
  echo "ai_agent_loop.sh: jq is required" >&2
  exit 2
 }
 # Make sure the daemon answers app_startup_checks before the agent runs.
 #
 # Reads the globals auto_start_daemon, SOCK, SPACE and ROOT_DIR, plus the
 # optional AI_DAEMON_STORE_BACKEND / AI_DAEMON_STORE_ROOT / AI_DAEMON_LOG_PATH
 # environment overrides.
 # Returns 0 once the checks pass, 1 (with a diagnostic on stderr) otherwise.
 ensure_daemon_ready() {
  # Fast path: the daemon is already serving.
  if app_startup_checks >/dev/null 2>&1; then
    return 0
  fi

  # Without --auto-start-daemon we only explain how to recover.
  if [[ "${auto_start_daemon}" != "1" ]]; then
    echo "ai_agent_loop.sh: daemon unreachable on SOCK=${SOCK}" >&2
    echo "hint: run ./scripts/dev_start_daemon.sh or pass --auto-start-daemon" >&2
    return 1
  fi

  local store_backend="${AI_DAEMON_STORE_BACKEND:-fs}"
  local store_root="${AI_DAEMON_STORE_ROOT:-/tmp/amduat-asl-ai-agent}"
  local log_path="${AI_DAEMON_LOG_PATH:-/tmp/ai-agent-daemon.log}"

  echo "daemon not reachable; attempting startup via scripts/dev_start_daemon.sh" >&2
  # Launch detached (nohup + disown) with all daemon output going to the log.
  STORE_BACKEND="${store_backend}" STORE_ROOT="${store_root}" SOCK="${SOCK}" SPACE="${SPACE}" \
    nohup "${ROOT_DIR}/scripts/dev_start_daemon.sh" >"${log_path}" 2>&1 &
  disown "$!" 2>/dev/null || true

  # Poll up to 80 times, 0.1s apart, then make one last check.
  local attempt=0
  while (( attempt < 80 )); do
    if app_startup_checks >/dev/null 2>&1; then
      return 0
    fi
    sleep 0.1
    attempt=$(( attempt + 1 ))
  done
  if app_startup_checks >/dev/null 2>&1; then
    return 0
  fi

  echo "ai_agent_loop.sh: daemon still unreachable after startup attempt" >&2
  echo "see ${log_path} for startup logs" >&2
  return 1
 }
 # Append one step record to the global steps_json array.
 #   $1  JSON text of the step to add (passed to jq via --argjson).
 append_step() {
  local new_entry="$1"
  steps_json="$(printf '%s\n' "${steps_json}" | jq -c --argjson step "${new_entry}" '. + [$step]')"
 }
 # Turn a raw model reply into exactly one normalised planner decision.
 #
 # Arguments:
 #   $1  JSON document returned by the model call; its `.response` field
 #       carries the plan text (missing/null is treated as "").
 # Output (stdout):
 #   One compact JSON object with the keys action, next_roots_csv,
 #   next_goals_csv and reason. `action` is always one of
 #   "answer" | "refine_query" | "stop"; anything else becomes "answer".
 # Fallbacks (all resolve to action "answer"):
 #   planner_empty_response  the plan text contained no JSON value at all
 #   planner_non_object      the first JSON value was not an object
 #   planner_parse_error     the plan text was not parseable as JSON
 # A malformed $1 itself still makes the first jq call fail, as before.
 extract_plan_json() {
  local model_out="$1"
  local raw_plan
  raw_plan="$(jq -r '.response // ""' <<<"${model_out}")"
  local normalized_plan
  # Drop a leading ```lang fence line and a trailing ``` fence line, which
  # models tend to add despite the "no markdown" instruction.
  normalized_plan="$(printf '%s\n' "${raw_plan}" \
    | sed -e '1s/^```[[:alnum:]_-]*[[:space:]]*$//' -e '$s/^```[[:space:]]*$//')"
  local parsed_plan
  # Slurp (-s) so the filter runs exactly once on the array of all values:
  #  - an empty reply gives [] instead of silently producing no output,
  #  - several values collapse to the first one,
  #  - a parse error produces no partial output before the fallback kicks in.
  # Without this the caller received zero or multiple plan objects, and its
  # `--argjson plan` call aborted the whole run.
  parsed_plan="$(printf '%s' "${normalized_plan}" | jq -cs '
    if length == 0 then {"action":"answer","reason":"planner_empty_response"}
    elif (.[0] | type) == "object" then .[0]
    else {"action":"answer","reason":"planner_non_object"}
    end
  ' 2>/dev/null || printf '%s' '{"action":"answer","reason":"planner_parse_error"}')"
  # Whitelist the action and flatten whitespace in the free-text fields so
  # the result is safe to embed in CSV-style arguments and one-line logs.
  jq -c '
    def clean_csv(v): (v // "" | tostring | gsub("[\\r\\n\\t]+";" ") | gsub(" +";" ") | sub("^ ";"") | sub(" $";""));
    . as $r
    | {
        action: (
          ($r.action // "answer" | tostring) as $a
          | if ($a == "answer" or $a == "refine_query" or $a == "stop") then $a else "answer" end
        ),
        next_roots_csv: clean_csv($r.next_roots_csv // ""),
        next_goals_csv: clean_csv($r.next_goals_csv // ""),
        reason: clean_csv($r.reason // "")
      }
  ' <<<"${parsed_plan}"
 }
 # Ask the model which action the agent should take next and print the
 # normalised plan object produced by extract_plan_json.
 #
 # Arguments:
 #   $1  question being answered
 #   $2  current roots CSV
 #   $3  current goal-predicates CSV
 #   $4  retrieval result JSON (only its .nodes / .edges lengths are used here)
 #   $5  1-based step number, embedded in the prompt
 plan_next_action() {
  local question="$1"
  local roots_csv="$2"
  local goals_csv="$3"
  local retrieve_json="$4"
  local step_no="$5"
  local context_stats
  # The planner sees node/edge COUNTS only, not the retrieved graph content.
  context_stats="$(jq -c '{nodes:(.nodes // [] | length), edges:(.edges // [] | length)}' <<<"${retrieve_json}")"
  local prompt
  # Unquoted heredoc: the ${...} references below are expanded into the prompt.
  prompt="$(cat <<PROMPT
 You are an execution planner for a graph-grounded QA agent.
 Decide the NEXT action only.
 Rules:
 - Return STRICT JSON object only.
 - action must be one of: "answer", "refine_query", "stop".
 - Use "refine_query" only if retrieval context is clearly insufficient.
 - Keep next_roots_csv / next_goals_csv empty unless refining.
 - Do not include markdown or prose outside JSON.
 Current step: ${step_no}
 Question: ${question}
 Current roots_csv: ${roots_csv}
 Current goals_csv: ${goals_csv}
 Context stats: ${context_stats}
 Required JSON schema:
 {"action":"answer|refine_query|stop","next_roots_csv":"","next_goals_csv":"","reason":""}
 PROMPT
 )"
  local plan_model_out
  # app_ai_generate_json is defined outside this file view; extract_plan_json
  # reads the plan text from its `.response` field.
  plan_model_out="$(app_ai_generate_json "${prompt}")"
  extract_plan_json "${plan_model_out}"
 }
 # Option defaults; overridden by the flags handled below.
 output_mode="text"
 require_evidence=0
 max_steps=3
 auto_start_daemon=0
 state_file=""

 # Consume leading options. Parsing stops at `--` or at the first
 # non-option word; the remaining words are the positional arguments.
 until (( $# == 0 )); do
  case "$1" in
    --json)              output_mode="json";  shift ;;
    --require-evidence)  require_evidence=1;  shift ;;
    --auto-start-daemon) auto_start_daemon=1; shift ;;
    --max-steps)
      # The flag needs a value after it.
      if [[ $# -lt 2 ]]; then
        usage >&2
        exit 2
      fi
      max_steps="$2"
      shift 2
      ;;
    --state-file)
      if [[ $# -lt 2 ]]; then
        usage >&2
        exit 2
      fi
      state_file="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    --)
      shift
      break
      ;;
    -*)
      # Unknown option.
      usage >&2
      exit 2
      ;;
    *)
      break
      ;;
  esac
 done
 # Exactly two or three positional arguments must remain after the options:
 # ROOTS_CSV QUESTION [GOAL_PREDICATES_CSV].
 if [[ $# -lt 2 || $# -gt 3 ]]; then
  usage >&2
  exit 2
 fi
 roots_csv="$1"
 question="$2"
 goals_csv="${3:-}"
 # Accept only a single digit 1..8, optionally zero-padded, and store it
 # without the padding. Comparing the raw text numerically is unsafe: bash
 # reads "08"/"09" as invalid octal, the comparison errors out as "false",
 # and the bad value would pass validation only to break the main loop's
 # arithmetic later.
 if [[ "${max_steps}" =~ ^0*([1-8])$ ]]; then
  max_steps="${BASH_REMATCH[1]}"
 else
  echo "ai_agent_loop.sh: --max-steps must be integer in [1,8]" >&2
  exit 2
 fi
 # ---------------------------------------------------------------------------
 # Main flow: retrieve -> plan -> (refine | stop | answer), then persist the
 # run as one JSON checkpoint and print either that JSON or the answer text.
 # ---------------------------------------------------------------------------
 require_jq
 app_init
 ensure_daemon_ready
 # Timestamp plus PID keeps run ids unique across concurrent invocations.
 run_id="$(date +%Y%m%d-%H%M%S)-$$"
 if [[ -z "${state_file}" ]]; then
  state_file="${ROOT_DIR}/ai/runs/agent-run-${run_id}.json"
 fi
 # Create the checkpoint directory up front, for the default path and for a
 # custom --state-file alike, so a missing directory cannot discard the
 # finished run at the final write.
 mkdir -p "$(dirname -- "${state_file}")"
 steps_json="[]"
 final_answer_json=""
 # Stays in effect only if every planner iteration asked for a refinement.
 stop_reason="max_steps_reached"
 step_no=1
 while (( step_no <= max_steps )); do
  retrieve_out="$(app_retrieve_with_fallback "${roots_csv}" "${goals_csv}")" || {
    stop_reason="retrieve_failed"
    break
  }
  context_stats="$(jq -c '{nodes:(.nodes // [] | length), edges:(.edges // [] | length)}' <<<"${retrieve_out}")"
  plan_json="$(plan_next_action "${question}" "${roots_csv}" "${goals_csv}" "${retrieve_out}" "${step_no}")"
  plan_action="$(jq -r '.action' <<<"${plan_json}")"
  next_roots="$(jq -r '.next_roots_csv // ""' <<<"${plan_json}")"
  next_goals="$(jq -r '.next_goals_csv // ""' <<<"${plan_json}")"
  # Record the query used for this step together with the planner decision.
  step_record="$(jq -nc \
    --argjson step "${step_no}" \
    --arg roots_csv "${roots_csv}" \
    --arg goals_csv "${goals_csv}" \
    --argjson context "${context_stats}" \
    --argjson plan "${plan_json}" \
    '{step:$step,roots_csv:$roots_csv,goals_csv:$goals_csv,context:$context,plan:$plan}')"
  append_step "${step_record}"
  if [[ "${plan_action}" == "refine_query" ]]; then
    # Empty suggestions keep the current roots/goals unchanged.
    if [[ -n "${next_roots}" ]]; then
      roots_csv="${next_roots}"
    fi
    if [[ -n "${next_goals}" ]]; then
      goals_csv="${next_goals}"
    fi
    step_no=$(( step_no + 1 ))
    continue
  fi
  if [[ "${plan_action}" == "stop" ]]; then
    stop_reason="planner_stop"
    break
  fi
  # Any other action means "answer"; one attempt, success or failure ends the loop.
  if final_answer_json="$(app_ai_answer_json "${roots_csv}" "${question}" "${goals_csv}" "${require_evidence}")"; then
    stop_reason="answered"
    break
  fi
  stop_reason="answer_failed"
  break
 done
 # Answer fallback promised by the --max-steps help text: when the planner
 # used up its budget refining, still try to answer with the last query.
 # stop_reason stays "max_steps_reached" so the checkpoint shows why the
 # loop ended; a failed attempt falls through to the placeholder below.
 if [[ "${stop_reason}" == "max_steps_reached" ]]; then
  final_answer_json="$(app_ai_answer_json "${roots_csv}" "${question}" "${goals_csv}" "${require_evidence}")" || final_answer_json=""
 fi
 # No model answer available: synthesise a placeholder so final_answer is
 # always a JSON object with a `response` string.
 if [[ -z "${final_answer_json}" ]]; then
  final_answer_json="$(jq -nc --arg msg "Agent loop ended without answer (${stop_reason})." '{response:$msg,done_reason:"agent_stopped"}')"
 fi
 # $1 / $3 are still the original positionals, i.e. the query as first asked;
 # roots_csv / goals_csv hold the possibly refined final query.
 run_json="$(jq -nc \
  --arg run_id "${run_id}" \
  --arg question "${question}" \
  --arg initial_roots_csv "$1" \
  --arg initial_goals_csv "${3:-}" \
  --arg final_roots_csv "${roots_csv}" \
  --arg final_goals_csv "${goals_csv}" \
  --arg stop_reason "${stop_reason}" \
  --argjson require_evidence "$( [[ "${require_evidence}" == "1" ]] && echo true || echo false )" \
  --argjson steps "${steps_json}" \
  --argjson final_answer "${final_answer_json}" \
  '{
      run_id:$run_id,
      input:{
        question:$question,
        roots_csv:$initial_roots_csv,
        goals_csv:$initial_goals_csv,
        require_evidence:$require_evidence
      },
      final_query:{
        roots_csv:$final_roots_csv,
        goals_csv:$final_goals_csv
      },
      stop_reason:$stop_reason,
      steps:$steps,
      final_answer:$final_answer
    }')"
 printf '%s\n' "${run_json}" > "${state_file}"
 if [[ "${output_mode}" == "json" ]]; then
  printf '%s\n' "${run_json}"
 else
  jq -r '.final_answer.response // "No response"' <<<"${run_json}"
  echo
  echo "state_file=${state_file}"
 fi
--- a/scripts/ai_vertical_slice.sh
+++ b/scripts/ai_vertical_slice.sh
@ -0,0 +1,141 @@
 #!/usr/bin/env bash
 set -euo pipefail
 ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
 # shellcheck source=/dev/null
 source "${ROOT_DIR}/src/app_v2.sh"
 # Print the synopsis, the five slice stages and the supported environment
 # overrides on stdout. Error paths in this script call it as `usage >&2`.
 usage() {
  # Unquoted heredoc delimiter, so $0 expands to the path the script was run as.
  cat <<USAGE
 usage: $0 [--skip-evals] [--auto-start-daemon]
 Runs the AI vertical slice:
  1) startup checks
  2) seed graph ingest from ai/fixtures/seed_batch.json
  3) retrieve grounding context
  4) generate grounded answer (require evidence)
  5) optional eval scripts
 Environment overrides:
  AI_SLICE_FIXTURE_PATH      (default: ai/fixtures/seed_batch.json)
  AI_SLICE_ROOTS_CSV         (default: doc-ai-1)
  AI_SLICE_GOALS_CSV         (default: ms.within_domain)
  AI_SLICE_QUESTION          (default: What domain is doc-ai-1 in?)
  AI_SLICE_SKIP_EVALS        (default: 0)
  AI_SLICE_AUTO_START_DAEMON (default: 0)
 USAGE
 }
 # Guard: terminate the script with status 2 unless `jq` can be resolved
 # on PATH; returns 0 otherwise.
 require_jq() {
  command -v jq >/dev/null 2>&1 && return 0
  echo "ai_vertical_slice.sh: jq is required" >&2
  exit 2
 }
 # Flag defaults come from the environment; command-line flags override them.
 skip_evals="${AI_SLICE_SKIP_EVALS:-0}"
 auto_start_daemon="${AI_SLICE_AUTO_START_DAEMON:-0}"

 # Only flags are accepted: any other word is a usage error (status 2).
 until (( $# == 0 )); do
  case "$1" in
    --skip-evals)        skip_evals=1;        shift ;;
    --auto-start-daemon) auto_start_daemon=1; shift ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      usage >&2
      exit 2
      ;;
  esac
 done
 # Query and fixture settings, each overridable through the environment.
 question="${AI_SLICE_QUESTION:-What domain is doc-ai-1 in?}"
 goals_csv="${AI_SLICE_GOALS_CSV:-ms.within_domain}"
 roots_csv="${AI_SLICE_ROOTS_CSV:-doc-ai-1}"
 fixture_path="${AI_SLICE_FIXTURE_PATH:-${ROOT_DIR}/ai/fixtures/seed_batch.json}"

 # Fail early (status 2) when the seed fixture is missing.
 if [[ ! -f "${fixture_path}" ]]; then
  echo "ai_vertical_slice.sh: fixture not found: ${fixture_path}" >&2
  exit 2
 fi

 require_jq
 app_init
 # Make sure the daemon answers app_startup_checks before the slice runs.
 #
 # Reads the globals auto_start_daemon, SOCK, SPACE and ROOT_DIR, plus the
 # optional AI_DAEMON_STORE_BACKEND / AI_DAEMON_STORE_ROOT / AI_DAEMON_LOG_PATH
 # environment overrides.
 # Returns 0 once the checks pass, 1 (with a diagnostic on stderr) otherwise.
 ensure_daemon_ready() {
  # Fast path: the daemon is already serving.
  if app_startup_checks >/dev/null 2>&1; then
    return 0
  fi

  # Without --auto-start-daemon we only explain how to recover.
  if [[ "${auto_start_daemon}" != "1" ]]; then
    echo "ai_vertical_slice.sh: daemon unreachable on SOCK=${SOCK}" >&2
    echo "hint: run ./scripts/dev_start_daemon.sh or pass --auto-start-daemon" >&2
    return 1
  fi

  local store_backend="${AI_DAEMON_STORE_BACKEND:-fs}"
  local store_root="${AI_DAEMON_STORE_ROOT:-/tmp/amduat-asl-ai-slice}"
  local log_path="${AI_DAEMON_LOG_PATH:-/tmp/ai-vertical-slice-daemon.log}"

  echo "daemon not reachable; attempting startup via scripts/dev_start_daemon.sh" >&2
  # Launch detached (nohup + disown) with all daemon output going to the log.
  STORE_BACKEND="${store_backend}" STORE_ROOT="${store_root}" SOCK="${SOCK}" SPACE="${SPACE}" \
    nohup "${ROOT_DIR}/scripts/dev_start_daemon.sh" >"${log_path}" 2>&1 &
  disown "$!" 2>/dev/null || true

  # Poll up to 80 times, 0.1s apart, then make one last check.
  local attempt=0
  while (( attempt < 80 )); do
    if app_startup_checks >/dev/null 2>&1; then
      return 0
    fi
    sleep 0.1
    attempt=$(( attempt + 1 ))
  done
  if app_startup_checks >/dev/null 2>&1; then
    return 0
  fi

  echo "ai_vertical_slice.sh: daemon still unreachable after startup attempt" >&2
  echo "see ${log_path} for startup logs" >&2
  return 1
 }
 # ---------------------------------------------------------------------------
 # Slice stages: startup check -> ingest -> retrieve -> grounded answer ->
 # optional evals. Any failing stage ends the script with a non-zero status.
 # ---------------------------------------------------------------------------
 ensure_daemon_ready

 echo "== startup-check =="
 app_startup_checks | jq .

 echo "== ingest fixture =="
 # The fixture's idempotency key is replaced with a per-second one.
 idempotency_key="ai-slice-$(date +%s)"
 payload="$(jq -c --arg k "${idempotency_key}" '.idempotency_key = $k' "${fixture_path}")"
 ingest_out="$(app_ingest_batch "${payload}")"
 jq . <<<"${ingest_out}"

 echo "== retrieve context =="
 retrieve_out="$(app_retrieve_with_fallback "${roots_csv}" "${goals_csv}")"
 jq . <<<"${retrieve_out}"
 # The slice needs at least one node AND one edge to ground an answer.
 if ! jq -e '((.nodes // []) | length) > 0 and ((.edges // []) | length) > 0' <<<"${retrieve_out}" >/dev/null; then
  echo "ai_vertical_slice.sh: retrieve produced no graph context" >&2
  exit 1
 fi

 echo "== grounded answer =="
 # Fourth argument "1" turns on the require-evidence mode.
 answer_out="$(app_ai_answer_json "${roots_csv}" "${question}" "${goals_csv}" "1")"
 jq . <<<"${answer_out}"
 if ! jq -e '.grounding.has_evidence == true and (.response | type == "string" and length > 0)' <<<"${answer_out}" >/dev/null; then
  echo "ai_vertical_slice.sh: answer was not grounded with evidence" >&2
  exit 1
 fi

 if [[ "${skip_evals}" == "1" ]]; then
  echo "== evals skipped =="
 else
  echo "== evals =="
  "${ROOT_DIR}/tests/ai_eval.sh"
  "${ROOT_DIR}/tests/ai_answer_eval.sh"
 fi

 echo "ai_vertical_slice.sh: PASS"
--- a/scripts/smoke_v2.sh
+++ b/scripts/smoke_v2.sh
@ -24,11 +24,45 @@ step() {
 }
 # EXIT-trap handler: stop the daemon this script started (if any), remove
 # the cursor file, and remove the isolated socket unless an existing daemon
 # is being reused. Every step is best-effort and silent.
 cleanup() {
  if [[ "${smoke_started_daemon:-0}" == "1" ]] && [[ -n "${smoke_daemon_pid:-}" ]]; then
    kill "${smoke_daemon_pid}" >/dev/null 2>&1 || :
    wait "${smoke_daemon_pid}" >/dev/null 2>&1 || :
  fi

  rm -f "${CURSOR_FILE}" >/dev/null 2>&1 || :

  case "${SMOKE_USE_EXISTING_DAEMON:-0}" in
    1) ;;  # socket belongs to a daemon we did not start; leave it alone
    *) rm -f "${smoke_sock:-}" >/dev/null 2>&1 || : ;;
  esac
 }
 # Register cleanup first so every later exit path runs it.
 trap cleanup EXIT
 require_jq

 # Isolated-daemon settings; each SMOKE_* variable overrides its default.
 : "${SMOKE_USE_EXISTING_DAEMON:=0}"
 smoke_backend="${SMOKE_STORE_BACKEND:-fs}"
 smoke_root="${SMOKE_STORE_ROOT:-/tmp/amduat-asl-smoke-${USER:-user}}"
 smoke_sock="${SMOKE_SOCK:-/tmp/amduatd-smoke-${USER:-user}.sock}"
 smoke_log="${SMOKE_DAEMON_LOG_PATH:-/tmp/smoke-v2-daemon.log}"

 # Bookkeeping read by cleanup().
 smoke_daemon_pid=""
 smoke_started_daemon=0
 # Unless an existing daemon is reused, boot a private one on smoke_sock and
 # wait for its /v2/readyz endpoint (120 polls, 0.1s apart).
 if [[ "${SMOKE_USE_EXISTING_DAEMON}" != "1" ]]; then
  # Remove a stale socket left by an earlier run.
  rm -f "${smoke_sock}" >/dev/null 2>&1 || true
  export SOCK="${smoke_sock}"

  STORE_BACKEND="${smoke_backend}" STORE_ROOT="${smoke_root}" SOCK="${smoke_sock}" SPACE="${SPACE:-app1}" \
    nohup "${ROOT_DIR}/scripts/dev_start_daemon.sh" >"${smoke_log}" 2>&1 &
  smoke_daemon_pid="$!"
  smoke_started_daemon=1

  ready=0
  for _ in {1..120}; do
    curl --globoff --silent --show-error --unix-socket "${smoke_sock}" "http://localhost/v2/readyz" >/dev/null 2>&1 && {
      ready=1
      break
    }
    sleep 0.1
  done

  if [[ "${ready}" != "1" ]]; then
    fail "isolated daemon did not become ready (log: ${smoke_log})"
  fi
 fi
 app_init
 run_id="$(date +%s)"
--- a/scripts/v2_app.sh
+++ b/scripts/v2_app.sh
@ -11,6 +11,8 @@ usage: $0 COMMAND [args]
 commands:
  startup-check
  ai-vertical-slice [--skip-evals] [--auto-start-daemon]
  ai-agent [--json] [--require-evidence] [--max-steps N] [--state-file PATH] [--auto-start-daemon] ROOTS_CSV QUESTION [GOAL_PREDICATES_CSV]
  ai-check
  ai-generate [--json] PROMPT
  ai-answer [--json] [--require-evidence] ROOTS_CSV QUESTION [GOAL_PREDICATES_CSV]
@ -37,6 +39,12 @@ case "${cmd}" in
  startup-check)
    app_startup_checks
    ;;
  ai-vertical-slice)
    "${ROOT_DIR}/scripts/ai_vertical_slice.sh" "$@"
    ;;
  ai-agent)
    "${ROOT_DIR}/scripts/ai_agent_loop.sh" "$@"
    ;;
  ai-check)
    app_ai_check
    ;;
--- a/tests/integration_v2.sh
+++ b/tests/integration_v2.sh
@ -25,8 +25,49 @@ assert_contains() {
 # in the regular integration entrypoint.
 "${ROOT_DIR}/tests/changes_consumer_handler.sh"
 app_init
 require_jq
 # Isolated-daemon settings; each IT_* variable overrides its default.
 : "${IT_USE_EXISTING_DAEMON:=0}"
 it_backend="${IT_STORE_BACKEND:-fs}"
 it_root="${IT_STORE_ROOT:-/tmp/amduat-asl-it-${USER:-user}}"
 it_sock="${IT_SOCK:-/tmp/amduatd-it-${USER:-user}.sock}"
 it_log="${IT_DAEMON_LOG_PATH:-/tmp/integration-v2-daemon.log}"

 # Bookkeeping read by cleanup().
 it_daemon_pid=""
 it_started_daemon=0
 # EXIT-trap handler: stop the daemon this test started (if any) and remove
 # the isolated socket unless an existing daemon is being reused. Both steps
 # are best-effort and silent.
 cleanup() {
  if [[ "${it_started_daemon}" == "1" ]] && [[ -n "${it_daemon_pid}" ]]; then
    kill "${it_daemon_pid}" >/dev/null 2>&1 || :
    wait "${it_daemon_pid}" >/dev/null 2>&1 || :
  fi

  case "${IT_USE_EXISTING_DAEMON}" in
    1) ;;  # socket belongs to a daemon we did not start; leave it alone
    *) rm -f "${it_sock}" >/dev/null 2>&1 || : ;;
  esac
 }
 # Register cleanup before anything is started so every exit path runs it.
 trap cleanup EXIT

 # Unless an existing daemon is reused, boot a private one on it_sock and
 # wait for its /v2/readyz endpoint (120 polls, 0.1s apart).
 if [[ "${IT_USE_EXISTING_DAEMON}" != "1" ]]; then
  # Remove a stale socket left by an earlier run.
  rm -f "${it_sock}" >/dev/null 2>&1 || true
  export SOCK="${it_sock}"

  STORE_BACKEND="${it_backend}" STORE_ROOT="${it_root}" SOCK="${it_sock}" SPACE="${SPACE:-app1}" \
    nohup "${ROOT_DIR}/scripts/dev_start_daemon.sh" >"${it_log}" 2>&1 &
  it_daemon_pid="$!"
  it_started_daemon=1

  ready=0
  for _ in {1..120}; do
    curl --globoff --silent --show-error --unix-socket "${it_sock}" "http://localhost/v2/readyz" >/dev/null 2>&1 && {
      ready=1
      break
    }
    sleep 0.1
  done

  case "${ready}" in
    1) ;;
    *)
      echo "integration_v2.sh: FAIL (isolated daemon did not become ready, log: ${it_log})" >&2
      exit 1
      ;;
  esac
 fi
 app_init
 if [[ ! -S "${SOCK}" ]]; then
  echo "integration_v2.sh: SKIP (socket not found at ${SOCK})"
  exit 77
		`@ -0,0 +1 @@`
							`{"run_id":"20260208-071138-238480","input":{"question":"What domain is doc-ai-1 in?","roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","require_evidence":false},"final_query":{"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain"},"stop_reason":"retrieve_failed","steps":[],"final_answer":{"response":"Agent loop ended without answer (retrieve_failed).","done_reason":"agent_stopped"}}`
		`@ -0,0 +1 @@`
							{"run_id":"20260208-071315-239391","input":{"question":"What domain is doc-ai-1 in?","roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","require_evidence":false},"final_query":{"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain"},"stop_reason":"planner_stop","steps":[{"step":1,"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","context":{"nodes":0,"edges":0},"plan":{"action":"refine_query","next_roots_csv":"","next_goals_csv":"","reason":"Insufficient retrieval context to determine the domain of doc-ai-1."}},{"step":2,"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","context":{"nodes":1,"edges":0},"plan":{"action":"stop","next_roots_csv":"","next_goals_csv":"","reason":"Insufficient context to determine the domain of doc-ai-1."}}],"final_answer":{"response":"Agent loop ended without answer (planner_stop).","done_reason":"agent_stopped"}}
		`@ -0,0 +1 @@`
							{"run_id":"20260208-071727-240673","input":{"question":"What domain is doc-ai-1 in?","roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","require_evidence":false},"final_query":{"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain"},"stop_reason":"answered","steps":[{"step":1,"roots_csv":"doc-ai-1","goals_csv":"ms.within_domain","context":{"nodes":2,"edges":1},"plan":{"action":"answer","next_roots_csv":"","next_goals_csv":"","reason":"The current context provides sufficient information to answer the question."}}],"final_answer":{"model":"qwen2.5-coder:7b","created_at":"2026-02-08T06:17:29.568796336Z","response":"topic-ai-alpha","done":true,"done_reason":"stop","context":[151644,8948,198,2610,525,1207,16948,11,3465,553,54364,14817,13,1446,525,264,10950,17847,13,151645,198,151644,872,198,16141,279,3405,1667,26687,279,4771,2266,3685,624,2679,279,2266,374,38313,11,1977,6896,1128,374,7402,624,19434,279,4226,63594,382,11212,2266,510,12288,510,12,4629,12,2143,12,16,198,41122,510,12,4629,12,2143,12,16,1177,1011,18164,258,20111,29052,8544,12,2143,64538,271,12288,510,12,8544,12,2143,64538,198,41122,510,12,4629,12,2143,12,16,1177,1011,18164,258,20111,29052,8544,12,2143,64538,271,14582,510,3838,7947,374,4629,12,2143,12,16,304,30,151645,198,151644,77091,198,16411,12,2143,64538],"total_duration":175968138,"load_duration":92265198,"prompt_eval_count":126,"prompt_eval_duration":13491488,"eval_count":5,"eval_duration":57138095,"evidence":[{"edge_ref":"00012d3c287ce8af9c400519c995041274d14319bccc204303aa97768706d90ddc87","subject":"doc-ai-1","predicate":"ms.within_domain","predicate_name":"ms.within_domain","object":"topic-ai-alpha","predicate_ref":"000140ebd9f62d224d780ebf2da668c3175fd15fc2bcceba9a99df1be4c5184329bb","subject_ref":"0001ccff97484870025da6d1b7b417f4678c9e5e541b2bebe80289ffdc07505b7c26","object_ref":"00018a5ed5c3c89fc445549fd5c917b1ccf1165faef6508ad886776cdd9553f437a7","depth":1,"reasons":["reachable_from_roots","goal_predicate_match"]}],"grounding":{"has_evid
ence":true,"require_evidence":false}}}