diff --git a/README.md b/README.md index e91e010..b97bf21 100644 --- a/README.md +++ b/README.md @@ -23,8 +23,10 @@ cp config/env.example config/env.local ``` `dev_start_daemon.sh` initializes the store for the selected backend and, when `STORE_BACKEND=index`, runs a quick startup write probe. If index writes are -unhealthy, it automatically falls back to `fs` (configurable via -`INDEX_BACKEND_PROBE` and `INDEX_BACKEND_FALLBACK` in `config/env.local`). +unhealthy, it first tries a one-time index store repair (backup + re-init) and +then falls back to `fs` if still unhealthy (configurable via +`INDEX_BACKEND_PROBE`, `INDEX_BACKEND_REPAIR`, and `INDEX_BACKEND_FALLBACK` in +`config/env.local`). 3. Run startup checks against the daemon socket: @@ -50,6 +52,12 @@ Use the integrated v2 app flow wrapper: ```sh ./scripts/v2_app.sh startup-check +./scripts/v2_app.sh ai-check +./scripts/v2_app.sh ai-generate 'Summarize retrieval behavior in one sentence.' +./scripts/v2_app.sh ai-generate --json 'Summarize retrieval behavior in one sentence.' +./scripts/v2_app.sh ai-answer 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain' +./scripts/v2_app.sh ai-answer --json 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain' +./scripts/v2_app.sh ai-answer --json --require-evidence 'doc-1' 'What topic is doc-1 in?' 
'ms.within_domain' ./scripts/v2_app.sh ingest '{"idempotency_key":"k1","mode":"continue_on_error","nodes":[{"name":"doc-1"}]}' ./scripts/v2_app.sh sync-once ./scripts/v2_app.sh consume-changes --once @@ -70,6 +78,13 @@ Run local cursor/handler semantics checks (no daemon required): ./tests/changes_consumer_handler.sh ``` +Run AI connectivity and generation smoke eval (no daemon required): + +```sh +./tests/ai_eval.sh +./tests/ai_answer_eval.sh +``` + Run a fast end-to-end smoke (startup + ingest + sync + retrieve + tombstone): ```sh diff --git a/config/env.example b/config/env.example index 9ba2b92..7525db1 100644 --- a/config/env.example +++ b/config/env.example @@ -9,6 +9,8 @@ STORE_BACKEND="index" # For index backend, run a startup write probe and fallback to fs if broken. INDEX_BACKEND_PROBE="1" INDEX_BACKEND_FALLBACK="fs" +# Attempt one-time index root repair (backup + index init --force) before fs fallback. +INDEX_BACKEND_REPAIR="1" FS_FALLBACK_STORE_ROOT=".amduat-asl-fs" # AMDUATD_BIN="/path/to/amduatd" # ASL_BIN="/path/to/amduat-asl" @@ -26,3 +28,10 @@ RETRY_MAX_MS="2000" # Curl timeouts CURL_CONNECT_TIMEOUT_SECONDS="2" CURL_MAX_TIME_SECONDS="30" + +# Optional Ollama AI settings +OLLAMA_HOST="http://127.0.0.1:11434" +OLLAMA_MODEL="qwen2.5-coder:7b" +OLLAMA_TIMEOUT_SECONDS="60" +AI_CONTEXT_MAX_CHARS="12000" +AI_EVIDENCE_MAX_EDGES="5" diff --git a/docs/index-backend-repro.md b/docs/index-backend-repro.md new file mode 100644 index 0000000..03e1960 --- /dev/null +++ b/docs/index-backend-repro.md @@ -0,0 +1,85 @@ +# Index Backend Repro (Isolated) + +## Summary + +With a fresh store root, `amduatd` on `--store-backend index` accepts the first +`/v2/graph/nodes` write, then fails on the second node write with `{"error":"store error"}`. +The same call sequence succeeds on `--store-backend fs`. + +Observed daemon log on index: + +```text +ERROR: edge append failed for space/app1/daemon/edges (err=2) +``` + +`err=2` maps to `AMDUAT_ASL_COLLECTION_ERR_IO`. 
+ +## Minimal Repro + +### 1) Start index backend + +```bash +STORE_BACKEND=index INDEX_BACKEND_PROBE=0 \ +STORE_ROOT=/tmp/amduat-asl-index-iso \ +SOCK=/tmp/amduatd-index-iso.sock SPACE=app1 \ +./scripts/dev_start_daemon.sh +``` + +### 2) Execute direct API calls + +```bash +curl --globoff --silent --show-error --unix-socket /tmp/amduatd-index-iso.sock \ + -H 'Content-Type: application/json' -H 'X-Amduat-Space: app1' \ + -X POST --data-binary '{"name":"doca1"}' \ + http://localhost/v2/graph/nodes + +curl --globoff --silent --show-error --unix-socket /tmp/amduatd-index-iso.sock \ + -H 'Content-Type: application/json' -H 'X-Amduat-Space: app1' \ + -X POST --data-binary '{"name":"topica1"}' \ + http://localhost/v2/graph/nodes + +curl --globoff --silent --show-error --unix-socket /tmp/amduatd-index-iso.sock \ + -H 'Content-Type: application/json' -H 'X-Amduat-Space: app1' \ + -X POST --data-binary '{"subject":"doca1","predicate":"ms.within_domain","object":"topica1","provenance":{"source_uri":"urn:t","extractor":"x","observed_at":1,"ingested_at":2,"trace_id":"t1"}}' \ + http://localhost/v2/graph/edges +``` + +Expected: both nodes + edge succeed. +Actual on index: second node returns `{"error":"store error"}`, edge returns `{"error":"object not found"}`. + +### 3) Control check (fs backend) + +Run the same payloads against fs: + +```bash +STORE_BACKEND=fs STORE_ROOT=/tmp/amduat-asl-fs-iso \ +SOCK=/tmp/amduatd-fs-iso.sock SPACE=app1 \ +./scripts/dev_start_daemon.sh +``` + +All three calls succeed on `fs`. 
+ +## Likely Failure Path + +From source: + +- `vendor/amduat-api/src/amduatd_concepts.c` + - edge append path calls `amduat_asl_collection_append(...)` +- `vendor/amduat-api/vendor/amduat/src/core/asl_collection.c` + - `AMDUAT_ASL_COLLECTION_ERR_IO` comes from `amduat_asl_log_append(...)` failure +- `vendor/amduat-api/vendor/amduat/src/core/asl_log_store.c` + - failure likely in pointer/log append path (`pointer_get`, `store_get`, or `pointer_cas`) + +## Targeted Upstream Patch Path + +1. Add temporary diagnostic logging in `amduat_asl_log_append(...)` for: + - `pointer_name` + - `ptr_err` from `amduat_asl_pointer_get` + - `store_err` from `amduat_asl_store_get` / `amduat_asl_store_put` + - `cas_err` and retry exhaustion path +2. Re-run the minimal repro above and capture the first non-OK internal return. +3. Patch the specific failing branch in `asl_log_store.c` (or lower index store path) + rather than adding retries in app code. +4. Restore/commit missing graph index append scripts expected by CTest: + - `vendor/amduat-api/scripts/test_graph_index_append.sh` + - `vendor/amduat-api/scripts/test_graph_index_append_stress.sh` diff --git a/docs/v2-app-developer-guide.md b/docs/v2-app-developer-guide.md index 89dc9a6..85ec946 100644 --- a/docs/v2-app-developer-guide.md +++ b/docs/v2-app-developer-guide.md @@ -273,3 +273,19 @@ curl --unix-socket "${SOCK}" -sS -X POST "${BASE}/v2/graph/edges/tombstone" \ -H "X-Amduat-Space: ${SPACE}" \ -d "{\"edge_ref\":\"${EDGE_REF}\"}" ``` + +## 11) AI Answer Wrapper (Grounded) + +For local app usage via this scaffold: + +```sh +./scripts/v2_app.sh ai-answer 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain' +./scripts/v2_app.sh ai-answer --json 'doc-1' 'What topic is doc-1 in?' 'ms.within_domain' +./scripts/v2_app.sh ai-answer --json --require-evidence 'doc-1' 'What topic is doc-1 in?' 
'ms.within_domain' +``` + +Behavior notes: + +- The command retrieves graph context first (`/v2/graph/retrieve` with `/v2/graph/subgraph` fallback). +- JSON output includes an `evidence[]` array with normalized triplets and refs (`predicate_ref` retained, `predicate_name` preferred when resolvable). +- `--require-evidence` enforces strict grounding: command exits non-zero when no supporting edges are found. diff --git a/scripts/dev_start_daemon.sh b/scripts/dev_start_daemon.sh index fc3d098..d6ce9df 100755 --- a/scripts/dev_start_daemon.sh +++ b/scripts/dev_start_daemon.sh @@ -8,6 +8,10 @@ override_space="${SPACE:-}" override_sock="${SOCK:-}" override_amduatd_bin="${AMDUATD_BIN:-}" override_asl_bin="${ASL_BIN:-}" +override_index_backend_probe="${INDEX_BACKEND_PROBE:-}" +override_index_backend_fallback="${INDEX_BACKEND_FALLBACK:-}" +override_index_backend_repair="${INDEX_BACKEND_REPAIR:-}" +override_fs_fallback_store_root="${FS_FALLBACK_STORE_ROOT:-}" ENV_FILE="${ROOT_DIR}/config/env.local" if [[ ! 
-f "${ENV_FILE}" ]]; then @@ -22,6 +26,10 @@ if [[ -n "${override_space}" ]]; then SPACE="${override_space}"; fi if [[ -n "${override_sock}" ]]; then SOCK="${override_sock}"; fi if [[ -n "${override_amduatd_bin}" ]]; then AMDUATD_BIN="${override_amduatd_bin}"; fi if [[ -n "${override_asl_bin}" ]]; then ASL_BIN="${override_asl_bin}"; fi +if [[ -n "${override_index_backend_probe}" ]]; then INDEX_BACKEND_PROBE="${override_index_backend_probe}"; fi +if [[ -n "${override_index_backend_fallback}" ]]; then INDEX_BACKEND_FALLBACK="${override_index_backend_fallback}"; fi +if [[ -n "${override_index_backend_repair}" ]]; then INDEX_BACKEND_REPAIR="${override_index_backend_repair}"; fi +if [[ -n "${override_fs_fallback_store_root}" ]]; then FS_FALLBACK_STORE_ROOT="${override_fs_fallback_store_root}"; fi STORE_ROOT="${STORE_ROOT:-${ROOT_DIR}/.amduat-asl}" STORE_BACKEND="${STORE_BACKEND:-index}" @@ -29,6 +37,7 @@ SPACE="${SPACE:-app1}" SOCK="${SOCK:-${ROOT_DIR}/amduatd.sock}" INDEX_BACKEND_PROBE="${INDEX_BACKEND_PROBE:-1}" INDEX_BACKEND_FALLBACK="${INDEX_BACKEND_FALLBACK:-fs}" +INDEX_BACKEND_REPAIR="${INDEX_BACKEND_REPAIR:-1}" FS_FALLBACK_STORE_ROOT="${FS_FALLBACK_STORE_ROOT:-${STORE_ROOT}-fs}" if [[ "${STORE_ROOT}" != /* ]]; then STORE_ROOT="${ROOT_DIR}/${STORE_ROOT}"; fi @@ -106,26 +115,95 @@ wait_ready() { probe_index_write_path() { local sock="$1" local space="$2" - local run_id - run_id="$(date +%s)" - local doc="probe-doc-${run_id}" - local topic="probe-topic-${run_id}" - local payload - payload="$(cat <&2 - return 1 + local raw out code + raw="$(curl --globoff --silent --show-error --unix-socket "${sock}" \ + -H "Content-Type: application/json" \ + -H "X-Amduat-Space: ${space}" \ + -X POST --data-binary "${payload}" \ + -w $'\n%{http_code}' \ + "http://localhost/v2/graph/batch")" || return 1 + code="${raw##*$'\n'}" + out="${raw%$'\n'*}" + + if [[ "${code}" != "200" ]]; then + echo "index probe HTTP ${code}: ${out}" >&2 + return 1 + fi + + # A successful backend health 
check is "node+edge write path is healthy for repeated valid payloads". + if command -v jq >/dev/null 2>&1; then + if ! printf '%s' "${out}" | jq -e '.' >/dev/null 2>&1; then + echo "index probe returned non-JSON payload: ${out}" >&2 + return 1 + fi + if printf '%s' "${out}" | jq -e 'any((.results // [] )[]?; ((.code // 0) | tonumber) >= 500)' >/dev/null 2>&1; then + echo "index probe saw server error result: ${out}" >&2 + return 1 + fi + if printf '%s' "${out}" | jq -e 'any((.results // [] )[]?; (.status == "error") or (((.code // 0) | tonumber) >= 400))' >/dev/null 2>&1; then + echo "index probe saw non-success result: ${out}" >&2 + return 1 + fi + if ! printf '%s' "${out}" | jq -e '([(.results // [] )[]? | select(.kind == "node" and .status == "applied" and ((.code // 0) | tonumber) < 300)] | length) >= 2' >/dev/null 2>&1; then + echo "index probe missing applied node writes: ${out}" >&2 + return 1 + fi + if ! printf '%s' "${out}" | jq -e 'any((.results // [] )[]?; .kind == "edge" and .status == "applied" and ((.code // 0) | tonumber) < 300)' >/dev/null 2>&1; then + echo "index probe missing applied edge write: ${out}" >&2 + return 1 + fi + if ! 
printf '%s' "${out}" | jq -e '.ok == true' >/dev/null 2>&1; then + echo "index probe non-ok payload: ${out}" >&2 + return 1 + fi + continue + fi + + if [[ "${out}" == *'"ok":true'* ]]; then + continue + fi + if [[ "${out}" == *'"code":5'* ]]; then + echo "index probe saw 5xx result: ${out}" >&2 + return 1 + fi + if [[ "${out}" == *'"ok":false'* || "${out}" == *'"status":"error"'* ]]; then + echo "index probe non-ok payload: ${out}" >&2 + return 1 + fi + echo "index probe unexpected payload: ${out}" >&2 + return 1 + done + return 0 } run_daemon_foreground() { @@ -135,19 +213,41 @@ run_daemon_foreground() { exec "${AMDUATD_BIN}" --root "${root}" --sock "${SOCK}" --store-backend "${backend}" --space "${SPACE}" } +start_probe_daemon() { + echo "starting amduatd (probe mode): root=${STORE_ROOT} sock=${SOCK} backend=${STORE_BACKEND} space=${SPACE}" >&2 + "${AMDUATD_BIN}" --root "${STORE_ROOT}" --sock "${SOCK}" --store-backend "${STORE_BACKEND}" --space "${SPACE}" & + daemon_pid=$! + cleanup_probe() { + kill "${daemon_pid}" >/dev/null 2>&1 || true + } + trap cleanup_probe EXIT +} + +stop_probe_daemon() { + kill "${daemon_pid}" >/dev/null 2>&1 || true + wait "${daemon_pid}" >/dev/null 2>&1 || true + trap - EXIT +} + +repair_index_store() { + local root="$1" + local backup_root="${root}.bak-$(date +%Y%m%d-%H%M%S)" + if [[ -d "${root}" ]]; then + echo "backing up index store root to ${backup_root}" >&2 + mv "${root}" "${backup_root}" + fi + mkdir -p "${root}" + echo "reinitializing index-backed ASL store at ${root}" >&2 + "${ASL_BIN}" index init --root "${root}" --force +} + init_store "${STORE_BACKEND}" "${STORE_ROOT}" if [[ "${STORE_BACKEND}" != "index" || "${INDEX_BACKEND_PROBE}" != "1" ]]; then run_daemon_foreground "${STORE_BACKEND}" "${STORE_ROOT}" fi -echo "starting amduatd (probe mode): root=${STORE_ROOT} sock=${SOCK} backend=${STORE_BACKEND} space=${SPACE}" >&2 -"${AMDUATD_BIN}" --root "${STORE_ROOT}" --sock "${SOCK}" --store-backend "${STORE_BACKEND}" --space 
"${SPACE}" & -daemon_pid=$! -cleanup_probe() { - kill "${daemon_pid}" >/dev/null 2>&1 || true -} -trap cleanup_probe EXIT +start_probe_daemon fallback_to_fs=0 if ! wait_ready "${SOCK}"; then @@ -164,9 +264,27 @@ if [[ "${fallback_to_fs}" == "0" ]]; then exit $? fi -kill "${daemon_pid}" >/dev/null 2>&1 || true -wait "${daemon_pid}" >/dev/null 2>&1 || true -trap - EXIT +stop_probe_daemon + +if [[ "${INDEX_BACKEND_REPAIR}" == "1" ]]; then + echo "attempting index store repair before fs fallback" >&2 + repair_index_store "${STORE_ROOT}" + start_probe_daemon + + repaired_ok=0 + if wait_ready "${SOCK}" && probe_index_write_path "${SOCK}" "${SPACE}"; then + repaired_ok=1 + fi + + if [[ "${repaired_ok}" == "1" ]]; then + trap - EXIT + wait "${daemon_pid}" + exit $? + fi + + echo "index backend repair failed" >&2 + stop_probe_daemon +fi if [[ "${INDEX_BACKEND_FALLBACK}" != "fs" ]]; then echo "set INDEX_BACKEND_FALLBACK=fs to auto-fallback, or INDEX_BACKEND_PROBE=0 to disable probe" >&2 diff --git a/scripts/smoke_v2.sh b/scripts/smoke_v2.sh index f21bce4..40be299 100755 --- a/scripts/smoke_v2.sh +++ b/scripts/smoke_v2.sh @@ -33,8 +33,8 @@ app_init run_id="$(date +%s)" idempotency_key="smoke-seed-${run_id}" -doc_name="smoke-doc-${run_id}" -topic_name="smoke-topic-${run_id}" +doc_name="smokedoc${run_id}" +topic_name="smoketopic${run_id}" goal_pred="ms.within_domain" step "startup" diff --git a/scripts/v2_app.sh b/scripts/v2_app.sh index a3f2bc9..ab3a7a9 100755 --- a/scripts/v2_app.sh +++ b/scripts/v2_app.sh @@ -11,6 +11,9 @@ usage: $0 COMMAND [args] commands: startup-check + ai-check + ai-generate [--json] PROMPT + ai-answer [--json] [--require-evidence] ROOTS_CSV QUESTION [GOAL_PREDICATES_CSV] ingest PAYLOAD_JSON sync-once consume-changes [--once] @@ -34,6 +37,56 @@ case "${cmd}" in startup-check) app_startup_checks ;; + ai-check) + app_ai_check + ;; + ai-generate) + output_mode="text" + if [[ $# -gt 0 && "$1" == "--json" ]]; then + output_mode="json" + shift + fi + if [[ $# 
-lt 1 ]]; then + echo "usage: $0 ai-generate [--json] PROMPT" >&2 + exit 2 + fi + if [[ "${output_mode}" == "json" ]]; then + app_ai_generate_json "$*" + else + app_ai_generate_text "$*" + fi + ;; + ai-answer) + output_mode="text" + require_evidence=0 + while [[ $# -gt 0 ]]; do + case "$1" in + --json) + output_mode="json" + shift + ;; + --require-evidence) + require_evidence=1 + shift + ;; + *) + break + ;; + esac + done + if [[ $# -lt 2 || $# -gt 3 ]]; then + echo "usage: $0 ai-answer [--json] [--require-evidence] ROOTS_CSV QUESTION [GOAL_PREDICATES_CSV]" >&2 + exit 2 + fi + roots_csv="$1" + question="$2" + goals_csv="${3:-}" + if [[ "${output_mode}" == "json" ]]; then + app_ai_answer_json "${roots_csv}" "${question}" "${goals_csv}" "${require_evidence}" + else + app_ai_answer_text "${roots_csv}" "${question}" "${goals_csv}" "${require_evidence}" + fi + ;; ingest) if [[ $# -ne 1 ]]; then echo "usage: $0 ingest PAYLOAD_JSON" >&2 diff --git a/src/app_v2.sh b/src/app_v2.sh index e35fc41..92dc2a0 100755 --- a/src/app_v2.sh +++ b/src/app_v2.sh @@ -78,6 +78,7 @@ app_retrieve_with_fallback() { printf '%s\n' "${AMDUAT_LAST_BODY}" return 0 fi + local retrieve_status="${AMDUAT_LAST_STATUS}" local first_root first_root="$(printf '%s' "${roots_csv}" | awk -F',' '{gsub(/^ +| +$/, "", $1); printf "%s", $1}')" @@ -90,8 +91,19 @@ app_retrieve_with_fallback() { fi fi - amduat_api_call GET "${fallback_path}" - printf '%s\n' "${AMDUAT_LAST_BODY}" + if amduat_api_call GET "${fallback_path}"; then + printf '%s\n' "${AMDUAT_LAST_BODY}" + return 0 + fi + local fallback_status="${AMDUAT_LAST_STATUS}" + + if [[ "${retrieve_status}" == "404" && "${fallback_status}" == "404" ]]; then + # Return an explicit empty graph so callers can handle no-context deterministically. 
+ printf '%s\n' '{"nodes":[],"edges":[],"stats":{"reason":"not_found"}}' + return 0 + fi + + return 1 } app_tombstone_edge() { @@ -101,3 +113,294 @@ app_tombstone_edge() { amduat_api_call POST "/v2/graph/edges/tombstone" "${payload}" printf '%s\n' "${AMDUAT_LAST_BODY}" } + +app_ai_check() { + local tags + tags="$( + curl --globoff --silent --show-error \ + --connect-timeout "${CURL_CONNECT_TIMEOUT_SECONDS}" \ + --max-time "${OLLAMA_TIMEOUT_SECONDS}" \ + "${OLLAMA_HOST}/api/tags" + )" || { + echo "failed to reach ollama at ${OLLAMA_HOST}" >&2 + return 1 + } + + printf '%s\n' "${tags}" + if [[ "${tags}" != *"\"name\":\"${OLLAMA_MODEL}\""* ]]; then + echo "warning: configured model not found in tags: ${OLLAMA_MODEL}" >&2 + fi +} + +app_ai_generate_json() { + if ! command -v jq >/dev/null 2>&1; then + echo "jq is required for ai-generate" >&2 + return 2 + fi + + local prompt="$1" + local payload + payload="$(jq -nc --arg model "${OLLAMA_MODEL}" --arg prompt "${prompt}" \ + '{model:$model,prompt:$prompt,stream:false}')" + + local out + out="$( + curl --globoff --silent --show-error \ + --connect-timeout "${CURL_CONNECT_TIMEOUT_SECONDS}" \ + --max-time "${OLLAMA_TIMEOUT_SECONDS}" \ + -H "Content-Type: application/json" \ + -X POST \ + --data-binary "${payload}" \ + "${OLLAMA_HOST}/api/generate" + )" || { + echo "failed to call ollama generate at ${OLLAMA_HOST}" >&2 + return 1 + } + + printf '%s\n' "${out}" +} + +app_ai_generate() { + local prompt="$1" + app_ai_generate_json "${prompt}" +} + +app_ai_predicate_map_json() { + if ! 
command -v jq >/dev/null 2>&1; then + echo "jq is required for ai-answer" >&2 + return 2 + fi + + if [[ "${APP_AI_PREDICATE_MAP_INIT:-0}" == "1" ]]; then + if [[ -n "${APP_AI_PREDICATE_MAP_CACHE:-}" ]]; then + printf '%s\n' "${APP_AI_PREDICATE_MAP_CACHE}" + else + printf '{}\n' + fi + return 0 + fi + + local schema_json="{}" + if amduat_api_call GET "/v2/graph/schema/predicates"; then + schema_json="${AMDUAT_LAST_BODY}" + fi + + APP_AI_PREDICATE_MAP_CACHE="$( + printf '%s' "${schema_json}" | jq -c ' + def clean(v): (v // "" | tostring | gsub("[\\r\\n\\t]+";" ") | gsub(" +";" ") | sub("^ ";"") | sub(" $";"")); + def entry_from(o): + {ref: clean(o.predicate_ref // o.ref // o.predicate // o.id // ""), name: clean(o.alias // o.name // o.predicate // o.label // "")}; + def pred_list(root): + if (root | type) == "array" then root + elif (root | type) == "object" then (root.predicates // root.items // root.data // root.results // []) + else [] + end; + + (pred_list(.) | map(entry_from(.)) | map(select(.ref != "" and .name != ""))) + | reduce .[] as $e ({}; .[$e.ref] = $e.name) + ' 2>/dev/null || printf '{}' + )" + APP_AI_PREDICATE_MAP_INIT=1 + printf '%s\n' "${APP_AI_PREDICATE_MAP_CACHE}" +} + +app_ai_build_context() { + if ! 
command -v jq >/dev/null 2>&1; then + echo "jq is required for ai-answer" >&2 + return 2 + fi + + local retrieve_json="$1" + local predicate_map_json="${2:-}" + local goals_csv="${3:-}" + if [[ -z "${predicate_map_json}" ]]; then + predicate_map_json='{}' + fi + local goals_json="[]" + if [[ -n "${goals_csv}" ]]; then + goals_json="$(printf '%s' "${goals_csv}" | awk -F',' 'BEGIN{printf "["} {for(i=1;i<=NF;i++){gsub(/^ +| +$/, "", $i); if (length($i)>0){if (printed) printf ","; printf "\"%s\"", $i; printed=1}}} END{printf "]"}')" + fi + printf '%s' "${retrieve_json}" | jq -r --arg predicate_map_json "${predicate_map_json}" --argjson goals "${goals_json}" ' + def clean(v): (v // "" | tostring | gsub("[\\r\\n\\t]+";" ") | gsub(" +";" ") | sub("^ ";"") | sub(" $";"")); + def node_name(n): clean(n.name // n.node_ref // n.id // ""); + ($predicate_map_json | fromjson? // {}) as $predicate_map + | ($goals | map(clean(.)) | map(select(length > 0))) as $goal_set + | (if ($goal_set | length) == 1 then $goal_set[0] else "" end) as $goal_hint + | + . as $root + | (($root.nodes // []) | reduce .[] as $n ({}; + if (($n.concept_ref // "") | tostring | length) > 0 + then .[$n.concept_ref] = (node_name($n)) + else . + end + )) as $node_map + | def edge_triplet(e): + { + s: clean(e.subject // e.s // ($node_map[e.subject_ref] // e.subject_ref // "")), + p: clean($predicate_map[(e.predicate_ref // "")] // e.predicate // e.p // (if $goal_hint != "" then $goal_hint else empty end) // e.predicate_ref // ""), + o: clean(e.object // e.o // ($node_map[e.object_ref] // e.object_ref // "")) + }; + + { + nodes: (($root.nodes // []) | map(node_name(.)) | map(select(length > 0)) | unique | .[0:50]), + edges: (($root.edges // []) | map(edge_triplet(.)) | map(select(.s != "" and .p != "" and .o != "")) | .[0:100]) + } + | "Nodes:\n" + + (if (.nodes | length) == 0 then "- (none)\n" else ((.nodes[] | "- " + .) 
+ "\n") end) + + "Edges:\n" + + (if (.edges | length) == 0 then "- (none)\n" else ((.edges[] | "- " + .s + " --" + .p + "--> " + .o) + "\n") end) + ' +} + +app_ai_extract_evidence_json() { + if ! command -v jq >/dev/null 2>&1; then + echo "jq is required for ai-answer" >&2 + return 2 + fi + + local retrieve_json="$1" + local goals_csv="${2:-}" + local predicate_map_json="${3:-}" + if [[ -z "${predicate_map_json}" ]]; then + predicate_map_json='{}' + fi + local goals_json="[]" + if [[ -n "${goals_csv}" ]]; then + goals_json="$(printf '%s' "${goals_csv}" | awk -F',' 'BEGIN{printf "["} {for(i=1;i<=NF;i++){gsub(/^ +| +$/, "", $i); if (length($i)>0){if (printed) printf ","; printf "\"%s\"", $i; printed=1}}} END{printf "]"}')" + fi + + printf '%s' "${retrieve_json}" | jq -c --argjson goals "${goals_json}" --arg predicate_map_json "${predicate_map_json}" --argjson max_edges "${AI_EVIDENCE_MAX_EDGES}" ' + def clean(v): (v // "" | tostring | gsub("[\\r\\n\\t]+";" ") | gsub(" +";" ") | sub("^ ";"") | sub(" $";"")); + ($predicate_map_json | fromjson? // {}) as $predicate_map + | + . as $root + | (($root.nodes // []) | reduce .[] as $n ({}; + if (($n.concept_ref // "") | tostring | length) > 0 + then .[$n.concept_ref] = clean($n.name // $n.node_ref // $n.id // $n.concept_ref) + else . 
+ end + )) as $node_map + | (($root.explanations // []) | reduce .[] as $x ({}; .[$x.edge_ref] = {depth: ($x.depth // null), reasons: ($x.reasons // [])})) as $exp_map + | ($goals | map(clean(.)) | map(select(length > 0))) as $goal_set + | (($root.edges // []) + | map({ + edge_ref: clean(.edge_ref // ""), + subject: clean(.subject // .s // ($node_map[.subject_ref] // .subject_ref // "")), + predicate: clean($predicate_map[(.predicate_ref // "")] // .predicate // .p // .predicate_ref // ""), + predicate_name: clean($predicate_map[(.predicate_ref // "")] // .predicate // .p // ""), + object: clean(.object // .o // ($node_map[.object_ref] // .object_ref // "")), + predicate_ref: clean(.predicate_ref // ""), + subject_ref: clean(.subject_ref // ""), + object_ref: clean(.object_ref // "") + }) + | map(select(.subject != "" and .predicate != "" and .object != "")) + | map(. + ($exp_map[.edge_ref] // {depth:null,reasons:[]}))) as $all_edges + | ($all_edges | map( + if ($goal_set | length) == 0 then . + else select((.predicate as $p | $goal_set | index($p)) != null or (.predicate_ref as $pr | $goal_set | index($pr)) != null) + end + )) as $filtered_edges + | (if ($goal_set | length) > 0 and ($filtered_edges | length) == 0 then $all_edges else $filtered_edges end) as $selected_edges + | (if ($goal_set | length) == 1 then $goal_set[0] else "" end) as $goal_hint + | ($selected_edges | map( + if .predicate_name == "" and $goal_hint != "" then . + {predicate_name:$goal_hint, predicate:$goal_hint} + else . + end + )) + | .[0:$max_edges] + ' +} + +app_ai_answer_json() { + if ! command -v jq >/dev/null 2>&1; then + echo "jq is required for ai-answer" >&2 + return 2 + fi + + local roots_csv="$1" + local question="$2" + local goals_csv="${3:-}" + local require_evidence="${4:-0}" + + local retrieve_out + retrieve_out="$(app_retrieve_with_fallback "${roots_csv}" "${goals_csv}")" || return 1 + + local predicate_map_json + predicate_map_json="$(app_ai_predicate_map_json)" || return $? 
+ + local has_context + has_context="$(printf '%s' "${retrieve_out}" | jq -r '((.nodes // []) | length) + ((.edges // []) | length) > 0')" + local evidence + evidence="$(app_ai_extract_evidence_json "${retrieve_out}" "${goals_csv}" "${predicate_map_json}")" || return $? + local evidence_count + evidence_count="$(printf '%s' "${evidence}" | jq -r 'length')" + local has_evidence="false" + if [[ "${evidence_count}" -gt 0 ]]; then + has_evidence="true" + fi + if [[ "${has_context}" != "true" ]]; then + jq -nc --arg model "${OLLAMA_MODEL}" \ + --arg response "Insufficient graph context for the requested roots/predicates. Ingest or reference existing nodes/edges first." \ + --argjson evidence "${evidence}" \ + --argjson require_evidence "$( [[ "${require_evidence}" == "1" ]] && echo true || echo false )" \ + '{model:$model,response:$response,done:true,done_reason:"no_context",evidence:$evidence,grounding:{has_evidence:false,require_evidence:$require_evidence}}' + return 0 + fi + + if [[ "${require_evidence}" == "1" && "${has_evidence}" != "true" ]]; then + jq -nc --arg model "${OLLAMA_MODEL}" \ + --arg response "No supporting graph evidence found for the requested roots/predicates." \ + --argjson evidence "${evidence}" \ + '{model:$model,response:$response,done:true,done_reason:"no_evidence",evidence:$evidence,grounding:{has_evidence:false,require_evidence:true}}' + return 1 + fi + + local context + context="$(app_ai_build_context "${retrieve_out}" "${predicate_map_json}" "${goals_csv}")" || return $? + context="$(printf '%s' "${context}" | head -c "${AI_CONTEXT_MAX_CHARS}")" + + local prompt + prompt="$(cat </dev/null 2>&1; then + echo "${out}" + return 0 + fi + printf '%s' "${out}" | jq -r '.response // empty' + printf '\n' +} + +app_ai_generate_text() { + local prompt="$1" + local out + out="$(app_ai_generate_json "${prompt}")" || return $? + if ! 
command -v jq >/dev/null 2>&1; then + echo "${out}" + return 0 + fi + printf '%s' "${out}" | jq -r '.response // empty' + printf '\n' +} diff --git a/src/config.sh b/src/config.sh index ad91922..cadf44b 100755 --- a/src/config.sh +++ b/src/config.sh @@ -15,6 +15,11 @@ amduat_config_load() { local override_retry_max_ms="${RETRY_MAX_MS:-}" local override_connect_timeout="${CURL_CONNECT_TIMEOUT_SECONDS:-}" local override_max_time="${CURL_MAX_TIME_SECONDS:-}" + local override_ollama_host="${OLLAMA_HOST:-}" + local override_ollama_model="${OLLAMA_MODEL:-}" + local override_ollama_timeout="${OLLAMA_TIMEOUT_SECONDS:-}" + local override_ai_context_max_chars="${AI_CONTEXT_MAX_CHARS:-}" + local override_ai_evidence_max_edges="${AI_EVIDENCE_MAX_EDGES:-}" local env_file="${root_dir}/config/env.local" if [[ ! -f "${env_file}" ]]; then @@ -34,6 +39,11 @@ amduat_config_load() { if [[ -n "${override_retry_max_ms}" ]]; then RETRY_MAX_MS="${override_retry_max_ms}"; fi if [[ -n "${override_connect_timeout}" ]]; then CURL_CONNECT_TIMEOUT_SECONDS="${override_connect_timeout}"; fi if [[ -n "${override_max_time}" ]]; then CURL_MAX_TIME_SECONDS="${override_max_time}"; fi + if [[ -n "${override_ollama_host}" ]]; then OLLAMA_HOST="${override_ollama_host}"; fi + if [[ -n "${override_ollama_model}" ]]; then OLLAMA_MODEL="${override_ollama_model}"; fi + if [[ -n "${override_ollama_timeout}" ]]; then OLLAMA_TIMEOUT_SECONDS="${override_ollama_timeout}"; fi + if [[ -n "${override_ai_context_max_chars}" ]]; then AI_CONTEXT_MAX_CHARS="${override_ai_context_max_chars}"; fi + if [[ -n "${override_ai_evidence_max_edges}" ]]; then AI_EVIDENCE_MAX_EDGES="${override_ai_evidence_max_edges}"; fi SOCK="${SOCK:-amduatd.sock}" BASE="${BASE:-http://localhost}" @@ -52,4 +62,10 @@ amduat_config_load() { CURL_CONNECT_TIMEOUT_SECONDS="${CURL_CONNECT_TIMEOUT_SECONDS:-2}" CURL_MAX_TIME_SECONDS="${CURL_MAX_TIME_SECONDS:-30}" + + OLLAMA_HOST="${OLLAMA_HOST:-http://127.0.0.1:11434}" + 
OLLAMA_MODEL="${OLLAMA_MODEL:-qwen2.5-coder:7b}"
+  OLLAMA_TIMEOUT_SECONDS="${OLLAMA_TIMEOUT_SECONDS:-60}"
+  AI_CONTEXT_MAX_CHARS="${AI_CONTEXT_MAX_CHARS:-12000}"
+  AI_EVIDENCE_MAX_EDGES="${AI_EVIDENCE_MAX_EDGES:-5}"
 }
diff --git a/tests/ai_answer_eval.sh b/tests/ai_answer_eval.sh
new file mode 100755
index 0000000..94f019f
--- /dev/null
+++ b/tests/ai_answer_eval.sh
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+# shellcheck source=/dev/null
+source "${ROOT_DIR}/src/app_v2.sh"
+
+require_jq() {
+  if ! command -v jq >/dev/null 2>&1; then
+    echo "ai_answer_eval.sh: jq is required" >&2
+    exit 2
+  fi
+}
+
+fail() {
+  echo "ai_answer_eval.sh: FAIL: $1" >&2
+  exit 1
+}
+
+require_jq
+app_init
+
+tmp_dir="$(mktemp -d /tmp/ai-answer-eval.XXXXXX)"
+cleanup() {
+  rm -rf "${tmp_dir}"
+}
+trap cleanup EXIT
+
+mock_prompt_file="${tmp_dir}/prompt.txt"
+mock_retrieve_path_file="${tmp_dir}/retrieve.path"
+mock_retrieve_payload_file="${tmp_dir}/retrieve.payload"
+
+amduat_api_call() {
+  local method="$1"
+  local path="$2"
+  local body="${3:-}"
+
+  if [[ "${method}" == "GET" && "${path}" == "/v2/graph/schema/predicates" ]]; then
+    AMDUAT_LAST_STATUS="200"
+    AMDUAT_LAST_BODY='{"predicates":[{"predicate_ref":"ref-ms-within-domain","alias":"ms.within_domain"}]}'
+    return 0
+  fi
+
+  if [[ "${method}" == "POST" && "${path}" == "/v2/graph/retrieve" ]]; then
+    printf '%s' "${path}" > "${mock_retrieve_path_file}"
+    printf '%s' "${body}" > "${mock_retrieve_payload_file}"
+    AMDUAT_LAST_STATUS="200"
+    if [[ "${MOCK_NO_EDGES:-0}" == "1" ]]; then
+      AMDUAT_LAST_BODY='{"nodes":[{"name":"doc:1"},{"name":"topic:alpha"}],"edges":[]}'
+    else
+      AMDUAT_LAST_BODY='{"nodes":[{"name":"doc:1","concept_ref":"ref-doc1"},{"name":"topic:alpha","concept_ref":"ref-topic-alpha"}],"edges":[{"subject_ref":"ref-doc1","predicate_ref":"ref-ms-within-domain","object_ref":"ref-topic-alpha","edge_ref":"ref-edge-1"}]}'
+    fi
+    return 0
+  fi
+
+  AMDUAT_LAST_STATUS="404"
+  AMDUAT_LAST_BODY='{"error":"not mocked"}'
+  return 1
+}
+
+curl() {
+  local body=""
+  local endpoint=""
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      --data-binary)
+        body="$2"
+        shift 2
+        ;;
+      http://*|https://*)
+        endpoint="$1"
+        shift
+        ;;
+      *)
+        shift
+        ;;
+    esac
+  done
+
+  [[ "${endpoint}" == "${OLLAMA_HOST}/api/generate" ]] || fail "unexpected curl endpoint: ${endpoint}"
+  prompt="$(printf '%s' "${body}" | jq -r '.prompt')"
+  printf '%s' "${prompt}" > "${mock_prompt_file}"
+  printf '%s\n' '{"model":"mock-model","response":"Grounded answer from mock model."}'
+}
+
+json_out="$(app_ai_answer_json "doc:1" "What domain is doc:1 in?" "ms.within_domain")" || fail "app_ai_answer_json failed"
+printf '%s' "${json_out}" | jq -e '.response == "Grounded answer from mock model."' >/dev/null || fail "unexpected response payload"
+printf '%s' "${json_out}" | jq -e '.evidence | length == 1' >/dev/null || fail "missing evidence"
+printf '%s' "${json_out}" | jq -e '.grounding.has_evidence == true' >/dev/null || fail "grounding.has_evidence should be true"
+printf '%s' "${json_out}" | jq -e '.evidence[0].subject == "doc:1" and .evidence[0].predicate == "ms.within_domain" and .evidence[0].object == "topic:alpha"' >/dev/null \
+  || fail "evidence triplet mismatch"
+printf '%s' "${json_out}" | jq -e '.evidence[0].predicate_name == "ms.within_domain"' >/dev/null || fail "predicate_name should resolve from schema"
+
+[[ -f "${mock_retrieve_path_file}" ]] || fail "retrieve call was not made"
+[[ "$(cat "${mock_retrieve_path_file}")" == "/v2/graph/retrieve" ]] || fail "retrieve path mismatch"
+
+retrieve_payload="$(cat "${mock_retrieve_payload_file}")"
+printf '%s' "${retrieve_payload}" | jq -e '.roots == ["doc:1"]' >/dev/null || fail "roots payload mismatch"
+printf '%s' "${retrieve_payload}" | jq -e '.goal_predicates == ["ms.within_domain"]' >/dev/null || fail "goal_predicates payload mismatch"
+
+prompt_text="$(cat "${mock_prompt_file}")"
+[[ "${prompt_text}" == *"Question:"* ]] || fail "prompt missing question label"
+[[ "${prompt_text}" == *"What domain is doc:1 in?"* ]] || fail "prompt missing question"
+[[ "${prompt_text}" == *"doc:1 --ms.within_domain--> topic:alpha"* ]] || fail "prompt missing graph edge context"
+
+text_out="$(app_ai_answer_text "doc:1" "What domain is doc:1 in?" "ms.within_domain")" || fail "app_ai_answer_text failed"
+[[ "${text_out}" == "Grounded answer from mock model." ]] || fail "text output mismatch"
+
+set +e
+MOCK_NO_EDGES=1 strict_out="$(app_ai_answer_json "doc:1" "What domain is doc:1 in?" "ms.within_domain" "1")"
+strict_rc=$?
+set -e
+[[ "${strict_rc}" -ne 0 ]] || fail "expected non-zero for --require-evidence with no supporting edges"
+printf '%s' "${strict_out}" | jq -e '.done_reason == "no_evidence"' >/dev/null || fail "expected done_reason no_evidence"
+printf '%s' "${strict_out}" | jq -e '.grounding.require_evidence == true and .grounding.has_evidence == false' >/dev/null \
+  || fail "expected strict grounding flags"
+
+echo "ai_answer_eval.sh: PASS"
diff --git a/tests/ai_eval.sh b/tests/ai_eval.sh
new file mode 100755
index 0000000..6e475a7
--- /dev/null
+++ b/tests/ai_eval.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+# shellcheck source=/dev/null
+source "${ROOT_DIR}/src/app_v2.sh"
+
+require_jq() {
+  if ! command -v jq >/dev/null 2>&1; then
+    echo "ai_eval.sh: jq is required" >&2
+    exit 2
+  fi
+}
+
+fail() {
+  echo "ai_eval.sh: FAIL: $1" >&2
+  exit 1
+}
+
+app_init
+require_jq
+
+tags_out="$(app_ai_check)" || fail "ai-check failed"
+printf '%s' "${tags_out}" | jq -e '.models | type == "array"' >/dev/null || fail "tags response missing models array"
+printf '%s' "${tags_out}" | jq -e --arg model "${OLLAMA_MODEL}" '.models[] | select(.name == $model)' >/dev/null \
+  || fail "configured model not present: ${OLLAMA_MODEL}"
+
+prompt="Return one short sentence describing graph retrieval testing."
+gen_out="$(app_ai_generate "${prompt}")" || fail "ai-generate failed"
+
+printf '%s' "${gen_out}" | jq -e '.response | type == "string"' >/dev/null || fail "generate response missing text"
+printf '%s' "${gen_out}" | jq -e '.model | type == "string"' >/dev/null || fail "generate response missing model"
+
+response_text="$(printf '%s' "${gen_out}" | jq -r '.response')"
+[[ -n "${response_text//[[:space:]]/}" ]] || fail "generate response text is empty"
+
+echo "ai_eval.sh: PASS"
diff --git a/tests/integration_v2.sh b/tests/integration_v2.sh
index c262f12..db6340f 100755
--- a/tests/integration_v2.sh
+++ b/tests/integration_v2.sh
@@ -40,8 +40,8 @@
 assert_contains "${startup_out}" '"ok"'
 run_id="$(date +%s)"
 trace_id="trace-it-${run_id}"
 idempotency_key="it-seed-${run_id}"
-doc_name="doc-it${run_id}"
-topic_name="topic-italpha${run_id}"
+doc_name="docit${run_id}"
+topic_name="topicitalpha${run_id}"
 payload="$(cat </dev/null || {
+    echo "expected ai-answer to return JSON with response text" >&2
+    exit 1
+  }
+  printf '%s' "${ai_answer_out}" | jq -e '.evidence | type == "array"' >/dev/null || {
+    echo "expected ai-answer to include evidence array" >&2
+    exit 1
+  }
+  app_ai_answer_json "${doc_name}" "Which topic is this document within?" "ms.within_domain" "1" >/dev/null || {
+    echo "expected strict ai-answer to succeed when evidence exists" >&2
+    exit 1
+  }
+fi
+
 # Capture edge_ref using subgraph surface to avoid format differences.
 subgraph_out="$(amduat_api_call GET "/v2/graph/subgraph?roots[]=${doc_name}&max_depth=2&dir=outgoing&limit_nodes=200&limit_edges=400&include_stats=true&max_result_bytes=1048576" && printf '%s' "${AMDUAT_LAST_BODY}")"
 edge_ref="$(printf '%s' "${subgraph_out}" | jq -r '.edges[0].edge_ref // empty')"