#!/usr/bin/env bash
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# shellcheck source=/dev/null
source "${ROOT_DIR}/src/app_v2.sh"

require_jq() {
  if ! command -v jq >/dev/null 2>&1; then
    echo "ai_answer_eval.sh: jq is required" >&2
    exit 2
  fi
}

fail() {
  echo "ai_answer_eval.sh: FAIL: $1" >&2
  exit 1
}

require_jq
app_init

tmp_dir="$(mktemp -d /tmp/ai-answer-eval.XXXXXX)"
cleanup() {
  rm -rf "${tmp_dir}"
}
trap cleanup EXIT

mock_prompt_file="${tmp_dir}/prompt.txt"
mock_retrieve_path_file="${tmp_dir}/retrieve.path"
mock_retrieve_payload_file="${tmp_dir}/retrieve.payload"

# Mock the graph API client: serve a canned predicate schema and retrieve
# response, and record the retrieve request so the test can assert on it.
# MOCK_NO_EDGES=1 switches the retrieve response to one with no edges.
amduat_api_call() {
  local method="$1"
  local path="$2"
  local body="${3:-}"

  if [[ "${method}" == "GET" && "${path}" == "/v2/graph/schema/predicates" ]]; then
    AMDUAT_LAST_STATUS="200"
    AMDUAT_LAST_BODY='{"predicates":[{"predicate_ref":"ref-ms-within-domain","alias":"ms.within_domain"}]}'
    return 0
  fi

  if [[ "${method}" == "POST" && "${path}" == "/v2/graph/retrieve" ]]; then
    printf '%s' "${path}" > "${mock_retrieve_path_file}"
    printf '%s' "${body}" > "${mock_retrieve_payload_file}"
    AMDUAT_LAST_STATUS="200"
    if [[ "${MOCK_NO_EDGES:-0}" == "1" ]]; then
      AMDUAT_LAST_BODY='{"nodes":[{"name":"doc:1"},{"name":"topic:alpha"}],"edges":[]}'
    else
      AMDUAT_LAST_BODY='{"nodes":[{"name":"doc:1","concept_ref":"ref-doc1"},{"name":"topic:alpha","concept_ref":"ref-topic-alpha"}],"edges":[{"subject_ref":"ref-doc1","predicate_ref":"ref-ms-within-domain","object_ref":"ref-topic-alpha","edge_ref":"ref-edge-1"}]}'
    fi
    return 0
  fi

  AMDUAT_LAST_STATUS="404"
  AMDUAT_LAST_BODY='{"error":"not mocked"}'
  return 1
}

# Shadow curl so no real HTTP request reaches Ollama: capture the prompt sent
# to /api/generate and return a fixed model response.
curl() {
  local body=""
  local endpoint=""
  local prompt
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --data-binary)
        body="$2"
        shift 2
        ;;
      http://*|https://*)
        endpoint="$1"
        shift
        ;;
      *)
        shift
        ;;
    esac
  done

  [[ "${endpoint}" == "${OLLAMA_HOST}/api/generate" ]] || fail "unexpected curl endpoint: ${endpoint}"
  prompt="$(printf '%s' "${body}" | jq -r '.prompt')"
  printf '%s' "${prompt}" > "${mock_prompt_file}"
  printf '%s\n' '{"model":"mock-model","response":"Grounded answer from mock model."}'
}

json_out="$(app_ai_answer_json "doc:1" "What domain is doc:1 in?" "ms.within_domain")" || fail "app_ai_answer_json failed"
"ms.within_domain")" || fail "app_ai_answer_json failed" printf '%s' "${json_out}" | jq -e '.response == "Grounded answer from mock model."' >/dev/null || fail "unexpected response payload" printf '%s' "${json_out}" | jq -e '.evidence | length == 1' >/dev/null || fail "missing evidence" printf '%s' "${json_out}" | jq -e '.grounding.has_evidence == true' >/dev/null || fail "grounding.has_evidence should be true" printf '%s' "${json_out}" | jq -e '.evidence[0].subject == "doc:1" and .evidence[0].predicate == "ms.within_domain" and .evidence[0].object == "topic:alpha"' >/dev/null \ || fail "evidence triplet mismatch" printf '%s' "${json_out}" | jq -e '.evidence[0].predicate_name == "ms.within_domain"' >/dev/null || fail "predicate_name should resolve from schema" [[ -f "${mock_retrieve_path_file}" ]] || fail "retrieve call was not made" [[ "$(cat "${mock_retrieve_path_file}")" == "/v2/graph/retrieve" ]] || fail "retrieve path mismatch" retrieve_payload="$(cat "${mock_retrieve_payload_file}")" printf '%s' "${retrieve_payload}" | jq -e '.roots == ["doc:1"]' >/dev/null || fail "roots payload mismatch" printf '%s' "${retrieve_payload}" | jq -e '.goal_predicates == ["ms.within_domain"]' >/dev/null || fail "goal_predicates payload mismatch" prompt_text="$(cat "${mock_prompt_file}")" [[ "${prompt_text}" == *"Question:"* ]] || fail "prompt missing question label" [[ "${prompt_text}" == *"What domain is doc:1 in?"* ]] || fail "prompt missing question" [[ "${prompt_text}" == *"doc:1 --ms.within_domain--> topic:alpha"* ]] || fail "prompt missing graph edge context" text_out="$(app_ai_answer_text "doc:1" "What domain is doc:1 in?" "ms.within_domain")" || fail "app_ai_answer_text failed" [[ "${text_out}" == "Grounded answer from mock model." ]] || fail "text output mismatch" set +e MOCK_NO_EDGES=1 strict_out="$(app_ai_answer_json "doc:1" "What domain is doc:1 in?" "ms.within_domain" "1")" strict_rc=$? set -e [[ "${strict_rc}" -ne 0 ]] || fail "expected non-zero for --require-evidence with no supporting edges" printf '%s' "${strict_out}" | jq -e '.done_reason == "no_evidence"' >/dev/null || fail "expected done_reason no_evidence" printf '%s' "${strict_out}" | jq -e '.grounding.require_evidence == true and .grounding.has_evidence == false' >/dev/null \ || fail "expected strict grounding flags" echo "ai_answer_eval.sh: PASS"