amduat-api/tests/integration_v2.sh

#!/usr/bin/env bash
#
# Integration test entrypoint for the v2 app helpers in src/app_v2.sh.
#
# Optional environment:
#   IT_USE_EXISTING_DAEMON  "1" reuses a running daemon instead of starting one
#   IT_STORE_ROOT           store root handed to the isolated daemon
#   IT_SOCK                 unix socket path for the isolated daemon
#   IT_STORE_BACKEND        store backend handed to the isolated daemon
#   IT_DAEMON_LOG_PATH      file receiving the isolated daemon's output
#   SPACE                   space handed to the isolated daemon (default app1)
#   RUN_AI_RETRIEVE_LIVE    "1" enables the live AI-over-retrieval checks
#
# Exit status: 0 pass, 1 failed check, 2 jq missing, 77 skipped (no socket).

# Abort on command failure, on unset variables, and on failures inside pipelines.
set -o errexit -o nounset -o pipefail

# Repository root: the parent of the directory that holds this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# shellcheck source=/dev/null
source "${ROOT_DIR}/src/app_v2.sh"

#######################################
# Ensure the jq command is available.
# Outputs: an error line on stderr when jq cannot be found.
# Returns: 0 when jq is available; otherwise exits the shell with status 2.
#######################################
require_jq() {
  # Nothing to do when jq resolves to a command.
  command -v jq >/dev/null 2>&1 && return 0

  printf '%s\n' "jq is required for integration_v2.sh" >&2
  exit 2
}

#######################################
# Assert that one string occurs literally inside another.
# Arguments: $1 - text to search in
#            $2 - substring that must be present (matched literally)
# Outputs:   a failure line on stderr when the substring is absent.
# Returns:   0 when found; otherwise exits the shell with status 1.
#######################################
assert_contains() {
  local text="$1"
  local wanted="$2"

  # The quoted expansion keeps glob characters in the needle literal.
  if [[ "${text}" == *"${wanted}"* ]]; then
    return 0
  fi

  printf '%s\n' "assertion failed: expected to find ${wanted}" >&2
  exit 1
}

# Run the changes-consumer handler checks (deterministic cursor semantics)
# as part of this regular integration entrypoint. The script is executed
# directly, before the jq check below.
"${ROOT_DIR}/tests/changes_consumer_handler.sh"

require_jq

# "1" means: reuse an already running daemon instead of starting one here.
: "${IT_USE_EXISTING_DAEMON:=0}"

# Settings for the isolated daemon; each can be overridden via its IT_* variable.
it_root="${IT_STORE_ROOT:-/tmp/amduat-asl-it-${USER:-user}}"
it_sock="${IT_SOCK:-/tmp/amduatd-it-${USER:-user}.sock}"
it_backend="${IT_STORE_BACKEND:-fs}"
it_log="${IT_DAEMON_LOG_PATH:-/tmp/integration-v2-daemon.log}"

# Bookkeeping read by cleanup(): whether this script launched a daemon, and its PID.
it_daemon_pid=""
it_started_daemon=0

#######################################
# Exit hook: stop the daemon this script launched (if any) and remove the
# isolated socket file.
# Globals: it_started_daemon, it_daemon_pid, it_sock, IT_USE_EXISTING_DAEMON (read)
# Returns: 0; every step is best-effort.
#######################################
cleanup() {
  # Only signal a process that this script started and recorded a PID for.
  if [[ "${it_started_daemon}" == "1" ]] && [[ -n "${it_daemon_pid}" ]]; then
    # Failures are ignored on purpose: the process may already have exited.
    {
      kill "${it_daemon_pid}"
      wait "${it_daemon_pid}"
    } >/dev/null 2>&1 || true
  fi

  # The socket is left alone when an existing daemon was reused.
  case "${IT_USE_EXISTING_DAEMON}" in
    1) ;;
    *) rm -f "${it_sock}" >/dev/null 2>&1 || true ;;
  esac
}
trap cleanup EXIT

# Start an isolated daemon unless the caller asked to reuse an existing one.
if [[ "${IT_USE_EXISTING_DAEMON}" != "1" ]]; then
  # Remove any socket file already present at this path (best-effort).
  rm -f "${it_sock}" >/dev/null 2>&1 || true
  # Exported so the socket check further down and child processes see the
  # isolated socket path.
  export SOCK="${it_sock}"
  STORE_BACKEND="${it_backend}" STORE_ROOT="${it_root}" SOCK="${it_sock}" SPACE="${SPACE:-app1}" \
    nohup "${ROOT_DIR}/scripts/dev_start_daemon.sh" >"${it_log}" 2>&1 &
  it_daemon_pid="$!"
  it_started_daemon=1

  # Poll the readiness endpoint: up to 120 attempts, 0.1s apart.
  ready=0
  for ((it_attempt = 0; it_attempt < 120; it_attempt++)); do
    # --fail makes curl exit non-zero on HTTP >= 400, so an error reply from
    # the endpoint is not mistaken for "ready"; without it any HTTP response
    # at all would end the wait.
    if curl --globoff --silent --show-error --fail --unix-socket "${it_sock}" "http://localhost/v2/readyz" >/dev/null 2>&1; then
      ready=1
      break
    fi
    sleep 0.1
  done
  if [[ "${ready}" != "1" ]]; then
    echo "integration_v2.sh: FAIL (isolated daemon did not become ready, log: ${it_log})" >&2
    exit 1
  fi
fi

# Initialise the helpers from app_v2.sh.
# NOTE(review): presumably app_init also provides SOCK when no isolated daemon
# was started above — confirm against app_v2.sh.
app_init

# Without a unix socket at ${SOCK} nothing can be tested: report SKIP (status 77).
[[ -S "${SOCK}" ]] || {
  echo "integration_v2.sh: SKIP (socket not found at ${SOCK})"
  exit 77
}

# 1) startup checks
# The captured output only has to contain an "ok" token.
startup_out="$(app_startup_checks)"
assert_contains "${startup_out}" '"ok"'

# 2) idempotent ingest (batch + continue_on_error)
# All identifiers below are suffixed with the current epoch second so that a
# new run uses fresh names.
# NOTE(review): two runs started within the same second would share these
# identifiers — confirm that is acceptable for parallel runs.
run_id="$(date +%s)"
trace_id="trace-it-${run_id}"
idempotency_key="it-seed-${run_id}"
doc_name="docit${run_id}"
topic_name="topicitalpha${run_id}"
# The heredoc delimiter is unquoted, so the ${...} references are expanded into
# the JSON body: two nodes plus one "ms.within_domain" edge from the document
# to the topic, carrying a provenance record.
payload="$(cat <<JSON
{
  "idempotency_key":"${idempotency_key}",
  "mode":"continue_on_error",
  "nodes":[{"name":"${doc_name}"},{"name":"${topic_name}"}],
  "edges":[
    {
      "subject":"${doc_name}",
      "predicate":"ms.within_domain",
      "object":"${topic_name}",
      "provenance":{
        "source_uri":"urn:test:seed",
        "extractor":"integration-test",
        "observed_at":1,
        "ingested_at":2,
        "trace_id":"${trace_id}"
      }
    }
  ]
}
JSON
)"
# First submission: the reply must contain "ok":true.
ingest_out="$(app_ingest_batch "${payload}")"
assert_contains "${ingest_out}" '"ok":true'

# Re-submit same idempotency key + identical payload.
# Only the presence of an "idempotency_key" field in the reply is asserted.
ingest_out_2="$(app_ingest_batch "${payload}")"
assert_contains "${ingest_out_2}" '"idempotency_key"'

# 3) incremental sync with durable opaque cursor
# Drop any existing cursor file first, so the non-empty check below reflects
# what this sync wrote.
rm -f "${CURSOR_FILE}"
sync_out="$(app_sync_once)"
assert_contains "${sync_out}" '"events"'
if [[ ! -s "${CURSOR_FILE}" ]]; then
  echo "cursor file not persisted" >&2
  exit 1
fi

# 4) retrieval endpoint + fallback path available
# The reply for the seeded document must contain an "edges" field.
retrieve_out="$(app_retrieve_with_fallback "${doc_name}" "ms.within_domain")"
assert_contains "${retrieve_out}" '"edges"'

# 4b) optional live AI-over-retrieval path (requires reachable Ollama)
# Opt-in: this section only runs when RUN_AI_RETRIEVE_LIVE is "1".
if [[ "${RUN_AI_RETRIEVE_LIVE:-0}" == "1" ]]; then
  it_ai_question="Which topic is this document within?"
  ai_answer_out="$(app_ai_answer_json "${doc_name}" "${it_ai_question}" "ms.within_domain")"

  # The reply must be JSON whose .response is a string ...
  if ! printf '%s' "${ai_answer_out}" | jq -e '.response | type == "string"' >/dev/null; then
    echo "expected ai-answer to return JSON with response text" >&2
    exit 1
  fi

  # ... and whose .evidence is an array.
  if ! printf '%s' "${ai_answer_out}" | jq -e '.evidence | type == "array"' >/dev/null; then
    echo "expected ai-answer to include evidence array" >&2
    exit 1
  fi

  # Same question again with "1" as a fourth argument.
  # NOTE(review): judging by the failure message this selects a strict mode —
  # confirm against app_ai_answer_json.
  if ! app_ai_answer_json "${doc_name}" "${it_ai_question}" "ms.within_domain" "1" >/dev/null; then
    echo "expected strict ai-answer to succeed when evidence exists" >&2
    exit 1
  fi
fi

# Capture edge_ref using subgraph surface to avoid format differences.
# The body is printed inside the same command substitution as the call, since
# a variable set in that subshell is not visible out here.
# NOTE(review): assumes amduat_api_call stores the response body in
# AMDUAT_LAST_BODY — confirm against app_v2.sh.
it_subgraph_path="/v2/graph/subgraph?roots[]=${doc_name}&max_depth=2&dir=outgoing&limit_nodes=200&limit_edges=400&include_stats=true&max_result_bytes=1048576"
subgraph_out="$(amduat_api_call GET "${it_subgraph_path}" && printf '%s' "${AMDUAT_LAST_BODY}")"

# Take the first edge's reference; jq prints nothing when it is missing.
edge_ref="$(printf '%s' "${subgraph_out}" | jq -r '.edges[0].edge_ref // empty')"
[[ -n "${edge_ref}" ]] || {
  echo "failed to resolve edge_ref" >&2
  exit 1
}

# 5) correction path and tombstone visibility semantics
# Tombstone the captured edge; afterwards the default retrieval must report
# zero edges for the document.
app_tombstone_edge "${edge_ref}" >/dev/null
post_tombstone_retrieve="$(app_retrieve_with_fallback "${doc_name}" "ms.within_domain")"
post_edges_count="$(printf '%s' "${post_tombstone_retrieve}" | jq '.edges | length')"
[[ "${post_edges_count}" == "0" ]] || {
  echo "expected retrieval default to hide tombstoned edges" >&2
  exit 1
}

# Query the subgraph again, this time with include_tombstoned=true.
# NOTE(review): the assertion below only checks that an "edges" field is
# present; it does not verify that the tombstoned edge itself is returned.
it_tombstoned_path="/v2/graph/subgraph?roots[]=${doc_name}&max_depth=2&dir=outgoing&limit_nodes=200&limit_edges=400&include_tombstoned=true&max_result_bytes=1048576"
visible_tombstone="$(amduat_api_call GET "${it_tombstoned_path}" && printf '%s' "${AMDUAT_LAST_BODY}")"
assert_contains "${visible_tombstone}" '"edges"'

echo "integration_v2.sh: PASS"