amduat-api/scripts/dev_start_daemon.sh
2026-02-08 00:07:35 +01:00

297 lines
9.6 KiB
Bash
Executable file

#!/usr/bin/env bash
# Development launcher for amduatd.
#
# Settings are read from config/env.local, or config/env.example when no local
# file exists. A variable that is already non-empty in the caller's environment
# wins over the value assigned by the env file.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Snapshot caller-provided values before the env file gets a chance to
# overwrite them.
override_store_root="${STORE_ROOT:-}"
override_store_backend="${STORE_BACKEND:-}"
override_space="${SPACE:-}"
override_sock="${SOCK:-}"
override_amduatd_bin="${AMDUATD_BIN:-}"
override_asl_bin="${ASL_BIN:-}"
override_index_backend_probe="${INDEX_BACKEND_PROBE:-}"
override_index_backend_fallback="${INDEX_BACKEND_FALLBACK:-}"
override_index_backend_repair="${INDEX_BACKEND_REPAIR:-}"
override_fs_fallback_store_root="${FS_FALLBACK_STORE_ROOT:-}"

# Prefer the developer's local env file; otherwise use the checked-in example.
ENV_FILE="${ROOT_DIR}/config/env.local"
[[ -f "${ENV_FILE}" ]] || ENV_FILE="${ROOT_DIR}/config/env.example"
# shellcheck source=/dev/null
source "${ENV_FILE}"

# Re-apply every non-empty snapshot on top of what the env file set.
[[ -z "${override_store_root}" ]] || STORE_ROOT="${override_store_root}"
[[ -z "${override_store_backend}" ]] || STORE_BACKEND="${override_store_backend}"
[[ -z "${override_space}" ]] || SPACE="${override_space}"
[[ -z "${override_sock}" ]] || SOCK="${override_sock}"
[[ -z "${override_amduatd_bin}" ]] || AMDUATD_BIN="${override_amduatd_bin}"
[[ -z "${override_asl_bin}" ]] || ASL_BIN="${override_asl_bin}"
[[ -z "${override_index_backend_probe}" ]] || INDEX_BACKEND_PROBE="${override_index_backend_probe}"
[[ -z "${override_index_backend_fallback}" ]] || INDEX_BACKEND_FALLBACK="${override_index_backend_fallback}"
[[ -z "${override_index_backend_repair}" ]] || INDEX_BACKEND_REPAIR="${override_index_backend_repair}"
[[ -z "${override_fs_fallback_store_root}" ]] || FS_FALLBACK_STORE_ROOT="${override_fs_fallback_store_root}"

# Defaults for anything still unset or empty.
: "${STORE_ROOT:=${ROOT_DIR}/.amduat-asl}"
: "${STORE_BACKEND:=index}"
: "${SPACE:=app1}"
: "${SOCK:=${ROOT_DIR}/amduatd.sock}"
: "${INDEX_BACKEND_PROBE:=1}"
: "${INDEX_BACKEND_FALLBACK:=fs}"
: "${INDEX_BACKEND_REPAIR:=1}"
# Derived from STORE_ROOT as it stands here, i.e. before it is made absolute.
: "${FS_FALLBACK_STORE_ROOT:=${STORE_ROOT}-fs}"

# Relative paths are interpreted relative to the repository root.
for path_var in STORE_ROOT SOCK FS_FALLBACK_STORE_ROOT; do
  [[ "${!path_var}" == /* ]] || printf -v "${path_var}" '%s' "${ROOT_DIR}/${!path_var}"
done
# Resolve the daemon and ASL tool binaries unless the caller already named
# them: known local build outputs are tried in order, then PATH.

# Print the first argument that is an executable file; fail if none is.
first_executable() {
  local candidate
  for candidate in "$@"; do
    if [[ -x "${candidate}" ]]; then
      printf '%s\n' "${candidate}"
      return 0
    fi
  done
  return 1
}

AMDUATD_BIN="${AMDUATD_BIN:-}"
if [[ -z "${AMDUATD_BIN}" ]]; then
  # Left empty when neither a build output nor a PATH entry exists.
  AMDUATD_BIN="$(
    first_executable \
      "${ROOT_DIR}/vendor/amduat-api/build/amduatd" \
      "${ROOT_DIR}/vendor/amduat-api/build-asan/amduatd" \
      || command -v amduatd 2>/dev/null \
      || true
  )"
fi

ASL_BIN="${ASL_BIN:-}"
if [[ -z "${ASL_BIN}" ]]; then
  # Same lookup order for the ASL command-line tool.
  ASL_BIN="$(
    first_executable \
      "${ROOT_DIR}/vendor/amduat-api/vendor/amduat/build/amduat-asl" \
      "${ROOT_DIR}/vendor/amduat-api/build/vendor/amduat/amduat-asl" \
      || command -v amduat-asl 2>/dev/null \
      || true
  )"
fi
# Stop early when either required binary is unresolved or not executable.
if ! [[ -n "${AMDUATD_BIN}" && -x "${AMDUATD_BIN}" ]]; then
  printf '%s\n' "missing amduatd binary; set AMDUATD_BIN" >&2
  exit 1
fi
if ! [[ -n "${ASL_BIN}" && -x "${ASL_BIN}" ]]; then
  printf '%s\n' "missing amduat-asl binary; set ASL_BIN" >&2
  exit 1
fi
#######################################
# Ensure a store directory exists and is initialized for the given backend.
# Globals:   ASL_BIN (read)
# Arguments: $1 - backend; "index" selects the index commands, anything else
#                 the plain store commands
#            $2 - store root directory
# Outputs:   a notice on stderr when initialization is performed
# Returns:   0 if the store already answers the state check; otherwise the
#            status of the init command
#######################################
init_store() {
  local kind="$1"
  local store_dir="$2"
  local notice
  local -a check_args init_args

  mkdir -p "${store_dir}"

  case "${kind}" in
    index)
      check_args=(index state)
      init_args=(index init)
      notice="initializing index-backed ASL store at ${store_dir}"
      ;;
    *)
      check_args=(log inspect)
      init_args=(init)
      notice="initializing ASL store at ${store_dir}"
      ;;
  esac

  # A store that answers the state check is left untouched.
  if "${ASL_BIN}" "${check_args[@]}" --root "${store_dir}" >/dev/null 2>&1; then
    return 0
  fi

  echo "${notice}" >&2
  "${ASL_BIN}" "${init_args[@]}" --root "${store_dir}"
}
#######################################
# Poll the daemon's readiness endpoint over its unix socket.
# Makes up to 80 attempts, sleeping 0.1s between them.
# Arguments: $1 - path of the daemon's unix socket
# Outputs:   none (curl output is discarded)
# Returns:   0 as soon as the socket exists and GET /v2/readyz succeeds,
#            1 if that never happens within the attempt budget
#######################################
wait_ready() {
  local sock="$1"
  local attempt
  # Builtin arithmetic loop: no dependency on an external `seq`, whose absence
  # would make the loop run zero times and report "not ready".
  for ((attempt = 0; attempt < 80; attempt++)); do
    # --max-time bounds each request so a daemon that accepts the connection
    # but never answers cannot stall this loop indefinitely.
    if [[ -S "${sock}" ]] \
      && curl --globoff --silent --show-error --max-time 2 \
        --unix-socket "${sock}" "http://localhost/v2/readyz" >/dev/null 2>&1; then
      return 0
    fi
    sleep 0.1
  done
  return 1
}
# Validate one probe response body with jq.
# Arguments: $1 - response body. Returns 0 if it is JSON, reports no error
# results, contains at least two applied node writes and one applied edge
# write, and has .ok == true; otherwise prints the reason and returns 1.
_probe_check_jq() {
  local out="$1"
  if ! printf '%s' "${out}" | jq -e '.' >/dev/null 2>&1; then
    echo "index probe returned non-JSON payload: ${out}" >&2
    return 1
  fi
  if printf '%s' "${out}" | jq -e 'any((.results // [] )[]?; ((.code // 0) | tonumber) >= 500)' >/dev/null 2>&1; then
    echo "index probe saw server error result: ${out}" >&2
    return 1
  fi
  if printf '%s' "${out}" | jq -e 'any((.results // [] )[]?; (.status == "error") or (((.code // 0) | tonumber) >= 400))' >/dev/null 2>&1; then
    echo "index probe saw non-success result: ${out}" >&2
    return 1
  fi
  if ! printf '%s' "${out}" | jq -e '([(.results // [] )[]? | select(.kind == "node" and .status == "applied" and ((.code // 0) | tonumber) < 300)] | length) >= 2' >/dev/null 2>&1; then
    echo "index probe missing applied node writes: ${out}" >&2
    return 1
  fi
  if ! printf '%s' "${out}" | jq -e 'any((.results // [] )[]?; .kind == "edge" and .status == "applied" and ((.code // 0) | tonumber) < 300)' >/dev/null 2>&1; then
    echo "index probe missing applied edge write: ${out}" >&2
    return 1
  fi
  if ! printf '%s' "${out}" | jq -e '.ok == true' >/dev/null 2>&1; then
    echo "index probe non-ok payload: ${out}" >&2
    return 1
  fi
  return 0
}

# Validate one probe response body without jq, by substring matching.
# Arguments: $1 - response body. Returns 0 only when the body contains
# "ok":true; otherwise prints the reason and returns 1.
_probe_check_plain() {
  local out="$1"
  # Match against a whitespace-free copy so pretty-printed JSON such as
  # `"ok": true` is recognised; diagnostics still show the original body.
  local compact="${out//[[:space:]]/}"
  if [[ "${compact}" == *'"ok":true'* ]]; then
    return 0
  fi
  if [[ "${compact}" == *'"code":5'* ]]; then
    echo "index probe saw 5xx result: ${out}" >&2
    return 1
  fi
  if [[ "${compact}" == *'"ok":false'* || "${compact}" == *'"status":"error"'* ]]; then
    echo "index probe non-ok payload: ${out}" >&2
    return 1
  fi
  echo "index probe unexpected payload: ${out}" >&2
  return 1
}

#######################################
# Exercise the daemon's graph write path: five times in a row, POST a batch
# of two fresh nodes and one edge between them to /v2/graph/batch and check
# the response.
# Arguments: $1 - path of the daemon's unix socket
#            $2 - space name, sent as the X-Amduat-Space header
# Outputs:   the reason for a failed probe on stderr
# Returns:   0 if all five probes pass; 1 on the first curl failure,
#            non-200 status or rejected response body
#######################################
probe_index_write_path() {
  local sock="$1"
  local space="$2"

  # Decide once which validator to use.
  local have_jq=0
  if command -v jq >/dev/null 2>&1; then
    have_jq=1
  fi

  # Request body template; the %s slots are, in order:
  # run id, node a, node b, edge subject, edge object, run id.
  local payload_fmt
  payload_fmt='{"idempotency_key":"probe-%s","mode":"continue_on_error",'
  payload_fmt+='"nodes":[{"name":"%s"},{"name":"%s"}],'
  payload_fmt+='"edges":[{"subject":"%s","predicate":"ms.within_domain","object":"%s",'
  payload_fmt+='"provenance":{"source_uri":"urn:probe:index","extractor":"dev-start-probe",'
  payload_fmt+='"observed_at":1,"ingested_at":2,"trace_id":"probe-%s"}}]}'

  local probe_idx run_id doc topic payload raw out code
  for probe_idx in 1 2 3 4 5; do
    run_id="$(date +%s%N)${RANDOM}${probe_idx}"
    # Keep probe names conservative (alnum only) across backend/name-policy variants.
    doc="probe${run_id}a"
    topic="probe${run_id}b"
    # shellcheck disable=SC2059 # the format is the fixed template built above
    printf -v payload "${payload_fmt}" \
      "${run_id}" "${doc}" "${topic}" "${doc}" "${topic}" "${run_id}"

    # -w appends "\n<status>" after the body so both arrive in one capture.
    # --max-time keeps a wedged daemon from blocking the probe forever.
    raw="$(curl --globoff --silent --show-error --max-time 10 \
      --unix-socket "${sock}" \
      -H "Content-Type: application/json" \
      -H "X-Amduat-Space: ${space}" \
      -X POST --data-binary "${payload}" \
      -w $'\n%{http_code}' \
      "http://localhost/v2/graph/batch")" || return 1
    code="${raw##*$'\n'}"
    out="${raw%$'\n'*}"

    if [[ "${code}" != "200" ]]; then
      echo "index probe HTTP ${code}: ${out}" >&2
      return 1
    fi

    # A successful backend health check is "node+edge write path is healthy
    # for repeated valid payloads".
    if [[ "${have_jq}" == "1" ]]; then
      _probe_check_jq "${out}" || return 1
    else
      _probe_check_plain "${out}" || return 1
    fi
  done
  return 0
}
#######################################
# Replace this shell with the daemon process (via exec).
# Globals:   AMDUATD_BIN, SOCK, SPACE (read)
# Arguments: $1 - store backend passed as --store-backend
#            $2 - store root passed as --root
# Outputs:   a one-line start notice on stderr
#######################################
run_daemon_foreground() {
  local store_backend="$1"
  local store_root="$2"
  local -a daemon_args=(
    --root "${store_root}"
    --sock "${SOCK}"
    --store-backend "${store_backend}"
    --space "${SPACE}"
  )
  printf '%s\n' "starting amduatd: root=${store_root} sock=${SOCK} backend=${store_backend} space=${SPACE}" >&2
  exec "${AMDUATD_BIN}" "${daemon_args[@]}"
}
#######################################
# Launch the daemon as a background child so it can be probed.
# Globals:   AMDUATD_BIN, STORE_ROOT, SOCK, STORE_BACKEND, SPACE (read)
#            daemon_pid (written) - PID of the background daemon
# Outputs:   a one-line start notice on stderr
# Side effects: defines cleanup_probe and installs it as the EXIT trap, so
#            the child is sent a kill when this script exits.
#######################################
start_probe_daemon() {
  local -a daemon_cmd=(
    "${AMDUATD_BIN}"
    --root "${STORE_ROOT}"
    --sock "${SOCK}"
    --store-backend "${STORE_BACKEND}"
    --space "${SPACE}"
  )
  printf '%s\n' "starting amduatd (probe mode): root=${STORE_ROOT} sock=${SOCK} backend=${STORE_BACKEND} space=${SPACE}" >&2

  "${daemon_cmd[@]}" &
  daemon_pid=$!

  # Best effort: the child may already have exited by the time this runs.
  cleanup_probe() {
    kill "${daemon_pid}" >/dev/null 2>&1 || true
  }
  trap cleanup_probe EXIT
}
#######################################
# Stop the background daemon started for probing and drop the EXIT trap.
# Globals:   daemon_pid (read)
# Returns:   0; failures of kill and wait are ignored on purpose because the
#            daemon may already have exited.
#######################################
stop_probe_daemon() {
  local step
  # First signal the child, then reap it; either step may fail harmlessly.
  for step in kill wait; do
    "${step}" "${daemon_pid}" >/dev/null 2>&1 || true
  done
  trap - EXIT
}
#######################################
# Move an index store aside and create a fresh one in its place.
# Globals:   ASL_BIN (read)
# Arguments: $1 - store root directory
# Outputs:   progress notices on stderr
# Returns:   non-zero if the timestamp, the backup move or the directory
#            creation fails (the store is then NOT reinitialized);
#            otherwise the status of `index init --force`
#######################################
repair_index_store() {
  local root="$1"
  local stamp backup_root
  local suffix=0

  # Declared separately from the assignment so a failing `date` is not masked.
  stamp="$(date +%Y%m%d-%H%M%S)" || return 1
  backup_root="${root}.bak-${stamp}"

  if [[ -d "${root}" ]]; then
    # If the backup path already exists (e.g. two repairs within the same
    # second), mv would nest the store inside the older backup. Pick an
    # unused name instead.
    while [[ -e "${backup_root}" ]]; do
      suffix=$((suffix + 1))
      backup_root="${root}.bak-${stamp}-${suffix}"
    done
    echo "backing up index store root to ${backup_root}" >&2
    # Never fall through to `init --force` on a store that was not backed up.
    mv -- "${root}" "${backup_root}" || return 1
  fi

  mkdir -p -- "${root}" || return 1
  echo "reinitializing index-backed ASL store at ${root}" >&2
  "${ASL_BIN}" index init --root "${root}" --force
}
# ---------------------------------------------------------------------------
# Main flow:
#   1. initialize the configured store;
#   2. unless an index backend is to be probed, exec the daemon directly;
#   3. otherwise start it in the background, check readiness and the write
#      path, and keep serving if both pass;
#   4. on failure optionally repair the index store and probe once more;
#   5. as a last resort fall back to the fs backend.
# ---------------------------------------------------------------------------

# Block until the background daemon exits, then exit with its status.
# The EXIT trap installed by start_probe_daemon stays armed while waiting, and
# HUP/INT/TERM are turned into ordinary exits, so terminating this wrapper also
# takes the daemon down instead of orphaning it.
wait_for_daemon_and_exit() {
  local status=0
  trap 'exit 129' HUP
  trap 'exit 130' INT
  trap 'exit 143' TERM
  wait "${daemon_pid}" || status=$?
  # The daemon is gone; avoid signalling its (possibly reused) PID on exit.
  trap - EXIT HUP INT TERM
  exit "${status}"
}

init_store "${STORE_BACKEND}" "${STORE_ROOT}"

# Nothing to verify for non-index backends or when probing is disabled.
# run_daemon_foreground execs the daemon, replacing this shell.
if [[ "${STORE_BACKEND}" != "index" || "${INDEX_BACKEND_PROBE}" != "1" ]]; then
  run_daemon_foreground "${STORE_BACKEND}" "${STORE_ROOT}"
fi

# Both probe steps talk to the daemon through curl, and wait_ready discards
# curl's output. Without curl every probe would fail and look like a broken
# index backend, which would move the store aside below. Refuse to probe.
if ! command -v curl >/dev/null 2>&1; then
  echo "curl is required for the index backend probe; install curl or set INDEX_BACKEND_PROBE=0" >&2
  exit 1
fi

start_probe_daemon
fallback_to_fs=0
if ! wait_ready "${SOCK}"; then
  echo "index backend startup/readiness failed" >&2
  fallback_to_fs=1
elif ! probe_index_write_path "${SOCK}" "${SPACE}"; then
  echo "index backend write probe failed" >&2
  fallback_to_fs=1
fi
if [[ "${fallback_to_fs}" == "0" ]]; then
  # Healthy: the probed daemon simply keeps serving.
  wait_for_daemon_and_exit
fi
stop_probe_daemon

if [[ "${INDEX_BACKEND_REPAIR}" == "1" ]]; then
  echo "attempting index store repair before fs fallback" >&2
  repair_index_store "${STORE_ROOT}"
  start_probe_daemon
  if wait_ready "${SOCK}" && probe_index_write_path "${SOCK}" "${SPACE}"; then
    wait_for_daemon_and_exit
  fi
  echo "index backend repair failed" >&2
  stop_probe_daemon
fi

if [[ "${INDEX_BACKEND_FALLBACK}" != "fs" ]]; then
  echo "set INDEX_BACKEND_FALLBACK=fs to auto-fallback, or INDEX_BACKEND_PROBE=0 to disable probe" >&2
  exit 1
fi

echo "falling back to fs backend: root=${FS_FALLBACK_STORE_ROOT}" >&2
init_store "fs" "${FS_FALLBACK_STORE_ROOT}"
run_daemon_foreground "fs" "${FS_FALLBACK_STORE_ROOT}"