jecio/record-assistant-feedback-via-spark-container.sh
2026-02-14 21:10:26 +01:00

59 lines
2 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# record-assistant-feedback-via-spark-container.sh
#
# Records one assistant-feedback entry by copying write_assistant_feedback.py
# into a Docker container and running it there with spark-submit.
#
# Usage:
#   record-assistant-feedback-via-spark-container.sh \
#     <feedback_id> <created_at_utc> <outcome> <task_type> \
#     [release_name] [confidence] [needs_review] \
#     [goal_b64] [draft_b64] [final_b64] [sources_b64] [notes_b64]
#
# Arguments are positional. The first four are required; the rest may be
# omitted from the right and fall back to the defaults assigned below.
#
# Environment overrides:
#   FEEDBACK_TABLE        target table (default: lake.db1.assistant_feedback)
#   SPARK_CONTAINER_NAME  container to copy into / exec in (default: spark)
#   SPARK_PROPS           value for spark-submit --properties-file
#   SPARK_PACKAGES        value for spark-submit --packages
#   SCRIPT_LOCAL          host path of write_assistant_feedback.py
#                         (default: ./write_assistant_feedback.py)
#   AWS_REGION, AWS_DEFAULT_REGION
#                         forwarded into the container (default: us-east-1)
#
# Exit status: 1 when a required argument is missing or the local Python
# script is not found; otherwise any failing command aborts the script
# (set -e) with that command's status.
set -euo pipefail

FEEDBACK_TABLE="${FEEDBACK_TABLE:-lake.db1.assistant_feedback}"

# Required positionals. ${N:-} keeps `set -u` from aborting on a missing
# argument so the explicit check below can print the usage text instead.
FEEDBACK_ID="${1:-}"
CREATED_AT_UTC="${2:-}"
OUTCOME="${3:-}"
TASK_TYPE="${4:-}"

# Optional positionals. Every value is handed to the Python script verbatim;
# nothing is validated or decoded here.
# NOTE(review): the *_B64 values are presumably base64-encoded text that the
# Python script decodes - confirm against write_assistant_feedback.py.
RELEASE_NAME="${5:-}"
CONFIDENCE="${6:-0}"
NEEDS_REVIEW="${7:-true}"
GOAL_B64="${8:-}"
DRAFT_B64="${9:-}"
FINAL_B64="${10:-}"
SOURCES_B64="${11:-}"
NOTES_B64="${12:-}"

if [[ -z "$FEEDBACK_ID" || -z "$CREATED_AT_UTC" || -z "$OUTCOME" || -z "$TASK_TYPE" ]]; then
  # Brackets mark the arguments that are genuinely optional, so the message
  # matches what the check above actually enforces.
  {
    printf 'Usage: %s <feedback_id> <created_at_utc> <outcome> <task_type> [release_name] [confidence] [needs_review] [goal_b64] [draft_b64] [final_b64] [sources_b64] [notes_b64]\n' "$0"
    printf 'Defaults: confidence=0, needs_review=true; all other optional arguments default to empty.\n'
  } >&2
  exit 1
fi
# Where the Python script lives on the host and where it is placed inside the
# container. Only the host path can be overridden from the environment.
SCRIPT_REMOTE="/tmp/write_assistant_feedback.py"
SCRIPT_LOCAL="${SCRIPT_LOCAL:-./write_assistant_feedback.py}"

# Stop before touching Docker if there is nothing to copy.
[[ -f "$SCRIPT_LOCAL" ]] || {
  echo "write_assistant_feedback.py not found at: $SCRIPT_LOCAL" >&2
  exit 1
}

# Container and spark-submit settings; each falls back to a built-in default
# when the corresponding environment variable is unset or empty.
CONTAINER_NAME="${SPARK_CONTAINER_NAME:-spark}"
SPARK_PROPS="${SPARK_PROPS:-/opt/lakehouse/spark-conf/lakehouse-spark-defaults.conf}"
PACKAGES="${SPARK_PACKAGES:-org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.10.1,org.apache.iceberg:iceberg-aws-bundle:1.10.1,org.projectnessie.nessie-integrations:nessie-spark-extensions-3.5_2.12:0.104.5}"
# Stage the Python script inside the container so spark-submit can read it.
docker cp "$SCRIPT_LOCAL" "${CONTAINER_NAME}:${SCRIPT_REMOTE}"

# Environment passed to the process started by `docker exec`.
docker_env=(
  -e "AWS_REGION=${AWS_REGION:-us-east-1}"
  -e "AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION:-us-east-1}"
)

# Options consumed by spark-submit itself.
spark_opts=(
  --properties-file "$SPARK_PROPS"
  --packages "$PACKAGES"
)

# Options forwarded to write_assistant_feedback.py, one per feedback field.
# Empty values are still passed as explicit empty-string arguments.
writer_args=(
  --table "$FEEDBACK_TABLE"
  --feedback-id "$FEEDBACK_ID"
  --created-at-utc "$CREATED_AT_UTC"
  --outcome "$OUTCOME"
  --task-type "$TASK_TYPE"
  --release-name "$RELEASE_NAME"
  --confidence "$CONFIDENCE"
  --needs-review "$NEEDS_REVIEW"
  --goal-b64 "$GOAL_B64"
  --draft-b64 "$DRAFT_B64"
  --final-b64 "$FINAL_B64"
  --sources-b64 "$SOURCES_B64"
  --notes-b64 "$NOTES_B64"
)

docker exec "${docker_env[@]}" "$CONTAINER_NAME" \
  /opt/spark/bin/spark-submit "${spark_opts[@]}" \
  "$SCRIPT_REMOTE" "${writer_args[@]}"

# Reached only when both docker commands succeeded, since the script runs
# under `set -e`.
printf '%s\n' "[DONE] Recorded assistant feedback ${FEEDBACK_ID} into ${FEEDBACK_TABLE}"