#!/usr/bin/env bash
#
# Record one pipeline run as a row in an Iceberg "runs" table by generating
# a CREATE TABLE IF NOT EXISTS + INSERT statement and handing it to spark-sql
# (submitted by the docker exec invocation at the bottom of this script).
#
# Positional arguments:
#   1 run_id             unique run identifier            (required)
#   2 run_type           kind of run                      (required)
#   3 status             run status                       (required)
#   4 started_at_utc     start timestamp, UTC             (required)
#   5 finished_at_utc    finish timestamp, UTC            (may be empty)
#   6 actor              who/what triggered the run       (may be empty)
#   7 input_json_b64     base64-encoded input JSON        (may be empty)
#   8 output_json_b64    base64-encoded output JSON       (may be empty)
#   9 error_text_b64     base64-encoded error text        (may be empty)
#
# Environment overrides:
#   SPARK_CONTAINER_NAME  docker container running Spark   (default: spark)
#   SPARK_PROPS           spark-sql properties file        (default below)
#   SPARK_PACKAGES        --packages coordinates           (default: Iceberg + Nessie)
#   RUNS_TABLE            fully qualified target table     (default: lake.db1.runs)
set -euo pipefail

RUN_ID="${1:-}"
RUN_TYPE="${2:-}"
STATUS="${3:-}"
STARTED_AT_UTC="${4:-}"
FINISHED_AT_UTC="${5:-}"
ACTOR="${6:-}"
INPUT_JSON_B64="${7:-}"
OUTPUT_JSON_B64="${8:-}"
ERROR_TEXT_B64="${9:-}"

# The first four fields are mandatory; the remaining five may legitimately be
# empty (e.g. a still-running or failed run has no finished_at / output yet).
if [[ -z "$RUN_ID" || -z "$RUN_TYPE" || -z "$STATUS" || -z "$STARTED_AT_UTC" ]]; then
  # BUG FIX: the usage message previously listed no arguments at all
  # ('usage: $0 '), leaving the caller with no hint of the expected CLI.
  echo "usage: $0 <run_id> <run_type> <status> <started_at_utc> [finished_at_utc] [actor] [input_json_b64] [output_json_b64] [error_text_b64]" >&2
  exit 1
fi

CONTAINER_NAME="${SPARK_CONTAINER_NAME:-spark}"
SPARK_PROPS="${SPARK_PROPS:-/opt/lakehouse/spark-conf/lakehouse-spark-defaults.conf}"
PACKAGES="${SPARK_PACKAGES:-org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.10.1,org.apache.iceberg:iceberg-aws-bundle:1.10.1,org.projectnessie.nessie-integrations:nessie-spark-extensions-3.5_2.12:0.104.5}"
RUNS_TABLE="${RUNS_TABLE:-lake.db1.runs}"

# Decode a base64 argument to stdout. An empty argument yields an empty
# string directly instead of piping nothing through base64.
decode_b64() {
  local s="$1"
  if [[ -z "$s" ]]; then
    printf ""
    return
  fi
  printf '%s' "$s" | base64 -d
}

# Filter: escape stdin for embedding in a single-quoted SQL string literal
# by doubling single quotes (ANSI SQL quoting).
# NOTE(review): backslashes are NOT escaped here. With Spark's default
# parser settings (spark.sql.parser.escapedStringLiterals=false), a JSON
# payload containing backslash sequences may be mangled inside the literal
# — confirm against the cluster's parser config before relying on this
# for arbitrary JSON.
escape_sql() {
  sed "s/'/''/g"
}

# Decode + escape the payloads, and escape every plain-text field so the
# generated SQL cannot be broken (or injected into) by embedded quotes.
INPUT_JSON="$(decode_b64 "$INPUT_JSON_B64" | escape_sql)"
OUTPUT_JSON="$(decode_b64 "$OUTPUT_JSON_B64" | escape_sql)"
ERROR_TEXT="$(decode_b64 "$ERROR_TEXT_B64" | escape_sql)"

RUN_ID_ESC="$(printf '%s' "$RUN_ID" | escape_sql)"
RUN_TYPE_ESC="$(printf '%s' "$RUN_TYPE" | escape_sql)"
STATUS_ESC="$(printf '%s' "$STATUS" | escape_sql)"
STARTED_ESC="$(printf '%s' "$STARTED_AT_UTC" | escape_sql)"
FINISHED_ESC="$(printf '%s' "$FINISHED_AT_UTC" | escape_sql)"
ACTOR_ESC="$(printf '%s' "$ACTOR" | escape_sql)"

# DDL is idempotent (IF NOT EXISTS) so the first run bootstraps the table.
# NOTE(review): ingested_at_utc is populated with started_at_utc rather
# than the current time — verify this is the intended ingestion semantics.
SQL="
CREATE TABLE IF NOT EXISTS ${RUNS_TABLE} (
  run_id STRING,
  run_type STRING,
  status STRING,
  started_at_utc STRING,
  finished_at_utc STRING,
  actor STRING,
  input_json STRING,
  output_json STRING,
  error_text STRING,
  ingested_at_utc STRING
) USING iceberg;

INSERT INTO ${RUNS_TABLE} VALUES (
  '${RUN_ID_ESC}',
  '${RUN_TYPE_ESC}',
  '${STATUS_ESC}',
  '${STARTED_ESC}',
  '${FINISHED_ESC}',
  '${ACTOR_ESC}',
  '${INPUT_JSON}',
  '${OUTPUT_JSON}',
  '${ERROR_TEXT}',
  '${STARTED_ESC}'
);
"
# Submit the generated DDL + INSERT to spark-sql inside the Spark container.
# The AWS region variables are forwarded (defaulting to us-east-1) so the
# Iceberg/S3 clients inside the container can resolve a region.
spark_cmd=(
  docker exec
  -e AWS_REGION="${AWS_REGION:-us-east-1}"
  -e AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-us-east-1}"
  "$CONTAINER_NAME"
  /opt/spark/bin/spark-sql
  --properties-file "$SPARK_PROPS"
  --packages "$PACKAGES"
  -e "$SQL"
)
"${spark_cmd[@]}"