#!/usr/bin/env bash
#
# Creates the Iceberg table used for assistant message ingest by running
# spark-sql inside an already-running Spark container (via `docker exec`).
#
# Default table: lake.db1.messages
#
# Environment overrides (all optional):
#   SPARK_CONTAINER_NAME  Container to exec into (default: spark).
#   SPARK_PROPS           Path, inside the container, of the Spark properties
#                         file passed to --properties-file.
#   SPARK_PACKAGES        Comma-separated Maven coordinates for --packages.
#   MESSAGES_TABLE        Table identifier to create
#                         (default: lake.db1.messages).
#   AWS_REGION            Forwarded into the container (default: us-east-1).
#   AWS_DEFAULT_REGION    Forwarded into the container (default: us-east-1).
set -euo pipefail

readonly CONTAINER_NAME="${SPARK_CONTAINER_NAME:-spark}"
readonly SPARK_PROPS="${SPARK_PROPS:-/opt/lakehouse/spark-conf/lakehouse-spark-defaults.conf}"
readonly PACKAGES="${SPARK_PACKAGES:-org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.10.1,org.apache.iceberg:iceberg-aws-bundle:1.10.1,org.projectnessie.nessie-integrations:nessie-spark-extensions-3.5_2.12:0.104.5}"
readonly MESSAGES_TABLE="${MESSAGES_TABLE:-lake.db1.messages}"

# Create the namespace the table actually lives in, so that overriding
# MESSAGES_TABLE (e.g. lake.db2.messages) does not leave its namespace missing.
# The namespace is everything before the last dot of the table identifier; an
# identifier without a dot keeps the historical lake.db1 namespace.
if [[ "$MESSAGES_TABLE" == *.* ]]; then
  MESSAGES_NAMESPACE="${MESSAGES_TABLE%.*}"
else
  MESSAGES_NAMESPACE="lake.db1"
fi
readonly MESSAGES_NAMESPACE

# Both statements use IF NOT EXISTS. The table is partitioned by the day of
# sent_at.
readonly SQL="
CREATE NAMESPACE IF NOT EXISTS ${MESSAGES_NAMESPACE};

CREATE TABLE IF NOT EXISTS ${MESSAGES_TABLE} (
  thread_id STRING,
  message_id STRING,
  sender STRING,
  channel STRING,
  sent_at TIMESTAMP,
  body STRING,
  metadata_json STRING
)
USING iceberg
PARTITIONED BY (days(sent_at));
"

# Region variables are passed explicitly with -e so the spark-sql process in
# the container sees them regardless of the container's own environment.
docker exec \
  -e AWS_REGION="${AWS_REGION:-us-east-1}" \
  -e AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-us-east-1}" \
  "$CONTAINER_NAME" \
  /opt/spark/bin/spark-sql \
  --properties-file "$SPARK_PROPS" \
  --packages "$PACKAGES" \
  -e "$SQL"