36 lines
1 KiB
Bash
36 lines
1 KiB
Bash
|
|
#!/usr/bin/env bash
#
# Creates the Iceberg table used for assistant message ingest by running
# spark-sql inside a Spark container through `docker exec`.
#
# Optional environment overrides:
#   SPARK_CONTAINER_NAME  container to exec into            (default: spark)
#   SPARK_PROPS           file passed to --properties-file; the path is
#                         resolved by spark-sql, i.e. inside the container
#   SPARK_PACKAGES        comma-separated Maven coordinates for --packages
#   MESSAGES_TABLE        table to create        (default: lake.db1.messages)
#   AWS_REGION,
#   AWS_DEFAULT_REGION    forwarded to the container    (default: us-east-1)
set -euo pipefail

CONTAINER_NAME="${SPARK_CONTAINER_NAME:-spark}"
SPARK_PROPS="${SPARK_PROPS:-/opt/lakehouse/spark-conf/lakehouse-spark-defaults.conf}"
PACKAGES="${SPARK_PACKAGES:-org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.10.1,org.apache.iceberg:iceberg-aws-bundle:1.10.1,org.projectnessie.nessie-integrations:nessie-spark-extensions-3.5_2.12:0.104.5}"
MESSAGES_TABLE="${MESSAGES_TABLE:-lake.db1.messages}"

# Create the namespace the table actually lives in. Everything before the
# last dot of a qualified name is its namespace, so the default table still
# yields lake.db1. An unqualified name carries no namespace of its own; in
# that case keep creating lake.db1 as this script always has.
if [[ "$MESSAGES_TABLE" == *.* ]]; then
  MESSAGES_NAMESPACE="${MESSAGES_TABLE%.*}"
else
  MESSAGES_NAMESPACE="lake.db1"
fi

# Let each region variable fall back to the other before the hard default,
# so setting only one of them never forwards two different regions.
FORWARD_AWS_REGION="${AWS_REGION:-${AWS_DEFAULT_REGION:-us-east-1}}"
FORWARD_AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-${AWS_REGION:-us-east-1}}"

# Both statements use IF NOT EXISTS, so running the script again is harmless.
SQL="
CREATE NAMESPACE IF NOT EXISTS ${MESSAGES_NAMESPACE};

CREATE TABLE IF NOT EXISTS ${MESSAGES_TABLE} (
  thread_id STRING,
  message_id STRING,
  sender STRING,
  channel STRING,
  sent_at TIMESTAMP,
  body STRING,
  metadata_json STRING
)
USING iceberg
PARTITIONED BY (days(sent_at));
"

# The first two -e flags belong to `docker exec` (container environment);
# the final -e belongs to spark-sql (inline SQL to execute).
docker exec \
  -e AWS_REGION="$FORWARD_AWS_REGION" \
  -e AWS_DEFAULT_REGION="$FORWARD_AWS_DEFAULT_REGION" \
  "$CONTAINER_NAME" \
  /opt/spark/bin/spark-sql \
  --properties-file "$SPARK_PROPS" \
  --packages "$PACKAGES" \
  -e "$SQL"
|