37 lines
1.1 KiB
Python
37 lines
1.1 KiB
Python
|
|
import argparse
|
||
|
|
import json
|
||
|
|
import os
|
||
|
|
|
||
|
|
from pyspark.sql import SparkSession
|
||
|
|
from pyspark.sql import functions as F
|
||
|
|
|
||
|
|
|
||
|
|
def main() -> None:
    """Query assistant proposal rows from a Spark table and print them as JSON.

    Command-line options:
        --table: table to read. Defaults to the ``PROPOSAL_TABLE`` environment
            variable, falling back to ``lake.db1.assistant_proposals``.
        --release-name: when non-empty, keep only rows whose ``release_name``
            equals this value.
        --proposal-set-id: when non-empty, keep only rows whose
            ``proposal_set_id`` equals this value.
        --limit: maximum number of rows to return (default 200); the value is
            clamped to the range 1..2000.

    Rows are ordered by ``created_at_utc`` descending and then ``proposal_id``
    ascending, with nulls last for both keys. The result is written to stdout
    as a single JSON array of objects.

    Raises:
        TypeError: if a row contains a value that is neither JSON-native nor
            one of the types handled by the fallback encoder below.
    """
    # Local imports: only the JSON fallback encoder needs these modules.
    import datetime
    import decimal

    def _json_default(value: object) -> str:
        """Encode the non-JSON-native Python values Spark rows may contain."""
        # datetime.datetime is a subclass of datetime.date, so one check
        # covers both timestamp and date columns.
        if isinstance(value, datetime.date):
            return value.isoformat()
        if isinstance(value, decimal.Decimal):
            # Emit as a string so no precision is lost through float.
            return str(value)
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    p = argparse.ArgumentParser(description="Query assistant proposal rows")
    p.add_argument(
        "--table",
        default=os.getenv("PROPOSAL_TABLE", "lake.db1.assistant_proposals"),
    )
    p.add_argument("--release-name", default="")
    p.add_argument("--proposal-set-id", default="")
    p.add_argument("--limit", type=int, default=200)
    args = p.parse_args()

    spark = SparkSession.builder.appName("query-assistant-proposals").getOrCreate()
    df = spark.table(args.table)

    # Empty-string defaults mean "no filter" for the optional selectors.
    if args.release_name:
        df = df.where(F.col("release_name") == args.release_name)
    if args.proposal_set_id:
        df = df.where(F.col("proposal_set_id") == args.proposal_set_id)

    # Clamp the requested limit so collect() never pulls more than 2000 rows
    # (and always at least one) into driver memory.
    row_limit = max(1, min(args.limit, 2000))
    rows = (
        df.orderBy(
            F.col("created_at_utc").desc_nulls_last(),
            F.col("proposal_id").asc_nulls_last(),
        )
        .limit(row_limit)
        .collect()
    )

    out = [r.asDict(recursive=True) for r in rows]
    # NOTE(review): the table schema is not visible here. If created_at_utc
    # (or any other column) is a timestamp/date/decimal, plain json.dumps
    # would raise TypeError; the fallback encoder keeps the output usable.
    print(json.dumps(out, ensure_ascii=False, default=_json_default))
|
||
|
|
|
||
|
|
|
||
|
|
# Script entry point: run the query only when executed directly, not on import.
if __name__ == "__main__":
    main()
|