feat(self-improve): persist proposal sets to Iceberg with history and apply-by-id
This commit is contained in:
parent
6b75c0b596
commit
c6dad7427a
279
app.py
279
app.py
|
|
@ -76,6 +76,14 @@ ASSISTANT_ACTIONS_QUERY_REMOTE_SCRIPT = os.getenv(
|
||||||
"ASSISTANT_ACTIONS_QUERY_REMOTE_SCRIPT",
|
"ASSISTANT_ACTIONS_QUERY_REMOTE_SCRIPT",
|
||||||
"./query-assistant-actions-via-spark-container.sh",
|
"./query-assistant-actions-via-spark-container.sh",
|
||||||
)
|
)
|
||||||
|
ASSISTANT_PROPOSALS_REMOTE_SCRIPT = os.getenv(
|
||||||
|
"ASSISTANT_PROPOSALS_REMOTE_SCRIPT",
|
||||||
|
"./record-assistant-proposals-via-spark-container.sh",
|
||||||
|
)
|
||||||
|
ASSISTANT_PROPOSALS_QUERY_REMOTE_SCRIPT = os.getenv(
|
||||||
|
"ASSISTANT_PROPOSALS_QUERY_REMOTE_SCRIPT",
|
||||||
|
"./query-assistant-proposals-via-spark-container.sh",
|
||||||
|
)
|
||||||
IMAP_CHECKPOINT_REMOTE_SCRIPT = os.getenv(
|
IMAP_CHECKPOINT_REMOTE_SCRIPT = os.getenv(
|
||||||
"IMAP_CHECKPOINT_REMOTE_SCRIPT",
|
"IMAP_CHECKPOINT_REMOTE_SCRIPT",
|
||||||
"./query-imap-checkpoint-via-spark-container.sh",
|
"./query-imap-checkpoint-via-spark-container.sh",
|
||||||
|
|
@ -316,6 +324,8 @@ class AssistantSelfImproveProposal(BaseModel):
|
||||||
class AssistantSelfImproveResponse(BaseModel):
|
class AssistantSelfImproveResponse(BaseModel):
|
||||||
objective: str
|
objective: str
|
||||||
release_name: Optional[str] = None
|
release_name: Optional[str] = None
|
||||||
|
proposal_set_id: str
|
||||||
|
created_at_utc: str
|
||||||
summary: str
|
summary: str
|
||||||
proposals: List[AssistantSelfImproveProposal]
|
proposals: List[AssistantSelfImproveProposal]
|
||||||
signals: Dict[str, Any]
|
signals: Dict[str, Any]
|
||||||
|
|
@ -326,7 +336,9 @@ class AssistantSelfImproveResponse(BaseModel):
|
||||||
class AssistantSelfImproveApplyPayload(BaseModel):
|
class AssistantSelfImproveApplyPayload(BaseModel):
|
||||||
objective: Optional[str] = None
|
objective: Optional[str] = None
|
||||||
release_name: Optional[str] = None
|
release_name: Optional[str] = None
|
||||||
proposal: AssistantSelfImproveProposal
|
proposal_set_id: Optional[str] = None
|
||||||
|
proposal_id: Optional[str] = None
|
||||||
|
proposal: Optional[AssistantSelfImproveProposal] = None
|
||||||
dry_run: bool = False
|
dry_run: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1941,6 +1953,109 @@ async def run_remote_query_assistant_actions(
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def run_remote_record_assistant_proposals(
|
||||||
|
proposal_set_id: str,
|
||||||
|
created_at_utc: str,
|
||||||
|
objective: str,
|
||||||
|
release_name: Optional[str],
|
||||||
|
summary: str,
|
||||||
|
signals: Dict[str, Any],
|
||||||
|
proposals: List[Dict[str, Any]],
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
parts = [
|
||||||
|
ASSISTANT_PROPOSALS_REMOTE_SCRIPT,
|
||||||
|
proposal_set_id,
|
||||||
|
created_at_utc,
|
||||||
|
_b64(objective or ""),
|
||||||
|
release_name or "",
|
||||||
|
_b64(summary or ""),
|
||||||
|
_b64(json.dumps(signals or {}, ensure_ascii=False)),
|
||||||
|
_b64(json.dumps(proposals or [], ensure_ascii=False)),
|
||||||
|
]
|
||||||
|
command = f"cd {shlex.quote(PROJECTOR_REMOTE_DIR)} && {' '.join(shlex.quote(p) for p in parts)}"
|
||||||
|
ssh_args = [PROJECTOR_SSH_BIN, *shlex.split(PROJECTOR_SSH_OPTS), PROJECTOR_SSH_HOST, command]
|
||||||
|
proc = await asyncio.create_subprocess_exec(
|
||||||
|
*ssh_args,
|
||||||
|
stdout=asyncio.subprocess.PIPE,
|
||||||
|
stderr=asyncio.subprocess.PIPE,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=PROJECTOR_TIMEOUT_SEC)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
proc.kill()
|
||||||
|
await proc.wait()
|
||||||
|
raise HTTPException(status_code=504, detail="Assistant proposals logging timed out")
|
||||||
|
|
||||||
|
out = stdout.decode("utf-8", errors="replace")
|
||||||
|
err = stderr.decode("utf-8", errors="replace")
|
||||||
|
result = {
|
||||||
|
"host": PROJECTOR_SSH_HOST,
|
||||||
|
"remote_dir": PROJECTOR_REMOTE_DIR,
|
||||||
|
"exit_code": proc.returncode,
|
||||||
|
"stdout_tail": _tail(out),
|
||||||
|
"stderr_tail": _tail(err),
|
||||||
|
}
|
||||||
|
if proc.returncode != 0:
|
||||||
|
raise HTTPException(status_code=502, detail=result)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
async def run_remote_query_assistant_proposals(
|
||||||
|
release_name: Optional[str],
|
||||||
|
proposal_set_id: Optional[str],
|
||||||
|
limit: int,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
parts = [
|
||||||
|
ASSISTANT_PROPOSALS_QUERY_REMOTE_SCRIPT,
|
||||||
|
release_name or "",
|
||||||
|
proposal_set_id or "",
|
||||||
|
str(limit),
|
||||||
|
]
|
||||||
|
command = f"cd {shlex.quote(PROJECTOR_REMOTE_DIR)} && {' '.join(shlex.quote(p) for p in parts)}"
|
||||||
|
ssh_args = [PROJECTOR_SSH_BIN, *shlex.split(PROJECTOR_SSH_OPTS), PROJECTOR_SSH_HOST, command]
|
||||||
|
proc = await asyncio.create_subprocess_exec(
|
||||||
|
*ssh_args,
|
||||||
|
stdout=asyncio.subprocess.PIPE,
|
||||||
|
stderr=asyncio.subprocess.PIPE,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=PROJECTOR_TIMEOUT_SEC)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
proc.kill()
|
||||||
|
await proc.wait()
|
||||||
|
raise HTTPException(status_code=504, detail="Assistant proposals query timed out")
|
||||||
|
|
||||||
|
out = stdout.decode("utf-8", errors="replace")
|
||||||
|
err = stderr.decode("utf-8", errors="replace")
|
||||||
|
if proc.returncode != 0:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=502,
|
||||||
|
detail={
|
||||||
|
"host": PROJECTOR_SSH_HOST,
|
||||||
|
"remote_dir": PROJECTOR_REMOTE_DIR,
|
||||||
|
"exit_code": proc.returncode,
|
||||||
|
"stdout_tail": _tail(out),
|
||||||
|
"stderr_tail": _tail(err),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
rows = _extract_json_array_from_text(out)
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=502,
|
||||||
|
detail={
|
||||||
|
"message": f"Unable to parse proposals query output: {e}",
|
||||||
|
"stdout_tail": _tail(out),
|
||||||
|
"stderr_tail": _tail(err),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"host": PROJECTOR_SSH_HOST,
|
||||||
|
"remote_dir": PROJECTOR_REMOTE_DIR,
|
||||||
|
"rows": rows,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
async def run_remote_record_run(
|
async def run_remote_record_run(
|
||||||
run_id: str,
|
run_id: str,
|
||||||
run_type: str,
|
run_type: str,
|
||||||
|
|
@ -2567,6 +2682,26 @@ async def assistant_self_improve(
|
||||||
if not proposals:
|
if not proposals:
|
||||||
proposals = fallback_self_improve_proposals()[: payload.max_proposals]
|
proposals = fallback_self_improve_proposals()[: payload.max_proposals]
|
||||||
|
|
||||||
|
created_at_utc = now_iso()
|
||||||
|
proposal_set_id = str(uuid.uuid4())
|
||||||
|
signals = {
|
||||||
|
"feedback_rows": len(feedback_rows),
|
||||||
|
"blocked_action_rows": len(action_rows),
|
||||||
|
}
|
||||||
|
proposal_dicts = [p.model_dump() for p in proposals[: payload.max_proposals]]
|
||||||
|
try:
|
||||||
|
await run_remote_record_assistant_proposals(
|
||||||
|
proposal_set_id=proposal_set_id,
|
||||||
|
created_at_utc=created_at_utc,
|
||||||
|
objective=payload.objective,
|
||||||
|
release_name=payload.release_name,
|
||||||
|
summary=summary,
|
||||||
|
signals=signals,
|
||||||
|
proposals=proposal_dicts,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[WARN] self-improve proposal persistence failed: {e}")
|
||||||
|
|
||||||
apply_blocked = payload.apply
|
apply_blocked = payload.apply
|
||||||
apply_block_reason: Optional[str] = None
|
apply_block_reason: Optional[str] = None
|
||||||
if payload.apply:
|
if payload.apply:
|
||||||
|
|
@ -2575,17 +2710,98 @@ async def assistant_self_improve(
|
||||||
return AssistantSelfImproveResponse(
|
return AssistantSelfImproveResponse(
|
||||||
objective=payload.objective,
|
objective=payload.objective,
|
||||||
release_name=payload.release_name,
|
release_name=payload.release_name,
|
||||||
|
proposal_set_id=proposal_set_id,
|
||||||
|
created_at_utc=created_at_utc,
|
||||||
summary=summary,
|
summary=summary,
|
||||||
proposals=proposals[: payload.max_proposals],
|
proposals=proposals[: payload.max_proposals],
|
||||||
signals={
|
signals=signals,
|
||||||
"feedback_rows": len(feedback_rows),
|
|
||||||
"blocked_action_rows": len(action_rows),
|
|
||||||
},
|
|
||||||
apply_blocked=apply_blocked,
|
apply_blocked=apply_blocked,
|
||||||
apply_block_reason=apply_block_reason,
|
apply_block_reason=apply_block_reason,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/assistant/self-improve/history")
|
||||||
|
async def assistant_self_improve_history(
|
||||||
|
release_name: Optional[str] = None,
|
||||||
|
proposal_set_id: Optional[str] = None,
|
||||||
|
limit: int = 200,
|
||||||
|
x_admin_api_key: Optional[str] = Header(default=None),
|
||||||
|
):
|
||||||
|
check_admin_api_key(x_admin_api_key)
|
||||||
|
bounded_limit = max(1, min(limit, 2000))
|
||||||
|
result = await run_remote_query_assistant_proposals(
|
||||||
|
release_name=release_name,
|
||||||
|
proposal_set_id=proposal_set_id,
|
||||||
|
limit=bounded_limit,
|
||||||
|
)
|
||||||
|
rows = result["rows"]
|
||||||
|
|
||||||
|
grouped: Dict[str, Dict[str, Any]] = {}
|
||||||
|
for r in rows:
|
||||||
|
set_id = str(r.get("proposal_set_id") or "")
|
||||||
|
if not set_id:
|
||||||
|
continue
|
||||||
|
g = grouped.get(set_id)
|
||||||
|
if g is None:
|
||||||
|
signals_json = r.get("signals_json") or "{}"
|
||||||
|
try:
|
||||||
|
signals = json.loads(signals_json)
|
||||||
|
if not isinstance(signals, dict):
|
||||||
|
signals = {}
|
||||||
|
except Exception:
|
||||||
|
signals = {}
|
||||||
|
g = {
|
||||||
|
"proposal_set_id": set_id,
|
||||||
|
"created_at_utc": r.get("created_at_utc"),
|
||||||
|
"objective": r.get("objective"),
|
||||||
|
"release_name": r.get("release_name"),
|
||||||
|
"summary": r.get("summary"),
|
||||||
|
"signals": signals,
|
||||||
|
"proposals": [],
|
||||||
|
}
|
||||||
|
grouped[set_id] = g
|
||||||
|
|
||||||
|
files_json = r.get("files_json") or "[]"
|
||||||
|
tests_json = r.get("tests_json") or "[]"
|
||||||
|
try:
|
||||||
|
files = json.loads(files_json)
|
||||||
|
if not isinstance(files, list):
|
||||||
|
files = []
|
||||||
|
except Exception:
|
||||||
|
files = []
|
||||||
|
try:
|
||||||
|
tests = json.loads(tests_json)
|
||||||
|
if not isinstance(tests, list):
|
||||||
|
tests = []
|
||||||
|
except Exception:
|
||||||
|
tests = []
|
||||||
|
|
||||||
|
g["proposals"].append(
|
||||||
|
{
|
||||||
|
"proposal_id": r.get("proposal_id"),
|
||||||
|
"title": r.get("title"),
|
||||||
|
"problem": r.get("problem"),
|
||||||
|
"change": r.get("change_text"),
|
||||||
|
"files": files,
|
||||||
|
"risk": r.get("risk") or "medium",
|
||||||
|
"tests": tests,
|
||||||
|
"auto_apply_safe": bool(r.get("auto_apply_safe", False)),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
sets = list(grouped.values())
|
||||||
|
sets.sort(key=lambda x: str(x.get("created_at_utc") or ""), reverse=True)
|
||||||
|
return {
|
||||||
|
"count": len(sets),
|
||||||
|
"filters": {
|
||||||
|
"release_name": release_name,
|
||||||
|
"proposal_set_id": proposal_set_id,
|
||||||
|
"limit": bounded_limit,
|
||||||
|
},
|
||||||
|
"rows": sets,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@app.post("/assistant/self-improve/apply", response_model=AssistantSelfImproveApplyResponse)
|
@app.post("/assistant/self-improve/apply", response_model=AssistantSelfImproveApplyResponse)
|
||||||
async def assistant_self_improve_apply(
|
async def assistant_self_improve_apply(
|
||||||
payload: AssistantSelfImproveApplyPayload,
|
payload: AssistantSelfImproveApplyPayload,
|
||||||
|
|
@ -2593,7 +2809,58 @@ async def assistant_self_improve_apply(
|
||||||
):
|
):
|
||||||
check_admin_api_key(x_admin_api_key)
|
check_admin_api_key(x_admin_api_key)
|
||||||
repo_dir = Path(__file__).resolve().parent
|
repo_dir = Path(__file__).resolve().parent
|
||||||
proposal = payload.proposal
|
proposal: Optional[AssistantSelfImproveProposal] = payload.proposal
|
||||||
|
if proposal is None:
|
||||||
|
if not (payload.proposal_set_id and payload.proposal_id):
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail="Provide either proposal object or both proposal_set_id and proposal_id.",
|
||||||
|
)
|
||||||
|
hist = await run_remote_query_assistant_proposals(
|
||||||
|
release_name=payload.release_name,
|
||||||
|
proposal_set_id=payload.proposal_set_id,
|
||||||
|
limit=500,
|
||||||
|
)
|
||||||
|
matched: Optional[AssistantSelfImproveProposal] = None
|
||||||
|
for r in hist.get("rows", []):
|
||||||
|
if str(r.get("proposal_id") or "") != payload.proposal_id:
|
||||||
|
continue
|
||||||
|
files_json = r.get("files_json") or "[]"
|
||||||
|
tests_json = r.get("tests_json") or "[]"
|
||||||
|
try:
|
||||||
|
files = json.loads(files_json)
|
||||||
|
if not isinstance(files, list):
|
||||||
|
files = []
|
||||||
|
except Exception:
|
||||||
|
files = []
|
||||||
|
try:
|
||||||
|
tests = json.loads(tests_json)
|
||||||
|
if not isinstance(tests, list):
|
||||||
|
tests = []
|
||||||
|
except Exception:
|
||||||
|
tests = []
|
||||||
|
risk_raw = str(r.get("risk") or "medium").strip().lower()
|
||||||
|
risk: Literal["low", "medium", "high"] = "medium"
|
||||||
|
if risk_raw in ("low", "medium", "high"):
|
||||||
|
risk = risk_raw # type: ignore[assignment]
|
||||||
|
matched = AssistantSelfImproveProposal(
|
||||||
|
proposal_id=str(r.get("proposal_id") or payload.proposal_id),
|
||||||
|
title=str(r.get("title") or ""),
|
||||||
|
problem=str(r.get("problem") or ""),
|
||||||
|
change=str(r.get("change_text") or ""),
|
||||||
|
files=[str(x) for x in files if str(x).strip()],
|
||||||
|
risk=risk,
|
||||||
|
tests=[str(x) for x in tests if str(x).strip()],
|
||||||
|
auto_apply_safe=bool(r.get("auto_apply_safe", False)),
|
||||||
|
)
|
||||||
|
break
|
||||||
|
if matched is None:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=404,
|
||||||
|
detail=f"Proposal not found for set_id={payload.proposal_set_id} proposal_id={payload.proposal_id}",
|
||||||
|
)
|
||||||
|
proposal = matched
|
||||||
|
assert proposal is not None
|
||||||
|
|
||||||
branch_slug = _slugify(f"{proposal.proposal_id}-{proposal.title}")
|
branch_slug = _slugify(f"{proposal.proposal_id}-{proposal.title}")
|
||||||
branch_name = f"assistant/{branch_slug}"
|
branch_name = f"assistant/{branch_slug}"
|
||||||
|
|
|
||||||
33
query-assistant-proposals-via-spark-container.sh
Executable file
33
query-assistant-proposals-via-spark-container.sh
Executable file
|
|
@ -0,0 +1,33 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
RELEASE_NAME="${1:-}"
|
||||||
|
PROPOSAL_SET_ID="${2:-}"
|
||||||
|
LIMIT="${3:-200}"
|
||||||
|
PROPOSAL_TABLE="${PROPOSAL_TABLE:-lake.db1.assistant_proposals}"
|
||||||
|
|
||||||
|
CONTAINER_NAME="${SPARK_CONTAINER_NAME:-spark}"
|
||||||
|
SPARK_PROPS="${SPARK_PROPS:-/opt/lakehouse/spark-conf/lakehouse-spark-defaults.conf}"
|
||||||
|
PACKAGES="${SPARK_PACKAGES:-org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.10.1,org.apache.iceberg:iceberg-aws-bundle:1.10.1,org.projectnessie.nessie-integrations:nessie-spark-extensions-3.5_2.12:0.104.5}"
|
||||||
|
SCRIPT_LOCAL="${SCRIPT_LOCAL:-./query_assistant_proposals.py}"
|
||||||
|
SCRIPT_REMOTE="/tmp/query_assistant_proposals.py"
|
||||||
|
|
||||||
|
if [[ ! -f "$SCRIPT_LOCAL" ]]; then
|
||||||
|
echo "query_assistant_proposals.py not found at: $SCRIPT_LOCAL" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
docker cp "$SCRIPT_LOCAL" "$CONTAINER_NAME":"$SCRIPT_REMOTE"
|
||||||
|
|
||||||
|
docker exec \
|
||||||
|
-e AWS_REGION="${AWS_REGION:-us-east-1}" \
|
||||||
|
-e AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-us-east-1}" \
|
||||||
|
"$CONTAINER_NAME" \
|
||||||
|
/opt/spark/bin/spark-submit \
|
||||||
|
--properties-file "$SPARK_PROPS" \
|
||||||
|
--packages "$PACKAGES" \
|
||||||
|
"$SCRIPT_REMOTE" \
|
||||||
|
--table "$PROPOSAL_TABLE" \
|
||||||
|
--release-name "$RELEASE_NAME" \
|
||||||
|
--proposal-set-id "$PROPOSAL_SET_ID" \
|
||||||
|
--limit "$LIMIT"
|
||||||
36
query_assistant_proposals.py
Normal file
36
query_assistant_proposals.py
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
from pyspark.sql import SparkSession
|
||||||
|
from pyspark.sql import functions as F
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
p = argparse.ArgumentParser(description="Query assistant proposal rows")
|
||||||
|
p.add_argument("--table", default=os.getenv("PROPOSAL_TABLE", "lake.db1.assistant_proposals"))
|
||||||
|
p.add_argument("--release-name", default="")
|
||||||
|
p.add_argument("--proposal-set-id", default="")
|
||||||
|
p.add_argument("--limit", type=int, default=200)
|
||||||
|
args = p.parse_args()
|
||||||
|
|
||||||
|
spark = SparkSession.builder.appName("query-assistant-proposals").getOrCreate()
|
||||||
|
df = spark.table(args.table)
|
||||||
|
|
||||||
|
if args.release_name:
|
||||||
|
df = df.where(F.col("release_name") == args.release_name)
|
||||||
|
if args.proposal_set_id:
|
||||||
|
df = df.where(F.col("proposal_set_id") == args.proposal_set_id)
|
||||||
|
|
||||||
|
rows = (
|
||||||
|
df.orderBy(F.col("created_at_utc").desc_nulls_last(), F.col("proposal_id").asc_nulls_last())
|
||||||
|
.limit(max(1, min(args.limit, 2000)))
|
||||||
|
.collect()
|
||||||
|
)
|
||||||
|
|
||||||
|
out = [r.asDict(recursive=True) for r in rows]
|
||||||
|
print(json.dumps(out, ensure_ascii=False))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
48
record-assistant-proposals-via-spark-container.sh
Executable file
48
record-assistant-proposals-via-spark-container.sh
Executable file
|
|
@ -0,0 +1,48 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
PROPOSAL_TABLE="${PROPOSAL_TABLE:-lake.db1.assistant_proposals}"
|
||||||
|
PROPOSAL_SET_ID="${1:-}"
|
||||||
|
CREATED_AT_UTC="${2:-}"
|
||||||
|
OBJECTIVE_B64="${3:-}"
|
||||||
|
RELEASE_NAME="${4:-}"
|
||||||
|
SUMMARY_B64="${5:-}"
|
||||||
|
SIGNALS_B64="${6:-}"
|
||||||
|
PROPOSALS_B64="${7:-}"
|
||||||
|
|
||||||
|
if [[ -z "$PROPOSAL_SET_ID" || -z "$CREATED_AT_UTC" ]]; then
|
||||||
|
echo "Usage: $0 <proposal_set_id> <created_at_utc> <objective_b64> <release_name> <summary_b64> <signals_b64> <proposals_b64>" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
CONTAINER_NAME="${SPARK_CONTAINER_NAME:-spark}"
|
||||||
|
SPARK_PROPS="${SPARK_PROPS:-/opt/lakehouse/spark-conf/lakehouse-spark-defaults.conf}"
|
||||||
|
PACKAGES="${SPARK_PACKAGES:-org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.10.1,org.apache.iceberg:iceberg-aws-bundle:1.10.1,org.projectnessie.nessie-integrations:nessie-spark-extensions-3.5_2.12:0.104.5}"
|
||||||
|
SCRIPT_LOCAL="${SCRIPT_LOCAL:-./write_assistant_proposals.py}"
|
||||||
|
SCRIPT_REMOTE="/tmp/write_assistant_proposals.py"
|
||||||
|
|
||||||
|
if [[ ! -f "$SCRIPT_LOCAL" ]]; then
|
||||||
|
echo "write_assistant_proposals.py not found at: $SCRIPT_LOCAL" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
docker cp "$SCRIPT_LOCAL" "$CONTAINER_NAME":"$SCRIPT_REMOTE"
|
||||||
|
|
||||||
|
docker exec \
|
||||||
|
-e AWS_REGION="${AWS_REGION:-us-east-1}" \
|
||||||
|
-e AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-us-east-1}" \
|
||||||
|
"$CONTAINER_NAME" \
|
||||||
|
/opt/spark/bin/spark-submit \
|
||||||
|
--properties-file "$SPARK_PROPS" \
|
||||||
|
--packages "$PACKAGES" \
|
||||||
|
"$SCRIPT_REMOTE" \
|
||||||
|
--table "$PROPOSAL_TABLE" \
|
||||||
|
--proposal-set-id "$PROPOSAL_SET_ID" \
|
||||||
|
--created-at-utc "$CREATED_AT_UTC" \
|
||||||
|
--objective-b64 "$OBJECTIVE_B64" \
|
||||||
|
--release-name "$RELEASE_NAME" \
|
||||||
|
--summary-b64 "$SUMMARY_B64" \
|
||||||
|
--signals-b64 "$SIGNALS_B64" \
|
||||||
|
--proposals-b64 "$PROPOSALS_B64"
|
||||||
|
|
||||||
|
echo "[DONE] Recorded proposal set ${PROPOSAL_SET_ID} into ${PROPOSAL_TABLE}"
|
||||||
|
|
@ -237,42 +237,92 @@ async function runSelfImprove() {
|
||||||
include_blocked_actions: true,
|
include_blocked_actions: true,
|
||||||
apply: false,
|
apply: false,
|
||||||
});
|
});
|
||||||
window.selfImproveLast = data.proposals || [];
|
summary.textContent =
|
||||||
summary.textContent = `${data.summary || ""}\n\nsignals: feedback=${data.signals?.feedback_rows ?? 0}, blocked_actions=${data.signals?.blocked_action_rows ?? 0}`;
|
`${data.summary || ""}\n\nproposal_set_id=${data.proposal_set_id}\n` +
|
||||||
renderRows(list, data.proposals || [], (p, idx) => `
|
`signals: feedback=${data.signals?.feedback_rows ?? 0}, blocked_actions=${data.signals?.blocked_action_rows ?? 0}`;
|
||||||
|
renderProposalSets(
|
||||||
|
[{ proposal_set_id: data.proposal_set_id, created_at_utc: data.created_at_utc, objective, release_name: cfg.releaseName || "", summary: data.summary, signals: data.signals || {}, proposals: data.proposals || [] }],
|
||||||
|
list,
|
||||||
|
summary,
|
||||||
|
cfg.releaseName || null
|
||||||
|
);
|
||||||
|
} catch (e) {
|
||||||
|
summary.textContent = `Error: ${String(e)}`;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function renderProposalSets(sets, listEl, summaryEl, releaseName) {
|
||||||
|
listEl.innerHTML = "";
|
||||||
|
if (!sets || sets.length === 0) {
|
||||||
|
listEl.innerHTML = '<div class="row">No proposal sets.</div>';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
sets.forEach((setObj) => {
|
||||||
|
const wrap = document.createElement("div");
|
||||||
|
wrap.className = "row";
|
||||||
|
const proposals = setObj.proposals || [];
|
||||||
|
const proposalsHtml = proposals.map((p) => `
|
||||||
|
<div class="row" style="margin-top:8px">
|
||||||
<div><strong>${p.proposal_id}: ${p.title}</strong></div>
|
<div><strong>${p.proposal_id}: ${p.title}</strong></div>
|
||||||
<div>${p.problem}</div>
|
<div>${p.problem || ""}</div>
|
||||||
<div class="meta">risk=${p.risk} | auto_apply_safe=${p.auto_apply_safe}</div>
|
<div class="meta">risk=${p.risk} | auto_apply_safe=${p.auto_apply_safe}</div>
|
||||||
<div class="meta">files=${(p.files || []).join(", ") || "-"}</div>
|
<div class="meta">files=${(p.files || []).join(", ") || "-"}</div>
|
||||||
<div class="meta">tests=${(p.tests || []).join(" | ") || "-"}</div>
|
<div class="meta">tests=${(p.tests || []).join(" | ") || "-"}</div>
|
||||||
<div style="margin-top:6px">${p.change || ""}</div>
|
<div style="margin-top:6px">${p.change || ""}</div>
|
||||||
<div style="margin-top:6px"><button class="apply-proposal" data-proposal-id="${p.proposal_id}">Apply as branch</button></div>
|
<div style="margin-top:6px">
|
||||||
`);
|
<button class="apply-proposal" data-proposal-set-id="${setObj.proposal_set_id}" data-proposal-id="${p.proposal_id}">Apply as branch</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
`).join("");
|
||||||
|
wrap.innerHTML = `
|
||||||
|
<div><strong>Set ${setObj.proposal_set_id}</strong></div>
|
||||||
|
<div class="meta">${setObj.created_at_utc || ""} | release=${setObj.release_name || ""}</div>
|
||||||
|
<div>${setObj.summary || ""}</div>
|
||||||
|
${proposalsHtml}
|
||||||
|
`;
|
||||||
|
listEl.appendChild(wrap);
|
||||||
|
});
|
||||||
|
|
||||||
document.querySelectorAll(".apply-proposal").forEach((btn) => {
|
document.querySelectorAll(".apply-proposal").forEach((btn) => {
|
||||||
btn.addEventListener("click", async () => {
|
btn.addEventListener("click", async () => {
|
||||||
|
const proposalSetId = btn.getAttribute("data-proposal-set-id");
|
||||||
const proposalId = btn.getAttribute("data-proposal-id");
|
const proposalId = btn.getAttribute("data-proposal-id");
|
||||||
const found = (window.selfImproveLast || []).find((x) => x.proposal_id === proposalId);
|
if (!proposalSetId || !proposalId) return;
|
||||||
if (!found) return;
|
|
||||||
const dryRun = document.getElementById("improveDryRun").checked;
|
const dryRun = document.getElementById("improveDryRun").checked;
|
||||||
summary.textContent = `Applying ${proposalId}...`;
|
summaryEl.textContent = `Applying ${proposalId} from set ${proposalSetId}...`;
|
||||||
try {
|
try {
|
||||||
const applied = await apiPost("/assistant/self-improve/apply", {
|
const applied = await apiPost("/assistant/self-improve/apply", {
|
||||||
objective,
|
objective: document.getElementById("improveObjective").value.trim() || null,
|
||||||
release_name: cfg.releaseName || null,
|
release_name: releaseName || null,
|
||||||
proposal: found,
|
proposal_set_id: proposalSetId,
|
||||||
|
proposal_id: proposalId,
|
||||||
dry_run: dryRun,
|
dry_run: dryRun,
|
||||||
});
|
});
|
||||||
summary.textContent =
|
summaryEl.textContent =
|
||||||
`apply result: applied=${applied.applied} dry_run=${applied.dry_run}\n` +
|
`apply result: applied=${applied.applied} dry_run=${applied.dry_run}\n` +
|
||||||
`branch=${applied.branch_name}\n` +
|
`branch=${applied.branch_name}\n` +
|
||||||
`proposal_file=${applied.proposal_file}\n` +
|
`proposal_file=${applied.proposal_file}\n` +
|
||||||
`commit=${applied.commit || "-"}\n` +
|
`commit=${applied.commit || "-"}\n` +
|
||||||
`${applied.detail || ""}`;
|
`${applied.detail || ""}`;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
summary.textContent = `Apply error: ${String(e)}`;
|
summaryEl.textContent = `Apply error: ${String(e)}`;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async function loadImproveHistory() {
|
||||||
|
const cfg = getConfig();
|
||||||
|
const summary = document.getElementById("improveSummary");
|
||||||
|
const list = document.getElementById("improveProposals");
|
||||||
|
summary.textContent = "Loading proposal history...";
|
||||||
|
try {
|
||||||
|
const data = await apiGet("/assistant/self-improve/history", {
|
||||||
|
release_name: cfg.releaseName || null,
|
||||||
|
limit: 200,
|
||||||
|
});
|
||||||
|
summary.textContent = `history sets=${data.count || 0}`;
|
||||||
|
renderProposalSets(data.rows || [], list, summary, cfg.releaseName || null);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
summary.textContent = `Error: ${String(e)}`;
|
summary.textContent = `Error: ${String(e)}`;
|
||||||
}
|
}
|
||||||
|
|
@ -286,6 +336,8 @@ document.getElementById("makeDraft").addEventListener("click", makeDraft);
|
||||||
document.getElementById("saveLearn").addEventListener("click", saveLearn);
|
document.getElementById("saveLearn").addEventListener("click", saveLearn);
|
||||||
document.getElementById("sendChat").addEventListener("click", sendChat);
|
document.getElementById("sendChat").addEventListener("click", sendChat);
|
||||||
document.getElementById("runImprove").addEventListener("click", runSelfImprove);
|
document.getElementById("runImprove").addEventListener("click", runSelfImprove);
|
||||||
|
document.getElementById("loadImproveHistory").addEventListener("click", loadImproveHistory);
|
||||||
|
|
||||||
loadConfig();
|
loadConfig();
|
||||||
loadMeta();
|
loadMeta();
|
||||||
|
loadImproveHistory();
|
||||||
|
|
|
||||||
|
|
@ -86,6 +86,7 @@
|
||||||
<input id="improveMax" type="number" min="1" max="20" value="5" style="width:90px" />
|
<input id="improveMax" type="number" min="1" max="20" value="5" style="width:90px" />
|
||||||
<label><input id="improveDryRun" type="checkbox" /> Dry-run apply</label>
|
<label><input id="improveDryRun" type="checkbox" /> Dry-run apply</label>
|
||||||
<button id="runImprove">Generate Proposals</button>
|
<button id="runImprove">Generate Proposals</button>
|
||||||
|
<button id="loadImproveHistory">Load History</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<pre id="improveSummary" class="output"></pre>
|
<pre id="improveSummary" class="output"></pre>
|
||||||
|
|
|
||||||
143
write_assistant_proposals.py
Normal file
143
write_assistant_proposals.py
Normal file
|
|
@ -0,0 +1,143 @@
|
||||||
|
import argparse
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
|
||||||
|
from pyspark.sql import SparkSession, types as T
|
||||||
|
|
||||||
|
|
||||||
|
def d(s: str) -> str:
|
||||||
|
if not s:
|
||||||
|
return ""
|
||||||
|
return base64.b64decode(s.encode("ascii")).decode("utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
p = argparse.ArgumentParser(description="Write assistant proposal set rows via Spark DataFrame")
|
||||||
|
p.add_argument("--table", required=True)
|
||||||
|
p.add_argument("--proposal-set-id", required=True)
|
||||||
|
p.add_argument("--created-at-utc", required=True)
|
||||||
|
p.add_argument("--objective-b64", default="")
|
||||||
|
p.add_argument("--release-name", default="")
|
||||||
|
p.add_argument("--summary-b64", default="")
|
||||||
|
p.add_argument("--signals-b64", default="")
|
||||||
|
p.add_argument("--proposals-b64", default="")
|
||||||
|
args = p.parse_args()
|
||||||
|
|
||||||
|
objective = d(args.objective_b64)
|
||||||
|
summary = d(args.summary_b64)
|
||||||
|
signals_json = d(args.signals_b64) or "{}"
|
||||||
|
proposals_json = d(args.proposals_b64) or "[]"
|
||||||
|
|
||||||
|
try:
|
||||||
|
signals_obj = json.loads(signals_json)
|
||||||
|
if not isinstance(signals_obj, dict):
|
||||||
|
signals_obj = {}
|
||||||
|
except Exception:
|
||||||
|
signals_obj = {}
|
||||||
|
signals_json = json.dumps(signals_obj, ensure_ascii=False, sort_keys=True)
|
||||||
|
|
||||||
|
try:
|
||||||
|
proposals_obj = json.loads(proposals_json)
|
||||||
|
if not isinstance(proposals_obj, list):
|
||||||
|
proposals_obj = []
|
||||||
|
except Exception:
|
||||||
|
proposals_obj = []
|
||||||
|
|
||||||
|
spark = SparkSession.builder.appName("write-assistant-proposals").getOrCreate()
|
||||||
|
spark.sql(
|
||||||
|
f"""
|
||||||
|
CREATE TABLE IF NOT EXISTS {args.table} (
|
||||||
|
proposal_set_id STRING,
|
||||||
|
created_at_utc STRING,
|
||||||
|
objective STRING,
|
||||||
|
release_name STRING,
|
||||||
|
summary STRING,
|
||||||
|
signals_json STRING,
|
||||||
|
proposal_id STRING,
|
||||||
|
title STRING,
|
||||||
|
problem STRING,
|
||||||
|
change_text STRING,
|
||||||
|
files_json STRING,
|
||||||
|
risk STRING,
|
||||||
|
tests_json STRING,
|
||||||
|
auto_apply_safe BOOLEAN
|
||||||
|
) USING iceberg
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
schema = T.StructType(
|
||||||
|
[
|
||||||
|
T.StructField("proposal_set_id", T.StringType(), False),
|
||||||
|
T.StructField("created_at_utc", T.StringType(), False),
|
||||||
|
T.StructField("objective", T.StringType(), True),
|
||||||
|
T.StructField("release_name", T.StringType(), True),
|
||||||
|
T.StructField("summary", T.StringType(), True),
|
||||||
|
T.StructField("signals_json", T.StringType(), True),
|
||||||
|
T.StructField("proposal_id", T.StringType(), False),
|
||||||
|
T.StructField("title", T.StringType(), True),
|
||||||
|
T.StructField("problem", T.StringType(), True),
|
||||||
|
T.StructField("change_text", T.StringType(), True),
|
||||||
|
T.StructField("files_json", T.StringType(), True),
|
||||||
|
T.StructField("risk", T.StringType(), True),
|
||||||
|
T.StructField("tests_json", T.StringType(), True),
|
||||||
|
T.StructField("auto_apply_safe", T.BooleanType(), False),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
rows = []
|
||||||
|
for idx, p_obj in enumerate(proposals_obj):
|
||||||
|
if not isinstance(p_obj, dict):
|
||||||
|
continue
|
||||||
|
files = p_obj.get("files")
|
||||||
|
tests = p_obj.get("tests")
|
||||||
|
if not isinstance(files, list):
|
||||||
|
files = []
|
||||||
|
if not isinstance(tests, list):
|
||||||
|
tests = []
|
||||||
|
proposal_id = str(p_obj.get("proposal_id") or f"P{idx+1}")
|
||||||
|
rows.append(
|
||||||
|
(
|
||||||
|
args.proposal_set_id,
|
||||||
|
args.created_at_utc,
|
||||||
|
objective,
|
||||||
|
args.release_name or "",
|
||||||
|
summary,
|
||||||
|
signals_json,
|
||||||
|
proposal_id,
|
||||||
|
str(p_obj.get("title") or ""),
|
||||||
|
str(p_obj.get("problem") or ""),
|
||||||
|
str(p_obj.get("change") or ""),
|
||||||
|
json.dumps(files, ensure_ascii=False),
|
||||||
|
str(p_obj.get("risk") or "medium"),
|
||||||
|
json.dumps(tests, ensure_ascii=False),
|
||||||
|
bool(p_obj.get("auto_apply_safe", False)),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
rows.append(
|
||||||
|
(
|
||||||
|
args.proposal_set_id,
|
||||||
|
args.created_at_utc,
|
||||||
|
objective,
|
||||||
|
args.release_name or "",
|
||||||
|
summary,
|
||||||
|
signals_json,
|
||||||
|
"P0",
|
||||||
|
"No proposals",
|
||||||
|
"No proposals returned",
|
||||||
|
"",
|
||||||
|
"[]",
|
||||||
|
"low",
|
||||||
|
"[]",
|
||||||
|
False,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
df = spark.createDataFrame(rows, schema=schema)
|
||||||
|
df.writeTo(args.table).append()
|
||||||
|
print(f"[DONE] Recorded proposal set {args.proposal_set_id} with {len(rows)} row(s) into {args.table}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Loading…
Reference in a new issue