Compare commits

..

No commits in common. "89955662bf4693d5121c4d8303e9f0f28e91ca34" and "cff60dcf3e43f4aab5c28d8a2d313d0dfce4703b" have entirely different histories.

9 changed files with 61 additions and 1655 deletions

File diff suppressed because one or more lines are too long

1232
app.py

File diff suppressed because it is too large Load diff

View file

@ -1,20 +0,0 @@
# P1: Improve retrieval fallback ordering
- Objective: Improve retrieval fallback ordering
- Release:
- Risk: low
- Auto apply safe: True
## Problem
Low-confidence drafts still occur when release-filtered retrieval returns no hits.
## Change
Add deterministic fallback chain and expose retrieval diagnostics in API responses.
## Files
- `app.py`
## Tests
- Call /assistant/draft with missing release_name and verify non-empty sources fallback.
_Generated at 2026-02-14T20:57:33.384354+00:00_

View file

@ -1,33 +0,0 @@
#!/usr/bin/env bash
# Copy query_assistant_proposals.py into the Spark container and run it via
# spark-submit to dump assistant proposal rows as JSON on stdout.
#
# Usage: <release_name> [proposal_set_id] [limit]
# Env overrides: PROPOSAL_TABLE, SPARK_CONTAINER_NAME, SPARK_PROPS,
#                SPARK_PACKAGES, SCRIPT_LOCAL, AWS_REGION, AWS_DEFAULT_REGION.
set -euo pipefail
# Positional filters; empty values mean "no filter" in the Python script.
RELEASE_NAME="${1:-}"
PROPOSAL_SET_ID="${2:-}"
LIMIT="${3:-200}"
PROPOSAL_TABLE="${PROPOSAL_TABLE:-lake.db1.assistant_proposals}"
CONTAINER_NAME="${SPARK_CONTAINER_NAME:-spark}"
SPARK_PROPS="${SPARK_PROPS:-/opt/lakehouse/spark-conf/lakehouse-spark-defaults.conf}"
# Iceberg + Nessie runtime jars, resolved by spark-submit at launch.
PACKAGES="${SPARK_PACKAGES:-org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.10.1,org.apache.iceberg:iceberg-aws-bundle:1.10.1,org.projectnessie.nessie-integrations:nessie-spark-extensions-3.5_2.12:0.104.5}"
SCRIPT_LOCAL="${SCRIPT_LOCAL:-./query_assistant_proposals.py}"
SCRIPT_REMOTE="/tmp/query_assistant_proposals.py"
# Fail fast if the helper script is missing locally.
if [[ ! -f "$SCRIPT_LOCAL" ]]; then
echo "query_assistant_proposals.py not found at: $SCRIPT_LOCAL" >&2
exit 1
fi
# Stage the script inside the container, then submit it with AWS region vars set.
docker cp "$SCRIPT_LOCAL" "$CONTAINER_NAME":"$SCRIPT_REMOTE"
docker exec \
-e AWS_REGION="${AWS_REGION:-us-east-1}" \
-e AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-us-east-1}" \
"$CONTAINER_NAME" \
/opt/spark/bin/spark-submit \
--properties-file "$SPARK_PROPS" \
--packages "$PACKAGES" \
"$SCRIPT_REMOTE" \
--table "$PROPOSAL_TABLE" \
--release-name "$RELEASE_NAME" \
--proposal-set-id "$PROPOSAL_SET_ID" \
--limit "$LIMIT"

View file

@ -1,36 +0,0 @@
import argparse
import json
import os
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
def main() -> None:
    """Query assistant proposal rows from an Iceberg table and print them as JSON.

    Filters by --release-name and/or --proposal-set-id when provided (empty
    string means "no filter"); the row limit is clamped to [1, 2000]. Output
    is a single JSON array on stdout, consumed by the calling shell wrapper.
    """
    p = argparse.ArgumentParser(description="Query assistant proposal rows")
    p.add_argument("--table", default=os.getenv("PROPOSAL_TABLE", "lake.db1.assistant_proposals"))
    p.add_argument("--release-name", default="")
    p.add_argument("--proposal-set-id", default="")
    p.add_argument("--limit", type=int, default=200)
    args = p.parse_args()

    spark = SparkSession.builder.appName("query-assistant-proposals").getOrCreate()
    try:
        df = spark.table(args.table)
        if args.release_name:
            df = df.where(F.col("release_name") == args.release_name)
        if args.proposal_set_id:
            df = df.where(F.col("proposal_set_id") == args.proposal_set_id)
        # Deterministic ordering: newest first, proposal id as tiebreaker.
        rows = (
            df.orderBy(F.col("created_at_utc").desc_nulls_last(), F.col("proposal_id").asc_nulls_last())
            .limit(max(1, min(args.limit, 2000)))
            .collect()
        )
        out = [r.asDict(recursive=True) for r in rows]
        print(json.dumps(out, ensure_ascii=False))
    finally:
        # Release the Spark session explicitly instead of relying on JVM teardown.
        spark.stop()


if __name__ == "__main__":
    main()

View file

@ -1,48 +0,0 @@
#!/usr/bin/env bash
# Copy write_assistant_proposals.py into the Spark container and run it via
# spark-submit to persist one proposal set into the Iceberg table.
# Free-text arguments arrive base64-encoded so they pass safely through argv.
set -euo pipefail
PROPOSAL_TABLE="${PROPOSAL_TABLE:-lake.db1.assistant_proposals}"
# Positional arguments; only the set id and timestamp are mandatory.
PROPOSAL_SET_ID="${1:-}"
CREATED_AT_UTC="${2:-}"
OBJECTIVE_B64="${3:-}"
RELEASE_NAME="${4:-}"
SUMMARY_B64="${5:-}"
SIGNALS_B64="${6:-}"
PROPOSALS_B64="${7:-}"
if [[ -z "$PROPOSAL_SET_ID" || -z "$CREATED_AT_UTC" ]]; then
echo "Usage: $0 <proposal_set_id> <created_at_utc> <objective_b64> <release_name> <summary_b64> <signals_b64> <proposals_b64>" >&2
exit 1
fi
CONTAINER_NAME="${SPARK_CONTAINER_NAME:-spark}"
SPARK_PROPS="${SPARK_PROPS:-/opt/lakehouse/spark-conf/lakehouse-spark-defaults.conf}"
# Iceberg + Nessie runtime jars, resolved by spark-submit at launch.
PACKAGES="${SPARK_PACKAGES:-org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.10.1,org.apache.iceberg:iceberg-aws-bundle:1.10.1,org.projectnessie.nessie-integrations:nessie-spark-extensions-3.5_2.12:0.104.5}"
SCRIPT_LOCAL="${SCRIPT_LOCAL:-./write_assistant_proposals.py}"
SCRIPT_REMOTE="/tmp/write_assistant_proposals.py"
# Fail fast if the helper script is missing locally.
if [[ ! -f "$SCRIPT_LOCAL" ]]; then
echo "write_assistant_proposals.py not found at: $SCRIPT_LOCAL" >&2
exit 1
fi
# Stage the script inside the container, then submit it with AWS region vars set.
docker cp "$SCRIPT_LOCAL" "$CONTAINER_NAME":"$SCRIPT_REMOTE"
docker exec \
-e AWS_REGION="${AWS_REGION:-us-east-1}" \
-e AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-us-east-1}" \
"$CONTAINER_NAME" \
/opt/spark/bin/spark-submit \
--properties-file "$SPARK_PROPS" \
--packages "$PACKAGES" \
"$SCRIPT_REMOTE" \
--table "$PROPOSAL_TABLE" \
--proposal-set-id "$PROPOSAL_SET_ID" \
--created-at-utc "$CREATED_AT_UTC" \
--objective-b64 "$OBJECTIVE_B64" \
--release-name "$RELEASE_NAME" \
--summary-b64 "$SUMMARY_B64" \
--signals-b64 "$SIGNALS_B64" \
--proposals-b64 "$PROPOSALS_B64"
echo "[DONE] Recorded proposal set ${PROPOSAL_SET_ID} into ${PROPOSAL_TABLE}"

View file

@ -146,13 +146,7 @@ async function makeDraft() {
max_sources: 5, max_sources: 5,
}); });
const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 5).join(", "); const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 5).join(", ");
out.textContent = out.textContent = `${data.draft || ""}\n\nconfidence=${data.confidence}\nneeds_review=${data.needs_review}\nsources=${sourceLine}`;
`${data.draft || ""}\n\n` +
`confidence=${data.confidence}\n` +
`needs_review=${data.needs_review}\n` +
`provider=${data.provider_used || "local"}\n` +
`escalated=${Boolean(data.escalated)}\n` +
`sources=${sourceLine}`;
} catch (e) { } catch (e) {
out.textContent = `Error: ${String(e)}`; out.textContent = `Error: ${String(e)}`;
} }
@ -186,57 +180,6 @@ async function saveLearn() {
} }
} }
// Save a knowledge note and, in the same request, ask the backend to write
// code toward the given objective against the listed files.
async function saveLearnAndWriteCode() {
const cfg = getConfig();
// Note metadata: optional title plus comma-separated tags.
const title = document.getElementById("learnTitle").value.trim();
const tags = document.getElementById("learnTags").value
.split(",")
.map((x) => x.trim())
.filter(Boolean);
const text = document.getElementById("learnText").value.trim();
// Code-write inputs: objective, target files, and dry-run/commit flags.
const codeObjective = document.getElementById("learnCodeObjective").value.trim();
const codeFiles = document.getElementById("learnCodeFiles").value
.split(",")
.map((x) => x.trim())
.filter(Boolean);
const codeDryRun = document.getElementById("learnCodeDryRun").checked;
const codeCommit = document.getElementById("learnCodeCommit").checked;
const out = document.getElementById("learnOutput");
// Note text and at least one file path are required before calling the API.
if (!text) {
out.textContent = "Provide note text first.";
return;
}
if (codeFiles.length === 0) {
out.textContent = "Provide at least one code file path.";
return;
}
out.textContent = "Saving note and writing code...";
try {
const data = await apiPost("/assistant/learn", {
text,
title: title || null,
tags,
release_name: cfg.releaseName || null,
write_code: true,
code_objective: codeObjective || null,
code_files: codeFiles,
code_dry_run: codeDryRun,
code_commit: codeCommit,
});
// Summarize both the note save and the code-write result (cw may be absent).
const cw = data.code_write || {};
out.textContent =
`saved=${data.stored}\nconcept_id=${data.concept_id}\ntitle=${data.title}\n\n` +
`write_code.attempted=${cw.attempted}\n` +
`dry_run=${cw.dry_run}\n` +
`branch=${cw.branch_name || "-"}\n` +
`changed_files=${(cw.changed_files || []).join(", ") || "-"}\n` +
`skipped_files=${(cw.skipped_files || []).map((x) => `${x.file}:${x.reason}`).join(" | ") || "-"}\n` +
`commit=${cw.commit || "-"}`;
} catch (e) {
out.textContent = `Error: ${String(e)}`;
}
}
function appendChat(role, text, meta) { function appendChat(role, text, meta) {
const target = document.getElementById("chatTranscript"); const target = document.getElementById("chatTranscript");
const el = document.createElement("div"); const el = document.createElement("div");
@ -267,139 +210,19 @@ async function sendChat() {
max_sources: 6, max_sources: 6,
}); });
const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 4).join(", "); const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 4).join(", ");
appendChat( appendChat("assistant", data.answer || "", `confidence=${data.confidence} | sources=${sourceLine || "-"}`);
"assistant",
data.answer || "",
`confidence=${data.confidence} | provider=${data.provider_used || "local"} | escalated=${Boolean(data.escalated)} | sources=${sourceLine || "-"}`
);
} catch (e) { } catch (e) {
appendChat("assistant", `Error: ${String(e)}`, ""); appendChat("assistant", `Error: ${String(e)}`, "");
} }
} }
// Request self-improvement proposals from the backend and render them.
async function runSelfImprove() {
const cfg = getConfig();
// Blank objective falls back to a generic default.
const objective = document.getElementById("improveObjective").value.trim() || "Improve assistant quality and reliability";
const maxRaw = Number(document.getElementById("improveMax").value || 5);
// Clamp the requested proposal count to [1, 20]; NaN falls back to 5.
const maxProposals = Math.max(1, Math.min(20, Number.isFinite(maxRaw) ? maxRaw : 5));
const summary = document.getElementById("improveSummary");
const list = document.getElementById("improveProposals");
summary.textContent = "Generating proposals...";
list.innerHTML = "";
try {
// apply=false: this call only generates; applying happens per proposal later.
const data = await apiPost("/assistant/self-improve", {
objective,
release_name: cfg.releaseName || null,
max_proposals: maxProposals,
feedback_limit: 50,
action_limit: 50,
include_edited_feedback: true,
include_rejected_feedback: true,
include_blocked_actions: true,
apply: false,
});
summary.textContent =
`${data.summary || ""}\n\nproposal_set_id=${data.proposal_set_id}\n` +
`signals: feedback=${data.signals?.feedback_rows ?? 0}, blocked_actions=${data.signals?.blocked_action_rows ?? 0}`;
// Reuse the shared renderer by wrapping the response as a one-element set list.
renderProposalSets(
[{ proposal_set_id: data.proposal_set_id, created_at_utc: data.created_at_utc, objective, release_name: cfg.releaseName || "", summary: data.summary, signals: data.signals || {}, proposals: data.proposals || [] }],
list,
summary,
cfg.releaseName || null
);
} catch (e) {
summary.textContent = `Error: ${String(e)}`;
}
}
// Render proposal sets into listEl; each proposal gets an "Apply as branch"
// button wired to POST /assistant/self-improve/apply. summaryEl receives
// status and apply-result text.
function renderProposalSets(sets, listEl, summaryEl, releaseName) {
listEl.innerHTML = "";
if (!sets || sets.length === 0) {
listEl.innerHTML = '<div class="row">No proposal sets.</div>';
return;
}
sets.forEach((setObj) => {
const wrap = document.createElement("div");
wrap.className = "row";
const proposals = setObj.proposals || [];
// One card per proposal; set/proposal ids ride along as data attributes.
const proposalsHtml = proposals.map((p) => `
<div class="row" style="margin-top:8px">
<div><strong>${p.proposal_id}: ${p.title}</strong></div>
<div>${p.problem || ""}</div>
<div class="meta">risk=${p.risk} | auto_apply_safe=${p.auto_apply_safe}</div>
<div class="meta">files=${(p.files || []).join(", ") || "-"}</div>
<div class="meta">tests=${(p.tests || []).join(" | ") || "-"}</div>
<div style="margin-top:6px">${p.change || ""}</div>
<div style="margin-top:6px">
<button class="apply-proposal" data-proposal-set-id="${setObj.proposal_set_id}" data-proposal-id="${p.proposal_id}">Apply as branch</button>
</div>
</div>
`).join("");
wrap.innerHTML = `
<div><strong>Set ${setObj.proposal_set_id}</strong></div>
<div class="meta">${setObj.created_at_utc || ""} | release=${setObj.release_name || ""}</div>
<div>${setObj.summary || ""}</div>
${proposalsHtml}
`;
listEl.appendChild(wrap);
});
// Attach click handlers after insertion; each click posts an apply request.
document.querySelectorAll(".apply-proposal").forEach((btn) => {
btn.addEventListener("click", async () => {
const proposalSetId = btn.getAttribute("data-proposal-set-id");
const proposalId = btn.getAttribute("data-proposal-id");
if (!proposalSetId || !proposalId) return;
// Dry-run flag comes from the shared checkbox in the Self-Improve panel.
const dryRun = document.getElementById("improveDryRun").checked;
summaryEl.textContent = `Applying ${proposalId} from set ${proposalSetId}...`;
try {
const applied = await apiPost("/assistant/self-improve/apply", {
objective: document.getElementById("improveObjective").value.trim() || null,
release_name: releaseName || null,
proposal_set_id: proposalSetId,
proposal_id: proposalId,
dry_run: dryRun,
});
summaryEl.textContent =
`apply result: applied=${applied.applied} dry_run=${applied.dry_run}\n` +
`branch=${applied.branch_name}\n` +
`proposal_file=${applied.proposal_file}\n` +
`commit=${applied.commit || "-"}\n` +
`${applied.detail || ""}`;
} catch (e) {
summaryEl.textContent = `Apply error: ${String(e)}`;
}
});
});
}
// Fetch stored proposal-set history for the configured release and render it
// into the Self-Improve panel.
async function loadImproveHistory() {
  const summaryEl = document.getElementById("improveSummary");
  const listEl = document.getElementById("improveProposals");
  const { releaseName } = getConfig();
  summaryEl.textContent = "Loading proposal history...";
  try {
    const params = { release_name: releaseName || null, limit: 200 };
    const data = await apiGet("/assistant/self-improve/history", params);
    summaryEl.textContent = `history sets=${data.count || 0}`;
    renderProposalSets(data.rows || [], listEl, summaryEl, releaseName || null);
  } catch (e) {
    summaryEl.textContent = `Error: ${String(e)}`;
  }
}
document.getElementById("saveConfig").addEventListener("click", saveConfig); document.getElementById("saveConfig").addEventListener("click", saveConfig);
document.getElementById("refreshMeta").addEventListener("click", loadMeta); document.getElementById("refreshMeta").addEventListener("click", loadMeta);
document.getElementById("loadInbox").addEventListener("click", loadInbox); document.getElementById("loadInbox").addEventListener("click", loadInbox);
document.getElementById("loadTasks").addEventListener("click", loadTasks); document.getElementById("loadTasks").addEventListener("click", loadTasks);
document.getElementById("makeDraft").addEventListener("click", makeDraft); document.getElementById("makeDraft").addEventListener("click", makeDraft);
document.getElementById("saveLearn").addEventListener("click", saveLearn); document.getElementById("saveLearn").addEventListener("click", saveLearn);
document.getElementById("saveLearnWrite").addEventListener("click", saveLearnAndWriteCode);
document.getElementById("sendChat").addEventListener("click", sendChat); document.getElementById("sendChat").addEventListener("click", sendChat);
document.getElementById("runImprove").addEventListener("click", runSelfImprove);
document.getElementById("loadImproveHistory").addEventListener("click", loadImproveHistory);
loadConfig(); loadConfig();
loadMeta(); loadMeta();
loadImproveHistory();

View file

@ -60,15 +60,8 @@
<input id="learnTitle" type="text" placeholder="Title (optional)" /> <input id="learnTitle" type="text" placeholder="Title (optional)" />
<input id="learnTags" type="text" placeholder="tags comma-separated (optional)" /> <input id="learnTags" type="text" placeholder="tags comma-separated (optional)" />
<button id="saveLearn">Save Note</button> <button id="saveLearn">Save Note</button>
<button id="saveLearnWrite">Save + Write Code</button>
</div> </div>
</div> </div>
<div class="controls" style="margin-bottom:8px">
<input id="learnCodeObjective" type="text" placeholder="Code objective (optional)" />
<input id="learnCodeFiles" type="text" placeholder="code files (comma-separated)" />
<label><input id="learnCodeDryRun" type="checkbox" checked /> Dry-run</label>
<label><input id="learnCodeCommit" type="checkbox" /> Commit</label>
</div>
<textarea id="learnText" rows="3" placeholder="Knowledge note you want the assistant to remember"></textarea> <textarea id="learnText" rows="3" placeholder="Knowledge note you want the assistant to remember"></textarea>
<pre id="learnOutput" class="output"></pre> <pre id="learnOutput" class="output"></pre>
</section> </section>
@ -84,21 +77,6 @@
<textarea id="chatMessage" rows="2" placeholder="Ask the assistant..."></textarea> <textarea id="chatMessage" rows="2" placeholder="Ask the assistant..."></textarea>
<div id="chatTranscript" class="list"></div> <div id="chatTranscript" class="list"></div>
</section> </section>
<section class="panel">
<div class="panel-header">
<h2>Self-Improve</h2>
<div class="controls">
<input id="improveObjective" type="text" placeholder="Objective" />
<input id="improveMax" type="number" min="1" max="20" value="5" style="width:90px" />
<label><input id="improveDryRun" type="checkbox" /> Dry-run apply</label>
<button id="runImprove">Generate Proposals</button>
<button id="loadImproveHistory">Load History</button>
</div>
</div>
<pre id="improveSummary" class="output"></pre>
<div id="improveProposals" class="list"></div>
</section>
</main> </main>
<script src="/ui/assets/app.js"></script> <script src="/ui/assets/app.js"></script>

View file

@ -1,143 +0,0 @@
import argparse
import base64
import json
from pyspark.sql import SparkSession, types as T
def d(s: str) -> str:
    """Decode a base64-encoded UTF-8 string; empty/None-ish input maps to ""."""
    if s:
        raw = base64.b64decode(s.encode("ascii"))
        return raw.decode("utf-8")
    return ""
def main() -> None:
    """Persist one assistant self-improve proposal set into an Iceberg table.

    Free-text arguments arrive base64-encoded (decoded via d()) so the shell
    wrapper can pass arbitrary text safely through argv. One row is written
    per proposal; if the set contains no proposals, a single "P0" placeholder
    row is appended so the set itself is still recorded.
    """
    p = argparse.ArgumentParser(description="Write assistant proposal set rows via Spark DataFrame")
    p.add_argument("--table", required=True)
    p.add_argument("--proposal-set-id", required=True)
    p.add_argument("--created-at-utc", required=True)
    p.add_argument("--objective-b64", default="")
    p.add_argument("--release-name", default="")
    p.add_argument("--summary-b64", default="")
    p.add_argument("--signals-b64", default="")
    p.add_argument("--proposals-b64", default="")
    args = p.parse_args()
    objective = d(args.objective_b64)
    summary = d(args.summary_b64)
    signals_json = d(args.signals_b64) or "{}"
    proposals_json = d(args.proposals_b64) or "[]"
    # Malformed or non-dict signals degrade to {} instead of failing the write.
    try:
        signals_obj = json.loads(signals_json)
        if not isinstance(signals_obj, dict):
            signals_obj = {}
    except Exception:
        signals_obj = {}
    # Re-serialize with sorted keys so the stored JSON is canonical.
    signals_json = json.dumps(signals_obj, ensure_ascii=False, sort_keys=True)
    # Same defensive parsing for the proposal list.
    try:
        proposals_obj = json.loads(proposals_json)
        if not isinstance(proposals_obj, list):
            proposals_obj = []
    except Exception:
        proposals_obj = []
    spark = SparkSession.builder.appName("write-assistant-proposals").getOrCreate()
    # Idempotent DDL: ensure the target Iceberg table exists before appending.
    spark.sql(
        f"""
        CREATE TABLE IF NOT EXISTS {args.table} (
        proposal_set_id STRING,
        created_at_utc STRING,
        objective STRING,
        release_name STRING,
        summary STRING,
        signals_json STRING,
        proposal_id STRING,
        title STRING,
        problem STRING,
        change_text STRING,
        files_json STRING,
        risk STRING,
        tests_json STRING,
        auto_apply_safe BOOLEAN
        ) USING iceberg
        """
    )
    # Explicit schema: field order must match the positional tuples built below.
    schema = T.StructType(
        [
            T.StructField("proposal_set_id", T.StringType(), False),
            T.StructField("created_at_utc", T.StringType(), False),
            T.StructField("objective", T.StringType(), True),
            T.StructField("release_name", T.StringType(), True),
            T.StructField("summary", T.StringType(), True),
            T.StructField("signals_json", T.StringType(), True),
            T.StructField("proposal_id", T.StringType(), False),
            T.StructField("title", T.StringType(), True),
            T.StructField("problem", T.StringType(), True),
            T.StructField("change_text", T.StringType(), True),
            T.StructField("files_json", T.StringType(), True),
            T.StructField("risk", T.StringType(), True),
            T.StructField("tests_json", T.StringType(), True),
            T.StructField("auto_apply_safe", T.BooleanType(), False),
        ]
    )
    rows = []
    for idx, p_obj in enumerate(proposals_obj):
        # Skip non-dict entries rather than failing the whole batch.
        if not isinstance(p_obj, dict):
            continue
        files = p_obj.get("files")
        tests = p_obj.get("tests")
        if not isinstance(files, list):
            files = []
        if not isinstance(tests, list):
            tests = []
        # Synthesize a stable id (P1, P2, ...) when the proposal lacks one.
        proposal_id = str(p_obj.get("proposal_id") or f"P{idx+1}")
        rows.append(
            (
                args.proposal_set_id,
                args.created_at_utc,
                objective,
                args.release_name or "",
                summary,
                signals_json,
                proposal_id,
                str(p_obj.get("title") or ""),
                str(p_obj.get("problem") or ""),
                str(p_obj.get("change") or ""),
                json.dumps(files, ensure_ascii=False),
                str(p_obj.get("risk") or "medium"),
                json.dumps(tests, ensure_ascii=False),
                bool(p_obj.get("auto_apply_safe", False)),
            )
        )
    if not rows:
        # Placeholder row so an empty proposal set is still recorded.
        rows.append(
            (
                args.proposal_set_id,
                args.created_at_utc,
                objective,
                args.release_name or "",
                summary,
                signals_json,
                "P0",
                "No proposals",
                "No proposals returned",
                "",
                "[]",
                "low",
                "[]",
                False,
            )
        )
    df = spark.createDataFrame(rows, schema=schema)
    df.writeTo(args.table).append()
    print(f"[DONE] Recorded proposal set {args.proposal_set_id} with {len(rows)} row(s) into {args.table}")


if __name__ == "__main__":
    main()