Compare commits

...

9 commits

Author SHA1 Message Date
Carl Niklas Rydberg 89955662bf Some tweaks. 2026-02-15 00:33:04 +01:00
Carl Niklas Rydberg f3e3f4f586 Revert "feat(assistant): apply learn code update P1"
This reverts commit 4c52620159.
2026-02-14 22:21:54 +01:00
Carl Niklas Rydberg 4c52620159 feat(assistant): apply learn code update P1 2026-02-14 22:13:39 +01:00
Carl Niklas Rydberg 773aff7c10 feat(assistant): add optional code writing flow to learn endpoint and UI 2026-02-14 22:07:04 +01:00
Carl Niklas Rydberg b55824f49c chore(self-improve): apply P1 Improve retrieval fallback ordering 2026-02-14 21:57:33 +01:00
Carl Niklas Rydberg c6dad7427a feat(self-improve): persist proposal sets to Iceberg with history and apply-by-id 2026-02-14 21:54:36 +01:00
Carl Niklas Rydberg 6b75c0b596 feat(self-improve): add apply-to-branch endpoint and UI action buttons 2026-02-14 21:48:45 +01:00
Carl Niklas Rydberg c5b2776978 feat(ui): add self-improve panel and proposal rendering 2026-02-14 21:41:09 +01:00
Carl Niklas Rydberg 47ae5891e1 feat(assistant): add self-improvement proposal endpoint from feedback and blocked actions 2026-02-14 21:40:12 +01:00
9 changed files with 1655 additions and 61 deletions

File diff suppressed because one or more lines are too long

1224
app.py

File diff suppressed because it is too large Load diff

View file

@@ -0,0 +1,20 @@
# P1: Improve retrieval fallback ordering
- Objective: Improve the look of the UI
- Release:
- Risk: low
- Auto apply safe: True
## Problem
Low-confidence drafts still occur when release-filtered retrieval returns no hits.
## Change
Add deterministic fallback chain and expose retrieval diagnostics in API responses.
## Files
- `app.py`
## Tests
- Call /assistant/draft with missing release_name and verify non-empty sources fallback.
_Generated at 2026-02-14T20:57:33.384354+00:00_

View file

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Query assistant proposal rows from the Iceberg proposals table.
#
# Copies query_assistant_proposals.py into the Spark container and runs it
# with spark-submit, printing the matching rows (as JSON) on stdout.
#
# Usage: ./query_assistant_proposals.sh [release_name] [proposal_set_id] [limit]
#   release_name     optional filter; empty string means "no filter"
#   proposal_set_id  optional filter; empty string means "no filter"
#   limit            max rows to return (default 200)
#
# Environment overrides: PROPOSAL_TABLE, SPARK_CONTAINER_NAME, SPARK_PROPS,
# SPARK_PACKAGES, SCRIPT_LOCAL, AWS_REGION, AWS_DEFAULT_REGION.
set -euo pipefail
# Positional arguments (all optional; defaults keep the query unfiltered).
RELEASE_NAME="${1:-}"
PROPOSAL_SET_ID="${2:-}"
LIMIT="${3:-200}"
# Target Iceberg table and Spark runtime configuration.
PROPOSAL_TABLE="${PROPOSAL_TABLE:-lake.db1.assistant_proposals}"
CONTAINER_NAME="${SPARK_CONTAINER_NAME:-spark}"
SPARK_PROPS="${SPARK_PROPS:-/opt/lakehouse/spark-conf/lakehouse-spark-defaults.conf}"
# Iceberg + Nessie runtime jars pulled by spark-submit --packages.
PACKAGES="${SPARK_PACKAGES:-org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.10.1,org.apache.iceberg:iceberg-aws-bundle:1.10.1,org.projectnessie.nessie-integrations:nessie-spark-extensions-3.5_2.12:0.104.5}"
SCRIPT_LOCAL="${SCRIPT_LOCAL:-./query_assistant_proposals.py}"
SCRIPT_REMOTE="/tmp/query_assistant_proposals.py"
# Fail fast if the driver script is missing locally before touching docker.
if [[ ! -f "$SCRIPT_LOCAL" ]]; then
echo "query_assistant_proposals.py not found at: $SCRIPT_LOCAL" >&2
exit 1
fi
# Stage the script inside the container, then run it under spark-submit.
docker cp "$SCRIPT_LOCAL" "$CONTAINER_NAME":"$SCRIPT_REMOTE"
docker exec \
-e AWS_REGION="${AWS_REGION:-us-east-1}" \
-e AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-us-east-1}" \
"$CONTAINER_NAME" \
/opt/spark/bin/spark-submit \
--properties-file "$SPARK_PROPS" \
--packages "$PACKAGES" \
"$SCRIPT_REMOTE" \
--table "$PROPOSAL_TABLE" \
--release-name "$RELEASE_NAME" \
--proposal-set-id "$PROPOSAL_SET_ID" \
--limit "$LIMIT"

View file

@@ -0,0 +1,36 @@
import argparse
import json
import os
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
def main() -> None:
    """Print assistant proposal rows from an Iceberg table as a JSON array.

    Optional ``--release-name`` / ``--proposal-set-id`` filters narrow the
    scan; ``--limit`` is clamped to the range [1, 2000]. Rows are ordered
    newest-first by ``created_at_utc`` with ``proposal_id`` as a stable
    tie-break, and the result is emitted as one JSON array on stdout.
    """
    p = argparse.ArgumentParser(description="Query assistant proposal rows")
    p.add_argument("--table", default=os.getenv("PROPOSAL_TABLE", "lake.db1.assistant_proposals"))
    p.add_argument("--release-name", default="")
    p.add_argument("--proposal-set-id", default="")
    p.add_argument("--limit", type=int, default=200)
    args = p.parse_args()

    spark = SparkSession.builder.appName("query-assistant-proposals").getOrCreate()
    try:
        df = spark.table(args.table)
        # Empty-string filters mean "no filter" (matches the shell wrapper's defaults).
        if args.release_name:
            df = df.where(F.col("release_name") == args.release_name)
        if args.proposal_set_id:
            df = df.where(F.col("proposal_set_id") == args.proposal_set_id)
        rows = (
            df.orderBy(F.col("created_at_utc").desc_nulls_last(), F.col("proposal_id").asc_nulls_last())
            .limit(max(1, min(args.limit, 2000)))
            .collect()
        )
        out = [r.asDict(recursive=True) for r in rows]
        # Single JSON array on stdout; callers parse this output directly.
        print(json.dumps(out, ensure_ascii=False))
    finally:
        # Stop the session so spark-submit exits cleanly even when the query fails.
        spark.stop()


if __name__ == "__main__":
    main()

View file

@@ -0,0 +1,48 @@
#!/usr/bin/env bash
# Record one assistant self-improvement proposal set into the Iceberg table.
#
# Copies write_assistant_proposals.py into the Spark container and runs it via
# spark-submit. Free-text fields (objective, summary, signals, proposals) are
# passed base64-encoded so arbitrary text survives shell argument passing.
#
# Usage: $0 <proposal_set_id> <created_at_utc> <objective_b64> <release_name> <summary_b64> <signals_b64> <proposals_b64>
#
# Environment overrides: PROPOSAL_TABLE, SPARK_CONTAINER_NAME, SPARK_PROPS,
# SPARK_PACKAGES, SCRIPT_LOCAL, AWS_REGION, AWS_DEFAULT_REGION.
set -euo pipefail
PROPOSAL_TABLE="${PROPOSAL_TABLE:-lake.db1.assistant_proposals}"
# Positional arguments; only the first two are mandatory.
PROPOSAL_SET_ID="${1:-}"
CREATED_AT_UTC="${2:-}"
OBJECTIVE_B64="${3:-}"
RELEASE_NAME="${4:-}"
SUMMARY_B64="${5:-}"
SIGNALS_B64="${6:-}"
PROPOSALS_B64="${7:-}"
# Validate required identifiers before doing any docker work.
if [[ -z "$PROPOSAL_SET_ID" || -z "$CREATED_AT_UTC" ]]; then
echo "Usage: $0 <proposal_set_id> <created_at_utc> <objective_b64> <release_name> <summary_b64> <signals_b64> <proposals_b64>" >&2
exit 1
fi
# Spark runtime configuration (Iceberg + Nessie jars via --packages).
CONTAINER_NAME="${SPARK_CONTAINER_NAME:-spark}"
SPARK_PROPS="${SPARK_PROPS:-/opt/lakehouse/spark-conf/lakehouse-spark-defaults.conf}"
PACKAGES="${SPARK_PACKAGES:-org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.10.1,org.apache.iceberg:iceberg-aws-bundle:1.10.1,org.projectnessie.nessie-integrations:nessie-spark-extensions-3.5_2.12:0.104.5}"
SCRIPT_LOCAL="${SCRIPT_LOCAL:-./write_assistant_proposals.py}"
SCRIPT_REMOTE="/tmp/write_assistant_proposals.py"
# Fail fast if the driver script is missing locally.
if [[ ! -f "$SCRIPT_LOCAL" ]]; then
echo "write_assistant_proposals.py not found at: $SCRIPT_LOCAL" >&2
exit 1
fi
# Stage the script inside the container, then run it under spark-submit.
docker cp "$SCRIPT_LOCAL" "$CONTAINER_NAME":"$SCRIPT_REMOTE"
docker exec \
-e AWS_REGION="${AWS_REGION:-us-east-1}" \
-e AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-us-east-1}" \
"$CONTAINER_NAME" \
/opt/spark/bin/spark-submit \
--properties-file "$SPARK_PROPS" \
--packages "$PACKAGES" \
"$SCRIPT_REMOTE" \
--table "$PROPOSAL_TABLE" \
--proposal-set-id "$PROPOSAL_SET_ID" \
--created-at-utc "$CREATED_AT_UTC" \
--objective-b64 "$OBJECTIVE_B64" \
--release-name "$RELEASE_NAME" \
--summary-b64 "$SUMMARY_B64" \
--signals-b64 "$SIGNALS_B64" \
--proposals-b64 "$PROPOSALS_B64"
echo "[DONE] Recorded proposal set ${PROPOSAL_SET_ID} into ${PROPOSAL_TABLE}"

View file

@@ -146,7 +146,13 @@ async function makeDraft() {
max_sources: 5,
});
const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 5).join(", ");
out.textContent = `${data.draft || ""}\n\nconfidence=${data.confidence}\nneeds_review=${data.needs_review}\nsources=${sourceLine}`;
out.textContent =
`${data.draft || ""}\n\n` +
`confidence=${data.confidence}\n` +
`needs_review=${data.needs_review}\n` +
`provider=${data.provider_used || "local"}\n` +
`escalated=${Boolean(data.escalated)}\n` +
`sources=${sourceLine}`;
} catch (e) {
out.textContent = `Error: ${String(e)}`;
}
@@ -180,6 +186,57 @@ async function saveLearn() {
}
}
// Persist a knowledge note and ask the backend to draft code changes for it.
// Reads the learn form plus the code-writing options, validates the required
// fields, POSTs to /assistant/learn with write_code=true, and renders the
// save + code-write outcome into the learn output panel.
async function saveLearnAndWriteCode() {
  const cfg = getConfig();
  const out = document.getElementById("learnOutput");

  const readTrimmed = (id) => document.getElementById(id).value.trim();
  const readList = (id) =>
    document
      .getElementById(id)
      .value.split(",")
      .map((s) => s.trim())
      .filter(Boolean);

  const title = readTrimmed("learnTitle");
  const tags = readList("learnTags");
  const text = readTrimmed("learnText");
  const codeObjective = readTrimmed("learnCodeObjective");
  const codeFiles = readList("learnCodeFiles");
  const codeDryRun = document.getElementById("learnCodeDryRun").checked;
  const codeCommit = document.getElementById("learnCodeCommit").checked;

  // Guard clauses: a note body and at least one target file are mandatory.
  if (!text) {
    out.textContent = "Provide note text first.";
    return;
  }
  if (!codeFiles.length) {
    out.textContent = "Provide at least one code file path.";
    return;
  }

  out.textContent = "Saving note and writing code...";
  try {
    const data = await apiPost("/assistant/learn", {
      text,
      title: title || null,
      tags,
      release_name: cfg.releaseName || null,
      write_code: true,
      code_objective: codeObjective || null,
      code_files: codeFiles,
      code_dry_run: codeDryRun,
      code_commit: codeCommit,
    });
    const cw = data.code_write || {};
    const skipped =
      (cw.skipped_files || []).map((x) => `${x.file}:${x.reason}`).join(" | ") || "-";
    const lines = [
      `saved=${data.stored}`,
      `concept_id=${data.concept_id}`,
      `title=${data.title}`,
      "",
      `write_code.attempted=${cw.attempted}`,
      `dry_run=${cw.dry_run}`,
      `branch=${cw.branch_name || "-"}`,
      `changed_files=${(cw.changed_files || []).join(", ") || "-"}`,
      `skipped_files=${skipped}`,
      `commit=${cw.commit || "-"}`,
    ];
    out.textContent = lines.join("\n");
  } catch (e) {
    out.textContent = `Error: ${String(e)}`;
  }
}
function appendChat(role, text, meta) {
const target = document.getElementById("chatTranscript");
const el = document.createElement("div");
@@ -210,19 +267,139 @@ async function sendChat() {
max_sources: 6,
});
const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 4).join(", ");
appendChat("assistant", data.answer || "", `confidence=${data.confidence} | sources=${sourceLine || "-"}`);
appendChat(
"assistant",
data.answer || "",
`confidence=${data.confidence} | provider=${data.provider_used || "local"} | escalated=${Boolean(data.escalated)} | sources=${sourceLine || "-"}`
);
} catch (e) {
appendChat("assistant", `Error: ${String(e)}`, "");
}
}
// Generate a fresh self-improvement proposal set from recent feedback and
// blocked-action signals (without applying anything) and render it into the
// improve panel.
async function runSelfImprove() {
  const cfg = getConfig();
  const release = cfg.releaseName || null;
  const objective =
    document.getElementById("improveObjective").value.trim() ||
    "Improve assistant quality and reliability";
  // Clamp the requested proposal count to [1, 20]; non-numeric input falls back to 5.
  const rawMax = Number(document.getElementById("improveMax").value || 5);
  const maxProposals = Math.min(20, Math.max(1, Number.isFinite(rawMax) ? rawMax : 5));
  const summaryEl = document.getElementById("improveSummary");
  const listEl = document.getElementById("improveProposals");
  summaryEl.textContent = "Generating proposals...";
  listEl.innerHTML = "";
  try {
    const data = await apiPost("/assistant/self-improve", {
      objective,
      release_name: release,
      max_proposals: maxProposals,
      feedback_limit: 50,
      action_limit: 50,
      include_edited_feedback: true,
      include_rejected_feedback: true,
      include_blocked_actions: true,
      apply: false,
    });
    const feedbackRows = data.signals?.feedback_rows ?? 0;
    const blockedRows = data.signals?.blocked_action_rows ?? 0;
    summaryEl.textContent =
      `${data.summary || ""}\n\nproposal_set_id=${data.proposal_set_id}\n` +
      `signals: feedback=${feedbackRows}, blocked_actions=${blockedRows}`;
    // Wrap the response as a single-element set list for the shared renderer.
    const setObj = {
      proposal_set_id: data.proposal_set_id,
      created_at_utc: data.created_at_utc,
      objective,
      release_name: cfg.releaseName || "",
      summary: data.summary,
      signals: data.signals || {},
      proposals: data.proposals || [],
    };
    renderProposalSets([setObj], listEl, summaryEl, release);
  } catch (err) {
    summaryEl.textContent = `Error: ${String(err)}`;
  }
}
// Render proposal sets into listEl, one card per set with an "Apply as branch"
// button per proposal. Clicking a button POSTs to /assistant/self-improve/apply
// and writes the outcome into summaryEl.
//
// Security: set/proposal text originates from user feedback via the API, so
// every value is HTML-escaped before being interpolated into innerHTML to
// prevent markup/script injection.
function renderProposalSets(sets, listEl, summaryEl, releaseName) {
  // Minimal HTML escaper for untrusted strings (also used in attribute values).
  const esc = (v) =>
    String(v ?? "").replace(/[&<>"']/g, (ch) => ({
      "&": "&amp;",
      "<": "&lt;",
      ">": "&gt;",
      '"': "&quot;",
      "'": "&#39;",
    }[ch]));
  listEl.innerHTML = "";
  if (!sets || sets.length === 0) {
    listEl.innerHTML = '<div class="row">No proposal sets.</div>';
    return;
  }
  sets.forEach((setObj) => {
    const wrap = document.createElement("div");
    wrap.className = "row";
    const proposals = setObj.proposals || [];
    const proposalsHtml = proposals.map((p) => `
      <div class="row" style="margin-top:8px">
        <div><strong>${esc(p.proposal_id)}: ${esc(p.title)}</strong></div>
        <div>${esc(p.problem || "")}</div>
        <div class="meta">risk=${esc(p.risk)} | auto_apply_safe=${esc(p.auto_apply_safe)}</div>
        <div class="meta">files=${esc((p.files || []).join(", ") || "-")}</div>
        <div class="meta">tests=${esc((p.tests || []).join(" | ") || "-")}</div>
        <div style="margin-top:6px">${esc(p.change || "")}</div>
        <div style="margin-top:6px">
          <button class="apply-proposal" data-proposal-set-id="${esc(setObj.proposal_set_id)}" data-proposal-id="${esc(p.proposal_id)}">Apply as branch</button>
        </div>
      </div>
    `).join("");
    wrap.innerHTML = `
      <div><strong>Set ${esc(setObj.proposal_set_id)}</strong></div>
      <div class="meta">${esc(setObj.created_at_utc || "")} | release=${esc(setObj.release_name || "")}</div>
      <div>${esc(setObj.summary || "")}</div>
      ${proposalsHtml}
    `;
    listEl.appendChild(wrap);
  });
  // Scope the query to listEl (not document) so we only bind the buttons this
  // call just rendered and never touch apply buttons rendered elsewhere.
  listEl.querySelectorAll(".apply-proposal").forEach((btn) => {
    btn.addEventListener("click", async () => {
      const proposalSetId = btn.getAttribute("data-proposal-set-id");
      const proposalId = btn.getAttribute("data-proposal-id");
      if (!proposalSetId || !proposalId) return;
      const dryRun = document.getElementById("improveDryRun").checked;
      summaryEl.textContent = `Applying ${proposalId} from set ${proposalSetId}...`;
      try {
        const applied = await apiPost("/assistant/self-improve/apply", {
          objective: document.getElementById("improveObjective").value.trim() || null,
          release_name: releaseName || null,
          proposal_set_id: proposalSetId,
          proposal_id: proposalId,
          dry_run: dryRun,
        });
        summaryEl.textContent =
          `apply result: applied=${applied.applied} dry_run=${applied.dry_run}\n` +
          `branch=${applied.branch_name}\n` +
          `proposal_file=${applied.proposal_file}\n` +
          `commit=${applied.commit || "-"}\n` +
          `${applied.detail || ""}`;
      } catch (e) {
        summaryEl.textContent = `Apply error: ${String(e)}`;
      }
    });
  });
}
// Fetch previously persisted proposal sets for the configured release and
// render them into the improve panel.
async function loadImproveHistory() {
  const cfg = getConfig();
  const summaryEl = document.getElementById("improveSummary");
  const listEl = document.getElementById("improveProposals");
  summaryEl.textContent = "Loading proposal history...";
  try {
    const query = {
      release_name: cfg.releaseName || null,
      limit: 200,
    };
    const data = await apiGet("/assistant/self-improve/history", query);
    summaryEl.textContent = `history sets=${data.count || 0}`;
    renderProposalSets(data.rows || [], listEl, summaryEl, cfg.releaseName || null);
  } catch (err) {
    summaryEl.textContent = `Error: ${String(err)}`;
  }
}
// Wire each panel button to its handler (this script runs once per page load,
// so a single addEventListener per button is sufficient).
document.getElementById("saveConfig").addEventListener("click", saveConfig);
document.getElementById("refreshMeta").addEventListener("click", loadMeta);
document.getElementById("loadInbox").addEventListener("click", loadInbox);
document.getElementById("loadTasks").addEventListener("click", loadTasks);
document.getElementById("makeDraft").addEventListener("click", makeDraft);
document.getElementById("saveLearn").addEventListener("click", saveLearn);
document.getElementById("saveLearnWrite").addEventListener("click", saveLearnAndWriteCode);
document.getElementById("sendChat").addEventListener("click", sendChat);
document.getElementById("runImprove").addEventListener("click", runSelfImprove);
document.getElementById("loadImproveHistory").addEventListener("click", loadImproveHistory);
// Initial page state: restore saved config, fetch metadata, show history.
loadConfig();
loadMeta();
loadImproveHistory();

View file

@@ -60,8 +60,15 @@
<input id="learnTitle" type="text" placeholder="Title (optional)" />
<input id="learnTags" type="text" placeholder="tags comma-separated (optional)" />
<button id="saveLearn">Save Note</button>
<button id="saveLearnWrite">Save + Write Code</button>
</div>
</div>
<div class="controls" style="margin-bottom:8px">
<input id="learnCodeObjective" type="text" placeholder="Code objective (optional)" />
<input id="learnCodeFiles" type="text" placeholder="code files (comma-separated)" />
<label><input id="learnCodeDryRun" type="checkbox" checked /> Dry-run</label>
<label><input id="learnCodeCommit" type="checkbox" /> Commit</label>
</div>
<textarea id="learnText" rows="3" placeholder="Knowledge note you want the assistant to remember"></textarea>
<pre id="learnOutput" class="output"></pre>
</section>
@@ -77,6 +84,21 @@
<textarea id="chatMessage" rows="2" placeholder="Ask the assistant..."></textarea>
<div id="chatTranscript" class="list"></div>
</section>
<section class="panel">
<div class="panel-header">
<h2>Self-Improve</h2>
<div class="controls">
<input id="improveObjective" type="text" placeholder="Objective" />
<input id="improveMax" type="number" min="1" max="20" value="5" style="width:90px" />
<label><input id="improveDryRun" type="checkbox" /> Dry-run apply</label>
<button id="runImprove">Generate Proposals</button>
<button id="loadImproveHistory">Load History</button>
</div>
</div>
<pre id="improveSummary" class="output"></pre>
<div id="improveProposals" class="list"></div>
</section>
</main>
<script src="/ui/assets/app.js"></script>

View file

@@ -0,0 +1,143 @@
import argparse
import base64
import json
from pyspark.sql import SparkSession, types as T
def d(s: str) -> str:
    """Decode a base64-encoded UTF-8 string; empty/falsy input yields ""."""
    if not s:
        return ""
    raw = base64.b64decode(s.encode("ascii"))
    return raw.decode("utf-8")
def main() -> None:
    """Append one assistant self-improvement proposal set to an Iceberg table.

    Free-text arguments (objective, summary, signals, proposals) arrive
    base64-encoded so the calling shell wrapper never has to quote arbitrary
    text. The signals payload is normalized to a sorted-key JSON object and
    the proposals payload to a JSON list; malformed input degrades to {} / [].
    The target table is created if missing, and a placeholder "P0" row is
    written when no valid proposals are present so every set leaves a trace.
    """
    p = argparse.ArgumentParser(description="Write assistant proposal set rows via Spark DataFrame")
    p.add_argument("--table", required=True)
    p.add_argument("--proposal-set-id", required=True)
    p.add_argument("--created-at-utc", required=True)
    p.add_argument("--objective-b64", default="")
    p.add_argument("--release-name", default="")
    p.add_argument("--summary-b64", default="")
    p.add_argument("--signals-b64", default="")
    p.add_argument("--proposals-b64", default="")
    args = p.parse_args()

    objective = d(args.objective_b64)
    summary = d(args.summary_b64)
    signals_json = d(args.signals_b64) or "{}"
    proposals_json = d(args.proposals_b64) or "[]"

    # Normalize signals to a dict; bad JSON or a non-dict value becomes {}.
    try:
        signals_obj = json.loads(signals_json)
        if not isinstance(signals_obj, dict):
            signals_obj = {}
    except Exception:
        signals_obj = {}
    # Re-serialize with sorted keys so stored rows are deterministic.
    signals_json = json.dumps(signals_obj, ensure_ascii=False, sort_keys=True)

    # Normalize proposals to a list; bad JSON or a non-list value becomes [].
    try:
        proposals_obj = json.loads(proposals_json)
        if not isinstance(proposals_obj, list):
            proposals_obj = []
    except Exception:
        proposals_obj = []

    spark = SparkSession.builder.appName("write-assistant-proposals").getOrCreate()
    try:
        # NOTE(review): args.table is interpolated into DDL unescaped — this
        # script must only ever receive operator-controlled table names.
        spark.sql(
            f"""
            CREATE TABLE IF NOT EXISTS {args.table} (
                proposal_set_id STRING,
                created_at_utc STRING,
                objective STRING,
                release_name STRING,
                summary STRING,
                signals_json STRING,
                proposal_id STRING,
                title STRING,
                problem STRING,
                change_text STRING,
                files_json STRING,
                risk STRING,
                tests_json STRING,
                auto_apply_safe BOOLEAN
            ) USING iceberg
            """
        )
        schema = T.StructType(
            [
                T.StructField("proposal_set_id", T.StringType(), False),
                T.StructField("created_at_utc", T.StringType(), False),
                T.StructField("objective", T.StringType(), True),
                T.StructField("release_name", T.StringType(), True),
                T.StructField("summary", T.StringType(), True),
                T.StructField("signals_json", T.StringType(), True),
                T.StructField("proposal_id", T.StringType(), False),
                T.StructField("title", T.StringType(), True),
                T.StructField("problem", T.StringType(), True),
                T.StructField("change_text", T.StringType(), True),
                T.StructField("files_json", T.StringType(), True),
                T.StructField("risk", T.StringType(), True),
                T.StructField("tests_json", T.StringType(), True),
                T.StructField("auto_apply_safe", T.BooleanType(), False),
            ]
        )
        rows = []
        for idx, p_obj in enumerate(proposals_obj):
            # Skip non-dict entries entirely; they carry no usable fields.
            if not isinstance(p_obj, dict):
                continue
            files = p_obj.get("files")
            tests = p_obj.get("tests")
            if not isinstance(files, list):
                files = []
            if not isinstance(tests, list):
                tests = []
            # Synthesize a positional id (P1, P2, ...) when none is provided.
            proposal_id = str(p_obj.get("proposal_id") or f"P{idx+1}")
            rows.append(
                (
                    args.proposal_set_id,
                    args.created_at_utc,
                    objective,
                    args.release_name or "",
                    summary,
                    signals_json,
                    proposal_id,
                    str(p_obj.get("title") or ""),
                    str(p_obj.get("problem") or ""),
                    # Upstream payload field is "change"; column is change_text.
                    str(p_obj.get("change") or ""),
                    json.dumps(files, ensure_ascii=False),
                    str(p_obj.get("risk") or "medium"),
                    json.dumps(tests, ensure_ascii=False),
                    bool(p_obj.get("auto_apply_safe", False)),
                )
            )
        if not rows:
            # Placeholder row so an empty proposal set is still recorded.
            rows.append(
                (
                    args.proposal_set_id,
                    args.created_at_utc,
                    objective,
                    args.release_name or "",
                    summary,
                    signals_json,
                    "P0",
                    "No proposals",
                    "No proposals returned",
                    "",
                    "[]",
                    "low",
                    "[]",
                    False,
                )
            )
        df = spark.createDataFrame(rows, schema=schema)
        df.writeTo(args.table).append()
        print(f"[DONE] Recorded proposal set {args.proposal_set_id} with {len(rows)} row(s) into {args.table}")
    finally:
        # Stop the session so spark-submit exits cleanly even on failure.
        spark.stop()


if __name__ == "__main__":
    main()