feat(assistant): add self-improvement proposal endpoint from feedback and blocked actions
This commit is contained in:
parent
cff60dcf3e
commit
47ae5891e1
239
app.py
239
app.py
|
|
@ -290,6 +290,39 @@ class AssistantChatResponse(BaseModel):
|
||||||
release_name: Optional[str] = None
|
release_name: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class AssistantSelfImprovePayload(BaseModel):
    # Request body for POST /assistant/self-improve.

    # Free-text goal; interpolated into the generation prompt.
    objective: str = "Improve assistant quality and reliability"
    # Optional release filter forwarded to the feedback and action queries.
    release_name: Optional[str] = None

    # Upper bounds on how much is generated and how many signal rows are used.
    max_proposals: int = Field(5, ge=1, le=20)
    feedback_limit: int = Field(50, ge=1, le=500)
    action_limit: int = Field(50, ge=1, le=500)

    # Toggles selecting which signal sources are queried.
    include_edited_feedback: bool = True
    include_rejected_feedback: bool = True
    include_blocked_actions: bool = True

    # Request to auto-apply proposals; the endpoint currently reports this as blocked.
    apply: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
class AssistantSelfImproveProposal(BaseModel):
    # One suggested code improvement returned by the self-improve endpoint.

    # Identifier and short human-readable heading (e.g. "P1").
    proposal_id: str
    title: str

    # What is wrong, and what to change about it.
    problem: str
    change: str

    # Paths the change is expected to touch.
    files: List[str] = Field(default_factory=list)
    # Coarse risk rating; defaults to "medium".
    risk: Literal["low", "medium", "high"] = "medium"
    # Descriptions of checks that should accompany the change.
    tests: List[str] = Field(default_factory=list)
    # Whether the proposal is flagged as safe to apply automatically.
    auto_apply_safe: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
class AssistantSelfImproveResponse(BaseModel):
    # Response body for POST /assistant/self-improve.

    # Echo of the request's objective and release filter.
    objective: str
    release_name: Optional[str] = None

    # Overall summary plus the individual proposals.
    summary: str
    proposals: List[AssistantSelfImproveProposal]

    # Diagnostics about the inputs used (the endpoint fills in row counts).
    signals: Dict[str, Any]

    # True when an apply request was made but not carried out, with the reason.
    apply_blocked: bool
    apply_block_reason: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
# --------- helpers ---------
|
# --------- helpers ---------
|
||||||
def now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    current_utc = datetime.now(tz=timezone.utc)
    return current_utc.isoformat()
|
||||||
|
|
@ -1082,6 +1115,86 @@ def _append_chat_turn(session_id: str, role: str, content: str) -> None:
|
||||||
ASSISTANT_CHAT_SESSIONS[session_id] = turns
|
ASSISTANT_CHAT_SESSIONS[session_id] = turns
|
||||||
|
|
||||||
|
|
||||||
|
def build_self_improve_prompt(
    payload: "AssistantSelfImprovePayload",
    feedback_rows: List[Dict[str, Any]],
    action_rows: List[Dict[str, Any]],
) -> str:
    """Build the LLM prompt asking for improvement proposals as JSON.

    Each feedback/action row is rendered as a single ``key=value | ...`` line.
    Free-text fields are whitespace-collapsed and truncated so that one row
    always occupies exactly one prompt line, even when the stored text
    contains newlines.

    Args:
        payload: Request settings; ``objective``, ``release_name``,
            ``max_proposals``, ``feedback_limit`` and ``action_limit`` are read.
        feedback_rows: Feedback records; at most ``feedback_limit`` are used.
        action_rows: Action records; at most ``action_limit`` are used.

    Returns:
        The complete prompt text.
    """

    def _clip(value: Any, limit: int) -> str:
        # Collapse every whitespace run (including newlines) to one space
        # before truncating; raw newlines would split a row across lines and
        # let row text masquerade as separate prompt lines.
        return " ".join(str(value or "").split())[:limit]

    feedback_lines: List[str] = [
        " | ".join(
            [
                f"outcome={r.get('outcome')}",
                f"task_type={r.get('task_type')}",
                f"release={r.get('release_name')}",
                f"goal={_clip(r.get('goal'), 200)}",
                f"notes={_clip(r.get('notes'), 200)}",
                f"draft={_clip(r.get('draft_text'), 220)}",
                f"final={_clip(r.get('final_text'), 220)}",
            ]
        )
        for r in feedback_rows[: payload.feedback_limit]
    ]
    action_lines: List[str] = [
        " | ".join(
            [
                f"status={r.get('status')}",
                f"task_type={r.get('task_type')}",
                f"step_id={r.get('step_id')}",
                f"action_type={r.get('action_type')}",
                f"title={_clip(r.get('step_title'), 160)}",
                f"error={_clip(r.get('error_text'), 220)}",
            ]
        )
        for r in action_rows[: payload.action_limit]
    ]

    # An explicit placeholder tells the model a section is empty on purpose.
    feedback_block = "\n".join(feedback_lines) if feedback_lines else "(none)"
    action_block = "\n".join(action_lines) if action_lines else "(none)"
    return (
        "You are a senior reliability engineer. Propose concrete code improvements.\n"
        "Return JSON only with exact shape:\n"
        '{'
        '"summary":"...",'
        '"proposals":[{"proposal_id":"P1","title":"...","problem":"...","change":"...","files":["app.py"],'
        '"risk":"low|medium|high","tests":["..."],"auto_apply_safe":true|false}]}'
        "\n"
        f"Create at most {payload.max_proposals} proposals.\n"
        "Prioritize high-impact, low-risk changes first.\n"
        "Only propose changes grounded in the provided signals.\n\n"
        f"Objective: {payload.objective}\n"
        f"Release filter: {payload.release_name or '(none)'}\n\n"
        f"Feedback signals:\n{feedback_block}\n\n"
        f"Action signals:\n{action_block}\n"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def fallback_self_improve_proposals() -> List[AssistantSelfImproveProposal]:
    """Return the fixed, hand-written proposals used when no generated ones exist."""
    retrieval_fallback = AssistantSelfImproveProposal(
        proposal_id="P1",
        title="Improve retrieval fallback ordering",
        problem="Low-confidence drafts still occur when release-filtered retrieval returns no hits.",
        change="Add deterministic fallback chain and expose retrieval diagnostics in API responses.",
        files=["app.py"],
        risk="low",
        tests=["Call /assistant/draft with missing release_name and verify non-empty sources fallback."],
        auto_apply_safe=True,
    )
    extraction_benchmark = AssistantSelfImproveProposal(
        proposal_id="P2",
        title="Add deterministic task extraction benchmark route",
        problem="Task extraction quality is hard to evaluate consistently over time.",
        change="Introduce an eval endpoint with fixed fixtures and compare AI vs heuristic precision.",
        files=["app.py", "tests/"],
        risk="medium",
        tests=["Run fixtures for promotional and actionable emails and assert expected tasks count."],
        auto_apply_safe=False,
    )
    # Order matters: callers truncate this list to the requested maximum.
    return [retrieval_fallback, extraction_benchmark]
|
||||||
|
|
||||||
|
|
||||||
def build_assistant_prompt(payload: AssistantDraftPayload, source_docs: List[Dict[str, Any]]) -> str:
|
def build_assistant_prompt(payload: AssistantDraftPayload, source_docs: List[Dict[str, Any]]) -> str:
|
||||||
recipient = payload.recipient or "unspecified recipient"
|
recipient = payload.recipient or "unspecified recipient"
|
||||||
tone = payload.tone or "professional"
|
tone = payload.tone or "professional"
|
||||||
|
|
@ -2311,6 +2424,132 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _self_improve_interleave(groups: List[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
    """Merge row groups round-robin so a later truncation keeps rows from every group."""
    merged: List[Dict[str, Any]] = []
    longest = max((len(group) for group in groups), default=0)
    for position in range(longest):
        merged.extend(group[position] for group in groups if position < len(group))
    return merged


def _self_improve_dedup(rows: List[Dict[str, Any]], id_key: str, limit: int) -> List[Dict[str, Any]]:
    """Drop repeated rows (by ``id_key``, else by content hash) and keep the first ``limit``."""
    seen: set[str] = set()
    unique: List[Dict[str, Any]] = []
    for row in rows:
        if not isinstance(row, dict):
            continue  # Downstream code calls .get() on every row.
        key = str(row.get(id_key) or "")
        if not key:
            # default=str keeps hashing from raising on non-JSON values
            # (e.g. datetimes); the digest is only used as a dedup key.
            serialized = json.dumps(row, ensure_ascii=False, sort_keys=True, default=str)
            key = hashlib.sha256(serialized.encode("utf-8")).hexdigest()
        if key in seen:
            continue
        seen.add(key)
        unique.append(row)
    return unique[:limit]


def _self_improve_str_list(value: Any) -> List[str]:
    """Normalize a model-supplied ``files``/``tests`` value to a list of non-blank strings."""
    if isinstance(value, str):
        # A bare string would otherwise be iterated character by character.
        value = [value]
    if not isinstance(value, (list, tuple)):
        return []
    return [str(item) for item in value if item is not None and str(item).strip()]


def _self_improve_parse_proposal(raw: Dict[str, Any], position: int) -> AssistantSelfImproveProposal:
    """Coerce one model-produced proposal dict into a validated proposal (1-based ``position``)."""
    risk = str(raw.get("risk") or "medium").strip().lower()
    if risk not in ("low", "medium", "high"):
        risk = "medium"
    return AssistantSelfImproveProposal(
        proposal_id=str(raw.get("proposal_id") or f"P{position}"),
        title=str(raw.get("title") or "").strip() or f"Proposal {position}",
        problem=str(raw.get("problem") or "").strip() or "Unspecified problem.",
        change=str(raw.get("change") or "").strip() or "No change details provided.",
        files=_self_improve_str_list(raw.get("files")),
        risk=risk,  # type: ignore[arg-type]
        tests=_self_improve_str_list(raw.get("tests")),
        # Only a real JSON `true` counts: bool("false") would be True, which
        # is the wrong default for a safety flag.
        auto_apply_safe=raw.get("auto_apply_safe") is True,
    )


@app.post("/assistant/self-improve", response_model=AssistantSelfImproveResponse)
async def assistant_self_improve(
    payload: AssistantSelfImprovePayload,
    x_admin_api_key: Optional[str] = Header(default=None),
):
    """Generate improvement proposals from edited/rejected feedback and blocked actions.

    Signal queries and generation are best effort: a failure is logged and the
    endpoint falls back to a fixed proposal set instead of erroring. Proposals
    are never applied; a request with ``apply=True`` is reported as blocked.
    """
    # Admin gate; presumably raises on a missing/invalid key (defined elsewhere).
    check_admin_api_key(x_admin_api_key)

    requested_outcomes = [
        outcome
        for outcome, enabled in (
            ("edited", payload.include_edited_feedback),
            ("rejected", payload.include_rejected_feedback),
        )
        if enabled
    ]
    feedback_groups: List[List[Dict[str, Any]]] = []
    for outcome in requested_outcomes:
        try:
            res = await run_remote_query_assistant_feedback(
                outcome=outcome,
                task_type=None,
                release_name=payload.release_name,
                limit=payload.feedback_limit,
            )
            feedback_groups.append(list(res.get("rows") or []))
        except Exception as e:
            # Deliberate best effort: one failed source must not fail the request.
            print(f"[WARN] self-improve {outcome} feedback query failed: {e}")

    action_rows: List[Dict[str, Any]] = []
    if payload.include_blocked_actions:
        try:
            res = await run_remote_query_assistant_actions(
                status="blocked",
                task_type=None,
                release_name=payload.release_name,
                step_id=None,
                action_type=None,
                limit=payload.action_limit,
            )
            action_rows.extend(res.get("rows") or [])
        except Exception as e:
            print(f"[WARN] self-improve blocked actions query failed: {e}")

    # Each outcome query may return up to feedback_limit rows; interleave
    # before truncating so one outcome cannot crowd the other out entirely.
    feedback_rows = _self_improve_dedup(
        _self_improve_interleave(feedback_groups), "feedback_id", payload.feedback_limit
    )
    action_rows = _self_improve_dedup(action_rows, "action_id", payload.action_limit)

    prompt = build_self_improve_prompt(payload, feedback_rows, action_rows)
    fallback_summary = "Generated from fallback proposal set."
    summary = fallback_summary
    proposals: List[AssistantSelfImproveProposal] = []
    try:
        raw = await ollama_generate(prompt)
        parsed = _extract_json_object_from_text(raw)
        summary = str(parsed.get("summary") or "").strip() or fallback_summary
        raw_proposals = parsed.get("proposals")
        if isinstance(raw_proposals, list):
            for idx, candidate in enumerate(raw_proposals[: payload.max_proposals], start=1):
                if isinstance(candidate, dict):
                    proposals.append(_self_improve_parse_proposal(candidate, idx))
    except Exception as e:
        # Model/parse failures are expected occasionally; fall back below.
        print(f"[WARN] self-improve generation failed: {e}")

    if not proposals:
        proposals = fallback_self_improve_proposals()[: payload.max_proposals]
        # Keep the summary consistent with what is actually returned.
        summary = fallback_summary

    apply_blocked = payload.apply
    apply_block_reason: Optional[str] = None
    if payload.apply:
        apply_block_reason = "Auto-apply is not enabled yet. Use proposals as PR/patch inputs."

    return AssistantSelfImproveResponse(
        objective=payload.objective,
        release_name=payload.release_name,
        summary=summary,
        proposals=proposals[: payload.max_proposals],
        signals={
            "feedback_rows": len(feedback_rows),
            "blocked_action_rows": len(action_rows),
        },
        apply_blocked=apply_blocked,
        apply_block_reason=apply_block_reason,
    )
|
||||||
|
|
||||||
|
|
||||||
@app.post("/assistant/draft", response_model=AssistantDraftResponse)
|
@app.post("/assistant/draft", response_model=AssistantDraftResponse)
|
||||||
async def assistant_draft(payload: AssistantDraftPayload, x_admin_api_key: Optional[str] = Header(default=None)):
|
async def assistant_draft(payload: AssistantDraftPayload, x_admin_api_key: Optional[str] = Header(default=None)):
|
||||||
check_admin_api_key(x_admin_api_key)
|
check_admin_api_key(x_admin_api_key)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue