Add hybrid local/OpenAI LLM routing with cloud redaction, two-stage source retrieval, and persisted chat sessions.

This commit is contained in:
Carl Niklas Rydberg 2026-02-15 00:33:04 +01:00
parent f3e3f4f586
commit 89955662bf
3 changed files with 397 additions and 60 deletions

File diff suppressed because one or more lines are too long

442
app.py
View file

@ -40,6 +40,10 @@ IPFS_API = os.getenv("IPFS_API", "http://localhost:5001") # Kubo HTTP API
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434") OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b") OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
OLLAMA_EMBED_MODEL = os.getenv("OLLAMA_EMBED_MODEL", "nomic-embed-text") OLLAMA_EMBED_MODEL = os.getenv("OLLAMA_EMBED_MODEL", "nomic-embed-text")
LLM_PROVIDER = os.getenv("LLM_PROVIDER", "local").strip().lower() # local|hybrid|openai
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "").strip()
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini").strip()
ALLOW_CLOUD_ESCALATION = os.getenv("ALLOW_CLOUD_ESCALATION", "0").strip().lower() in ("1", "true", "yes", "on")
PROJECTOR_SSH_HOST = os.getenv("PROJECTOR_SSH_HOST", "lakehouse-core.rakeroots.lan") PROJECTOR_SSH_HOST = os.getenv("PROJECTOR_SSH_HOST", "lakehouse-core.rakeroots.lan")
PROJECTOR_REMOTE_DIR = os.getenv("PROJECTOR_REMOTE_DIR", "/tmp/jecio") PROJECTOR_REMOTE_DIR = os.getenv("PROJECTOR_REMOTE_DIR", "/tmp/jecio")
PROJECTOR_REMOTE_SCRIPT = os.getenv("PROJECTOR_REMOTE_SCRIPT", "./run-projector-standard.sh") PROJECTOR_REMOTE_SCRIPT = os.getenv("PROJECTOR_REMOTE_SCRIPT", "./run-projector-standard.sh")
@ -208,6 +212,9 @@ class AssistantDraftResponse(BaseModel):
confidence: float confidence: float
needs_review: bool needs_review: bool
release_name: Optional[str] = None release_name: Optional[str] = None
provider_used: Literal["local", "openai"] = "local"
escalated: bool = False
redaction_stats: Dict[str, int] = Field(default_factory=dict)
class AssistantPlanPayload(BaseModel): class AssistantPlanPayload(BaseModel):
@ -302,6 +309,9 @@ class AssistantChatResponse(BaseModel):
sources: List[AssistantDraftSource] sources: List[AssistantDraftSource]
confidence: float confidence: float
release_name: Optional[str] = None release_name: Optional[str] = None
provider_used: Literal["local", "openai"] = "local"
escalated: bool = False
redaction_stats: Dict[str, int] = Field(default_factory=dict)
class AssistantSelfImprovePayload(BaseModel): class AssistantSelfImprovePayload(BaseModel):
@ -906,7 +916,16 @@ TASK_AI_CACHE_TTL_SEC = int(os.getenv("TASK_AI_CACHE_TTL_SEC", "3600"))
TASK_AI_CACHE_MAX_SIZE = int(os.getenv("TASK_AI_CACHE_MAX_SIZE", "5000")) TASK_AI_CACHE_MAX_SIZE = int(os.getenv("TASK_AI_CACHE_MAX_SIZE", "5000"))
TASK_AI_CACHE: Dict[str, Dict[str, Any]] = {} TASK_AI_CACHE: Dict[str, Dict[str, Any]] = {}
ASSISTANT_CHAT_MAX_TURNS = int(os.getenv("ASSISTANT_CHAT_MAX_TURNS", "20")) ASSISTANT_CHAT_MAX_TURNS = int(os.getenv("ASSISTANT_CHAT_MAX_TURNS", "20"))
ASSISTANT_CHAT_STORE_PATH = os.getenv("ASSISTANT_CHAT_STORE_PATH", ".assistant_chat_sessions.json")
ASSISTANT_CHAT_SESSIONS: Dict[str, List[Dict[str, str]]] = {} ASSISTANT_CHAT_SESSIONS: Dict[str, List[Dict[str, str]]] = {}
# Minimum number of tokens a source must share with the query to be kept.
ASSISTANT_CHAT_MIN_TOKEN_OVERLAP = int(os.getenv("ASSISTANT_CHAT_MIN_TOKEN_OVERLAP", "1"))
# Stage-1 retrieval over-fetches this many candidates per requested source.
ASSISTANT_SOURCE_CANDIDATE_MULTIPLIER = int(os.getenv("ASSISTANT_SOURCE_CANDIDATE_MULTIPLIER", "4"))
# Fraction of the query's tokens a source must cover to count as a strong match.
ASSISTANT_SOURCE_MIN_COVERAGE = float(os.getenv("ASSISTANT_SOURCE_MIN_COVERAGE", "0.6"))
# Common English words ignored when tokenizing queries and sources for matching.
ASSISTANT_QUERY_STOPWORDS = {
    "the", "and", "for", "with", "that", "this", "from", "have", "has", "had", "are", "was", "were", "will",
    "would", "should", "could", "can", "you", "your", "about", "what", "when", "where", "which", "who", "whom",
    "why", "how", "tomorrow", "today", "please", "any", "there", "need", "want", "know", "does", "did", "done",
}
def _split_sentences(text: str) -> List[str]: def _split_sentences(text: str) -> List[str]:
@ -928,6 +947,115 @@ def _extract_who(text: str, default_sender: Optional[str]) -> Optional[str]:
return default_sender or None return default_sender or None
def _query_tokens(text: str) -> set[str]:
    """Extract lower-cased alphanumeric tokens (3+ chars), minus stopwords."""
    raw_tokens = re.findall(r"[a-z0-9]{3,}", (text or "").lower())
    return set(raw_tokens) - ASSISTANT_QUERY_STOPWORDS
def _source_text_for_match(src: Dict[str, Any]) -> str:
return " ".join(
[
str(src.get("text") or ""),
str(src.get("description") or ""),
str(src.get("summary") or ""),
str(src.get("display_name") or ""),
str(src.get("canonical_name") or ""),
]
)
def _filter_relevant_hits_for_query(hits: List[Dict[str, Any]], query: str) -> List[Dict[str, Any]]:
    """Keep only hits whose source text shares enough tokens with the query."""
    query_tokens = _query_tokens(query)
    if not query_tokens:
        # Nothing meaningful to match against; pass every hit through.
        return hits
    kept: List[Dict[str, Any]] = []
    for hit in hits:
        source = hit.get("_source", {}) or {}
        source_tokens = _query_tokens(_source_text_for_match(source))
        if len(query_tokens & source_tokens) >= ASSISTANT_CHAT_MIN_TOKEN_OVERLAP:
            kept.append(hit)
    return kept
def _is_strong_source_match(query: str, src: Dict[str, Any]) -> bool:
    """Decide whether a source doc is a strong match for the query.

    Requires both a minimum absolute token overlap and a minimum fraction
    of the query's tokens to be covered by the source text. Queries with
    no meaningful tokens always count as matched.
    """
    query_tokens = _query_tokens(query)
    if not query_tokens:
        return True
    source_tokens = _query_tokens(_source_text_for_match(src))
    overlap = len(query_tokens & source_tokens)
    query_size = max(1, len(query_tokens))
    required_overlap = ASSISTANT_CHAT_MIN_TOKEN_OVERLAP
    # For multi-term questions, require at least 2 matched terms by default.
    if query_size >= 2:
        required_overlap = max(required_overlap, 2)
    coverage = overlap / query_size
    return overlap >= required_overlap and coverage >= ASSISTANT_SOURCE_MIN_COVERAGE
async def _retrieve_sources_two_stage(
    query: str,
    release_name: Optional[str],
    max_sources: int,
    include_release_recent_fallback: bool = True,
) -> List[Dict[str, Any]]:
    """Retrieve up to ``max_sources`` ES hits for a query in two stages.

    Stage 1 over-fetches a candidate pool (release-scoped search, then a
    global search, then an optional recent-by-release fallback), deduplicated
    by concept_id / source_pk (or a content hash when neither is present).
    Stage 2 reranks by (token overlap, ES score) and keeps only strong
    matches; returns [] rather than padding the answer with weak matches.
    Each retrieval step is best-effort: failures are logged, not raised.
    """
    candidate_size = max(max_sources, max_sources * max(2, ASSISTANT_SOURCE_CANDIDATE_MULTIPLIER))
    seen_keys: set[str] = set()
    candidates: List[Dict[str, Any]] = []

    def _add_hits(hs: List[Dict[str, Any]]) -> None:
        # Dedupe across retrieval passes; hash the doc when no stable id exists.
        for h in hs:
            src = h.get("_source", {}) or {}
            key = str(src.get("concept_id") or src.get("source_pk") or "")
            if not key:
                key = hashlib.sha256(
                    json.dumps(src, ensure_ascii=False, sort_keys=True).encode("utf-8")
                ).hexdigest()[:20]
            if key in seen_keys:
                continue
            seen_keys.add(key)
            candidates.append(h)

    # Stage 1: broad retrieval
    try:
        _add_hits(await es_search_hits(q=query, size=candidate_size, release_name=release_name))
    except Exception as e:
        print(f"[WARN] stage1 release search failed: {e}")
    if len(candidates) < max_sources:
        # Widen to a global (unfiltered) search only if the pool is still thin.
        try:
            _add_hits(await es_search_hits(q=query, size=candidate_size, release_name=None))
        except Exception as e:
            print(f"[WARN] stage1 global search failed: {e}")
    if len(candidates) < max_sources and include_release_recent_fallback and release_name:
        # Last resort: recent docs for the release, regardless of query match.
        try:
            _add_hits(await es_recent_by_release(release_name, size=candidate_size))
        except Exception as e:
            print(f"[WARN] stage1 release-recent fallback failed: {e}")
    # Stage 2: relevance rerank and trim
    q_tokens = _query_tokens(query)
    ranked: List[Dict[str, Any]] = []
    for h in candidates:
        src = h.get("_source", {}) or {}
        s_tokens = _query_tokens(_source_text_for_match(src))
        overlap = len(q_tokens.intersection(s_tokens)) if q_tokens else 0
        base_score = float(h.get("_score") or 0.0)
        ranked.append({"hit": h, "overlap": overlap, "base_score": base_score})
    # Overlap dominates; ES score breaks ties among equally-overlapping hits.
    ranked.sort(key=lambda x: (x["overlap"], x["base_score"]), reverse=True)
    relevant = []
    for x in ranked:
        src = x["hit"].get("_source", {}) or {}
        if _is_strong_source_match(query, src):
            relevant.append(x)
    if relevant:
        return [x["hit"] for x in relevant[:max_sources]]
    # Explicitly return no sources instead of attaching weak matches.
    return []
def extract_pending_tasks_from_source(src: Dict[str, Any]) -> List[Dict[str, Any]]: def extract_pending_tasks_from_source(src: Dict[str, Any]) -> List[Dict[str, Any]]:
text = str(src.get("text") or src.get("description") or src.get("summary") or "").strip() text = str(src.get("text") or src.get("description") or src.get("summary") or "").strip()
if not text: if not text:
@ -1129,11 +1257,16 @@ def build_chat_prompt(
) )
context = "\n\n---\n\n".join(context_chunks) if context_chunks else "No retrieved context." context = "\n\n---\n\n".join(context_chunks) if context_chunks else "No retrieved context."
hist = "\n".join(history_lines) if history_lines else "(none)" hist = "\n".join(history_lines) if history_lines else "(none)"
now_local = datetime.now().astimezone()
now_line = now_local.strftime("%A, %Y-%m-%d %H:%M:%S %Z")
return ( return (
"You are a practical personal assistant. Be concise, factual, and useful.\n" "You are a practical personal assistant. Be concise, factual, and useful.\n"
"Use retrieved context when available. If uncertain, say so briefly and ask one clarifying question.\n" "Use both conversation history and retrieved context when available.\n"
"Treat prior turns in this session as valid context unless the user explicitly asks to reset/forget.\n"
"If uncertain, say so briefly and ask one clarifying question.\n"
"Do not claim external actions were already performed.\n\n" "Do not claim external actions were already performed.\n\n"
f"Current local datetime: {now_line}\n"
f"Release filter: {release_name or '(none)'}\n" f"Release filter: {release_name or '(none)'}\n"
f"Conversation history:\n{hist}\n\n" f"Conversation history:\n{hist}\n\n"
f"Retrieved context:\n{context}\n\n" f"Retrieved context:\n{context}\n\n"
@ -1149,6 +1282,68 @@ def _append_chat_turn(session_id: str, role: str, content: str) -> None:
if len(turns) > max_items: if len(turns) > max_items:
turns = turns[-max_items:] turns = turns[-max_items:]
ASSISTANT_CHAT_SESSIONS[session_id] = turns ASSISTANT_CHAT_SESSIONS[session_id] = turns
_save_chat_sessions()
def _load_chat_sessions() -> Dict[str, List[Dict[str, str]]]:
    """Load persisted chat sessions from disk, dropping malformed entries."""
    store = Path(ASSISTANT_CHAT_STORE_PATH)
    if not store.exists() or not store.is_file():
        return {}
    try:
        parsed = json.loads(store.read_text(encoding="utf-8", errors="replace"))
    except Exception:
        # Unreadable or corrupt store file: start with empty memory.
        return {}
    if not isinstance(parsed, dict):
        return {}
    sessions: Dict[str, List[Dict[str, str]]] = {}
    for session_id, raw_turns in parsed.items():
        if not isinstance(session_id, str) or not isinstance(raw_turns, list):
            continue
        kept: List[Dict[str, str]] = []
        # Only keep the most recent window of well-formed user/assistant turns.
        for turn in raw_turns[-ASSISTANT_CHAT_MAX_TURNS * 2 :]:
            if not isinstance(turn, dict):
                continue
            role = str(turn.get("role") or "").strip()
            content = str(turn.get("content") or "")
            if role in ("user", "assistant") and content:
                kept.append({"role": role, "content": content})
        if kept:
            sessions[session_id] = kept
    return sessions
def _save_chat_sessions() -> None:
    """Persist in-memory chat sessions to disk atomically (best effort)."""
    store = Path(ASSISTANT_CHAT_STORE_PATH)
    try:
        snapshot: Dict[str, List[Dict[str, str]]] = {}
        for session_id, turns in ASSISTANT_CHAT_SESSIONS.items():
            if not isinstance(session_id, str) or not isinstance(turns, list):
                continue
            snapshot[session_id] = [
                {"role": str(turn.get("role") or ""), "content": str(turn.get("content") or "")}
                for turn in turns[-ASSISTANT_CHAT_MAX_TURNS * 2 :]
                if isinstance(turn, dict)
            ]
        # Write to a temp file and rename so readers never see a partial file.
        tmp_path = store.with_suffix(store.suffix + ".tmp")
        tmp_path.write_text(json.dumps(snapshot, ensure_ascii=False), encoding="utf-8")
        tmp_path.replace(store)
    except Exception as e:
        print(f"[WARN] failed saving chat sessions: {e}")
# Initialize persisted chat memory at startup.
ASSISTANT_CHAT_SESSIONS = _load_chat_sessions()
def _build_chat_retrieval_query(message: str, history: List[Dict[str, str]]) -> str:
# Blend latest question with a small slice of recent user context.
parts: List[str] = [message.strip()]
user_turns = [t.get("content", "").strip() for t in history if t.get("role") == "user" and t.get("content")]
for t in user_turns[-3:]:
if t and t.lower() != message.strip().lower():
parts.append(t[:200])
return " ".join(p for p in parts if p).strip()
def _slugify(text: str) -> str: def _slugify(text: str) -> str:
@ -1297,6 +1492,136 @@ async def ollama_generate(prompt: str) -> str:
return (r.json().get("response") or "").strip() return (r.json().get("response") or "").strip()
def _redact_for_cloud(text: str) -> Dict[str, Any]:
out = text or ""
mapping: Dict[str, str] = {}
counters: Dict[str, int] = {}
stats: Dict[str, int] = {}
patterns = [
("EMAIL", re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")),
("PHONE", re.compile(r"\b(?:\+?\d[\d().\-\s]{6,}\d)\b")),
("IPV4", re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")),
("URL", re.compile(r"\bhttps?://[^\s]+")),
("INTERNAL_HOST", re.compile(r"\b[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.(?:lan|local|internal)\b")),
("OPENAI_KEY", re.compile(r"\bsk-[A-Za-z0-9\-_]{20,}\b")),
]
for label, pat in patterns:
counters[label] = 0
def repl(match: re.Match[str], _label: str = label) -> str:
counters[_label] += 1
placeholder = f"[[{_label}_{counters[_label]}]]"
mapping[placeholder] = match.group(0)
return placeholder
out = pat.sub(repl, out)
if counters[label] > 0:
stats[label.lower()] = counters[label]
stats["total"] = sum(v for k, v in stats.items() if k != "total")
return {"text": out, "mapping": mapping, "stats": stats}
def _rehydrate_from_cloud(text: str, mapping: Dict[str, str]) -> str:
out = text or ""
for placeholder, original in mapping.items():
out = out.replace(placeholder, original)
return out
def _looks_uncertain(text: str) -> bool:
t = (text or "").strip().lower()
if not t:
return True
cues = [
"i'm unsure",
"i am unsure",
"i don't have enough context",
"i do not have enough context",
"could you provide more",
"could you clarify",
"can you clarify",
"unable to provide",
"please provide more information",
"i need more context",
"i need more information",
]
if any(c in t for c in cues):
return True
return len(t) < 40
async def openai_generate(prompt: str) -> str:
    """Send a single-turn prompt to the OpenAI chat completions API.

    Raises RuntimeError when no API key is configured. Returns the first
    choice's message content (stripped), or "" when no choices come back.
    """
    if not OPENAI_API_KEY:
        raise RuntimeError("OPENAI_API_KEY is not configured")
    request_headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json",
    }
    request_body = {
        "model": OPENAI_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        # Low temperature keeps drafts/answers deterministic-ish.
        "temperature": 0.2,
    }
    async with httpx.AsyncClient(timeout=90) as client:
        response = await client.post(
            "https://api.openai.com/v1/chat/completions",
            headers=request_headers,
            json=request_body,
        )
        response.raise_for_status()
        data = response.json()
    choices = data.get("choices") or []
    if not choices:
        return ""
    message = choices[0].get("message") or {}
    return str(message.get("content") or "").strip()
async def llm_generate_with_router(prompt: str) -> Dict[str, Any]:
    """Route a prompt to local Ollama and/or OpenAI according to LLM_PROVIDER.

    Returns a dict: {"text", "provider_used", "escalated", "redaction_stats"}.
      - "openai": cloud first (with redaction); falls back to local on error.
      - "local": local only; a local failure is re-raised to the caller.
      - "hybrid": local first; escalates to cloud only when the local answer
        is empty or looks uncertain, keeping the local result if cloud fails.
    """
    # Unknown LLM_PROVIDER values degrade safely to "local".
    provider = LLM_PROVIDER if LLM_PROVIDER in ("local", "hybrid", "openai") else "local"

    async def _cloud_call() -> Dict[str, Any]:
        # Redact sensitive spans before the prompt leaves the box, then
        # rehydrate placeholders in the cloud reply.
        if not ALLOW_CLOUD_ESCALATION:
            raise RuntimeError("Cloud escalation is disabled")
        redacted = _redact_for_cloud(prompt)
        cloud_text = await openai_generate(redacted["text"])
        return {
            "text": _rehydrate_from_cloud(cloud_text, redacted["mapping"]),
            "provider_used": "openai",
            "escalated": True,
            "redaction_stats": redacted["stats"],
        }

    if provider == "openai":
        try:
            return await _cloud_call()
        except Exception as e:
            # Cloud-first mode still degrades to local rather than failing.
            print(f"[WARN] openai provider failed, falling back to local: {e}")
            local_text = await ollama_generate(prompt)
            return {"text": local_text, "provider_used": "local", "escalated": False, "redaction_stats": {}}
    local_text = ""
    local_err: Optional[Exception] = None
    try:
        local_text = await ollama_generate(prompt)
    except Exception as e:
        # Remember the error: "local" mode re-raises it, "hybrid" may recover.
        local_err = e
        print(f"[WARN] local generation failed: {e}")
    if provider == "local":
        if local_err:
            raise local_err
        return {"text": local_text, "provider_used": "local", "escalated": False, "redaction_stats": {}}
    # hybrid: local first, escalate only on weak/uncertain outputs.
    if local_text and not _looks_uncertain(local_text):
        return {"text": local_text, "provider_used": "local", "escalated": False, "redaction_stats": {}}
    try:
        return await _cloud_call()
    except Exception as e:
        # Best effort: keep whatever local produced (possibly "") over erroring.
        print(f"[WARN] hybrid cloud escalation failed, keeping local result: {e}")
        return {"text": local_text, "provider_used": "local", "escalated": False, "redaction_stats": {}}
def fallback_draft_text(payload: AssistantDraftPayload) -> str: def fallback_draft_text(payload: AssistantDraftPayload) -> str:
recipient = payload.recipient or "there" recipient = payload.recipient or "there"
tone = (payload.tone or "").lower() tone = (payload.tone or "").lower()
@ -2639,33 +2964,36 @@ async def assistant_learn(payload: AssistantLearnPayload, x_admin_api_key: Optio
async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optional[str] = Header(default=None)): async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optional[str] = Header(default=None)):
check_admin_api_key(x_admin_api_key) check_admin_api_key(x_admin_api_key)
session_id = (payload.session_id or str(uuid.uuid4())).strip() session_id = (payload.session_id or str(uuid.uuid4())).strip()
msg_norm = (payload.message or "").strip().lower()
if re.search(r"\b(what day is it|what date is it|what day is it today|what date is it today)\b", msg_norm):
now_local = datetime.now().astimezone()
answer = f"Today is {now_local.strftime('%A, %B %d, %Y')}."
_append_chat_turn(session_id, "user", payload.message)
_append_chat_turn(session_id, "assistant", answer)
return AssistantChatResponse(
session_id=session_id,
answer=answer,
sources=[],
confidence=0.95,
release_name=payload.release_name,
provider_used="local",
escalated=False,
redaction_stats={},
)
history: List[Dict[str, str]] = [] history: List[Dict[str, str]] = []
if payload.history: if payload.history:
history = [{"role": h.role, "content": h.content} for h in payload.history if h.content.strip()] history = [{"role": h.role, "content": h.content} for h in payload.history if h.content.strip()]
else: else:
history = ASSISTANT_CHAT_SESSIONS.get(session_id, []) history = ASSISTANT_CHAT_SESSIONS.get(session_id, [])
retrieval_query = _build_chat_retrieval_query(payload.message, history)
hits: List[Dict[str, Any]] = [] hits: List[Dict[str, Any]] = await _retrieve_sources_two_stage(
try: query=retrieval_query,
hits = await es_search_hits( release_name=payload.release_name,
q=payload.message, max_sources=payload.max_sources,
size=payload.max_sources, include_release_recent_fallback=True,
release_name=payload.release_name, )
)
except Exception as e:
print(f"[WARN] assistant_chat retrieval failed: {e}")
hits = []
if not hits and payload.release_name:
try:
hits = await es_recent_by_release(payload.release_name, size=payload.max_sources)
except Exception as e:
print(f"[WARN] assistant_chat release fallback failed: {e}")
if not hits:
try:
hits = await es_recent_messages(size=payload.max_sources, release_name=payload.release_name, q=None)
except Exception as e:
print(f"[WARN] assistant_chat inbox fallback failed: {e}")
prompt = build_chat_prompt( prompt = build_chat_prompt(
user_message=payload.message, user_message=payload.message,
@ -2673,9 +3001,16 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
source_docs=hits, source_docs=hits,
release_name=payload.release_name, release_name=payload.release_name,
) )
provider_used: Literal["local", "openai"] = "local"
escalated = False
redaction_stats: Dict[str, int] = {}
try: try:
answer = await ollama_generate(prompt) llm = await llm_generate_with_router(prompt)
if not answer.strip(): answer = str(llm.get("text") or "").strip()
provider_used = "openai" if llm.get("provider_used") == "openai" else "local"
escalated = bool(llm.get("escalated", False))
redaction_stats = llm.get("redaction_stats") or {}
if not answer:
answer = "I don't have enough context to answer confidently. Can you share one more detail?" answer = "I don't have enough context to answer confidently. Can you share one more detail?"
except Exception as e: except Exception as e:
print(f"[WARN] assistant_chat generation failed: {e}") print(f"[WARN] assistant_chat generation failed: {e}")
@ -2693,7 +3028,8 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
score=float(h.get("_score")) if h.get("_score") is not None else None, score=float(h.get("_score")) if h.get("_score") is not None else None,
) )
) )
source_count = len([s for s in sources if s.concept_id]) strong_sources = [s for s in sources if s.concept_id]
source_count = len(strong_sources)
confidence = 0.35 confidence = 0.35
if source_count >= 5: if source_count >= 5:
confidence = 0.85 confidence = 0.85
@ -2703,6 +3039,9 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
confidence = 0.6 confidence = 0.6
if len(answer.strip()) < 30: if len(answer.strip()) < 30:
confidence = min(confidence, 0.45) confidence = min(confidence, 0.45)
if _looks_uncertain(answer):
strong_sources = []
confidence = min(confidence, 0.4)
_append_chat_turn(session_id, "user", payload.message) _append_chat_turn(session_id, "user", payload.message)
_append_chat_turn(session_id, "assistant", answer) _append_chat_turn(session_id, "assistant", answer)
@ -2710,9 +3049,12 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
return AssistantChatResponse( return AssistantChatResponse(
session_id=session_id, session_id=session_id,
answer=answer, answer=answer,
sources=[s for s in sources if s.concept_id], sources=strong_sources,
confidence=confidence, confidence=confidence,
release_name=payload.release_name, release_name=payload.release_name,
provider_used=provider_used,
escalated=escalated,
redaction_stats=redaction_stats,
) )
@ -3110,43 +3452,24 @@ async def assistant_draft(payload: AssistantDraftPayload, x_admin_api_key: Optio
" ".join(payload.constraints), " ".join(payload.constraints),
] ]
).strip() ).strip()
try: hits = await _retrieve_sources_two_stage(
hits = await es_search_hits( query=retrieval_query,
q=retrieval_query, release_name=payload.release_name,
size=payload.max_sources, max_sources=payload.max_sources,
release_name=payload.release_name, include_release_recent_fallback=True,
) )
except Exception as e:
print(f"[WARN] assistant_draft retrieval failed: {e}")
hits = []
if not hits and payload.release_name:
try:
hits = await es_recent_by_release(payload.release_name, size=payload.max_sources)
except Exception as e:
print(f"[WARN] assistant_draft release fallback retrieval failed: {e}")
if not hits and payload.release_name:
try:
hits = await es_recent_messages(
size=payload.max_sources,
release_name=payload.release_name,
q=None,
)
except Exception as e:
print(f"[WARN] assistant_draft inbox fallback retrieval failed: {e}")
if not hits:
try:
hits = await es_recent_messages(
size=payload.max_sources,
release_name=None,
q=None,
)
except Exception as e:
print(f"[WARN] assistant_draft global fallback retrieval failed: {e}")
prompt = build_assistant_prompt(payload, hits) prompt = build_assistant_prompt(payload, hits)
used_fallback = False used_fallback = False
provider_used: Literal["local", "openai"] = "local"
escalated = False
redaction_stats: Dict[str, int] = {}
try: try:
draft = await ollama_generate(prompt) llm = await llm_generate_with_router(prompt)
draft = str(llm.get("text") or "").strip()
provider_used = "openai" if llm.get("provider_used") == "openai" else "local"
escalated = bool(llm.get("escalated", False))
redaction_stats = llm.get("redaction_stats") or {}
if not draft.strip(): if not draft.strip():
used_fallback = True used_fallback = True
draft = fallback_draft_text(payload) draft = fallback_draft_text(payload)
@ -3198,6 +3521,9 @@ async def assistant_draft(payload: AssistantDraftPayload, x_admin_api_key: Optio
confidence=confidence, confidence=confidence,
needs_review=True, needs_review=True,
release_name=payload.release_name, release_name=payload.release_name,
provider_used=provider_used,
escalated=escalated,
redaction_stats=redaction_stats,
) )

View file

@ -146,7 +146,13 @@ async function makeDraft() {
max_sources: 5, max_sources: 5,
}); });
const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 5).join(", "); const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 5).join(", ");
out.textContent = `${data.draft || ""}\n\nconfidence=${data.confidence}\nneeds_review=${data.needs_review}\nsources=${sourceLine}`; out.textContent =
`${data.draft || ""}\n\n` +
`confidence=${data.confidence}\n` +
`needs_review=${data.needs_review}\n` +
`provider=${data.provider_used || "local"}\n` +
`escalated=${Boolean(data.escalated)}\n` +
`sources=${sourceLine}`;
} catch (e) { } catch (e) {
out.textContent = `Error: ${String(e)}`; out.textContent = `Error: ${String(e)}`;
} }
@ -261,7 +267,11 @@ async function sendChat() {
max_sources: 6, max_sources: 6,
}); });
const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 4).join(", "); const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 4).join(", ");
appendChat("assistant", data.answer || "", `confidence=${data.confidence} | sources=${sourceLine || "-"}`); appendChat(
"assistant",
data.answer || "",
`confidence=${data.confidence} | provider=${data.provider_used || "local"} | escalated=${Boolean(data.escalated)} | sources=${sourceLine || "-"}`
);
} catch (e) { } catch (e) {
appendChat("assistant", `Error: ${String(e)}`, ""); appendChat("assistant", `Error: ${String(e)}`, "");
} }