Add hybrid local/OpenAI LLM routing with cloud redaction, two-stage source retrieval, and persisted chat sessions.

This commit is contained in:
Carl Niklas Rydberg 2026-02-15 00:33:04 +01:00
parent f3e3f4f586
commit 89955662bf
3 changed files with 397 additions and 60 deletions

File diff suppressed because one or more lines are too long

442
app.py
View file

@ -40,6 +40,10 @@ IPFS_API = os.getenv("IPFS_API", "http://localhost:5001") # Kubo HTTP API
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434") OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b") OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
OLLAMA_EMBED_MODEL = os.getenv("OLLAMA_EMBED_MODEL", "nomic-embed-text") OLLAMA_EMBED_MODEL = os.getenv("OLLAMA_EMBED_MODEL", "nomic-embed-text")
LLM_PROVIDER = os.getenv("LLM_PROVIDER", "local").strip().lower() # local|hybrid|openai
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "").strip()
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini").strip()
ALLOW_CLOUD_ESCALATION = os.getenv("ALLOW_CLOUD_ESCALATION", "0").strip().lower() in ("1", "true", "yes", "on")
PROJECTOR_SSH_HOST = os.getenv("PROJECTOR_SSH_HOST", "lakehouse-core.rakeroots.lan") PROJECTOR_SSH_HOST = os.getenv("PROJECTOR_SSH_HOST", "lakehouse-core.rakeroots.lan")
PROJECTOR_REMOTE_DIR = os.getenv("PROJECTOR_REMOTE_DIR", "/tmp/jecio") PROJECTOR_REMOTE_DIR = os.getenv("PROJECTOR_REMOTE_DIR", "/tmp/jecio")
PROJECTOR_REMOTE_SCRIPT = os.getenv("PROJECTOR_REMOTE_SCRIPT", "./run-projector-standard.sh") PROJECTOR_REMOTE_SCRIPT = os.getenv("PROJECTOR_REMOTE_SCRIPT", "./run-projector-standard.sh")
@ -208,6 +212,9 @@ class AssistantDraftResponse(BaseModel):
confidence: float confidence: float
needs_review: bool needs_review: bool
release_name: Optional[str] = None release_name: Optional[str] = None
provider_used: Literal["local", "openai"] = "local"
escalated: bool = False
redaction_stats: Dict[str, int] = Field(default_factory=dict)
class AssistantPlanPayload(BaseModel): class AssistantPlanPayload(BaseModel):
@ -302,6 +309,9 @@ class AssistantChatResponse(BaseModel):
sources: List[AssistantDraftSource] sources: List[AssistantDraftSource]
confidence: float confidence: float
release_name: Optional[str] = None release_name: Optional[str] = None
provider_used: Literal["local", "openai"] = "local"
escalated: bool = False
redaction_stats: Dict[str, int] = Field(default_factory=dict)
class AssistantSelfImprovePayload(BaseModel): class AssistantSelfImprovePayload(BaseModel):
@ -906,7 +916,16 @@ TASK_AI_CACHE_TTL_SEC = int(os.getenv("TASK_AI_CACHE_TTL_SEC", "3600"))
TASK_AI_CACHE_MAX_SIZE = int(os.getenv("TASK_AI_CACHE_MAX_SIZE", "5000")) TASK_AI_CACHE_MAX_SIZE = int(os.getenv("TASK_AI_CACHE_MAX_SIZE", "5000"))
TASK_AI_CACHE: Dict[str, Dict[str, Any]] = {} TASK_AI_CACHE: Dict[str, Dict[str, Any]] = {}
ASSISTANT_CHAT_MAX_TURNS = int(os.getenv("ASSISTANT_CHAT_MAX_TURNS", "20")) ASSISTANT_CHAT_MAX_TURNS = int(os.getenv("ASSISTANT_CHAT_MAX_TURNS", "20"))
ASSISTANT_CHAT_STORE_PATH = os.getenv("ASSISTANT_CHAT_STORE_PATH", ".assistant_chat_sessions.json")
ASSISTANT_CHAT_SESSIONS: Dict[str, List[Dict[str, str]]] = {} ASSISTANT_CHAT_SESSIONS: Dict[str, List[Dict[str, str]]] = {}
# Minimum number of tokens a source must share with the query to be kept.
ASSISTANT_CHAT_MIN_TOKEN_OVERLAP = int(os.getenv("ASSISTANT_CHAT_MIN_TOKEN_OVERLAP", "1"))
# Stage-1 retrieval over-fetches this many candidates per requested source.
ASSISTANT_SOURCE_CANDIDATE_MULTIPLIER = int(os.getenv("ASSISTANT_SOURCE_CANDIDATE_MULTIPLIER", "4"))
# Fraction of the query's tokens a source must cover to count as a strong match.
ASSISTANT_SOURCE_MIN_COVERAGE = float(os.getenv("ASSISTANT_SOURCE_MIN_COVERAGE", "0.6"))
# Common English words ignored when tokenizing queries and sources for matching.
ASSISTANT_QUERY_STOPWORDS = {
    "the", "and", "for", "with", "that", "this", "from", "have", "has", "had", "are", "was", "were", "will",
    "would", "should", "could", "can", "you", "your", "about", "what", "when", "where", "which", "who", "whom",
    "why", "how", "tomorrow", "today", "please", "any", "there", "need", "want", "know", "does", "did", "done",
}
def _split_sentences(text: str) -> List[str]: def _split_sentences(text: str) -> List[str]:
@ -928,6 +947,115 @@ def _extract_who(text: str, default_sender: Optional[str]) -> Optional[str]:
return default_sender or None return default_sender or None
def _query_tokens(text: str) -> set[str]:
    """Extract lower-cased alphanumeric tokens (3+ chars), minus stopwords."""
    raw_tokens = re.findall(r"[a-z0-9]{3,}", (text or "").lower())
    return set(raw_tokens) - ASSISTANT_QUERY_STOPWORDS
def _source_text_for_match(src: Dict[str, Any]) -> str:
return " ".join(
[
str(src.get("text") or ""),
str(src.get("description") or ""),
str(src.get("summary") or ""),
str(src.get("display_name") or ""),
str(src.get("canonical_name") or ""),
]
)
def _filter_relevant_hits_for_query(hits: List[Dict[str, Any]], query: str) -> List[Dict[str, Any]]:
    """Keep only hits whose source text shares enough tokens with the query."""
    query_tokens = _query_tokens(query)
    if not query_tokens:
        # Nothing meaningful to match against; pass every hit through.
        return hits
    kept: List[Dict[str, Any]] = []
    for hit in hits:
        source = hit.get("_source", {}) or {}
        source_tokens = _query_tokens(_source_text_for_match(source))
        if len(query_tokens & source_tokens) >= ASSISTANT_CHAT_MIN_TOKEN_OVERLAP:
            kept.append(hit)
    return kept
def _is_strong_source_match(query: str, src: Dict[str, Any]) -> bool:
    """Decide whether a source doc is a strong match for the query.

    Requires both a minimum absolute token overlap and a minimum fraction
    of the query's tokens to be covered by the source text. Queries with
    no meaningful tokens always count as matched.
    """
    query_tokens = _query_tokens(query)
    if not query_tokens:
        return True
    source_tokens = _query_tokens(_source_text_for_match(src))
    overlap = len(query_tokens & source_tokens)
    query_size = max(1, len(query_tokens))
    required_overlap = ASSISTANT_CHAT_MIN_TOKEN_OVERLAP
    # For multi-term questions, require at least 2 matched terms by default.
    if query_size >= 2:
        required_overlap = max(required_overlap, 2)
    coverage = overlap / query_size
    return overlap >= required_overlap and coverage >= ASSISTANT_SOURCE_MIN_COVERAGE
async def _retrieve_sources_two_stage(
    query: str,
    release_name: Optional[str],
    max_sources: int,
    include_release_recent_fallback: bool = True,
) -> List[Dict[str, Any]]:
    """Retrieve up to ``max_sources`` ES hits for a query in two stages.

    Stage 1 over-fetches a candidate pool (release-scoped search, then a
    global search, then an optional recent-by-release fallback), deduplicated
    by concept_id / source_pk (or a content hash when neither is present).
    Stage 2 reranks by (token overlap, ES score) and keeps only strong
    matches; returns [] rather than padding the answer with weak matches.
    Each retrieval step is best-effort: failures are logged, not raised.
    """
    candidate_size = max(max_sources, max_sources * max(2, ASSISTANT_SOURCE_CANDIDATE_MULTIPLIER))
    seen_keys: set[str] = set()
    candidates: List[Dict[str, Any]] = []

    def _add_hits(hs: List[Dict[str, Any]]) -> None:
        # Dedupe across retrieval passes; hash the doc when no stable id exists.
        for h in hs:
            src = h.get("_source", {}) or {}
            key = str(src.get("concept_id") or src.get("source_pk") or "")
            if not key:
                key = hashlib.sha256(
                    json.dumps(src, ensure_ascii=False, sort_keys=True).encode("utf-8")
                ).hexdigest()[:20]
            if key in seen_keys:
                continue
            seen_keys.add(key)
            candidates.append(h)

    # Stage 1: broad retrieval
    try:
        _add_hits(await es_search_hits(q=query, size=candidate_size, release_name=release_name))
    except Exception as e:
        print(f"[WARN] stage1 release search failed: {e}")
    if len(candidates) < max_sources:
        # Widen to a global (unfiltered) search only if the pool is still thin.
        try:
            _add_hits(await es_search_hits(q=query, size=candidate_size, release_name=None))
        except Exception as e:
            print(f"[WARN] stage1 global search failed: {e}")
    if len(candidates) < max_sources and include_release_recent_fallback and release_name:
        # Last resort: recent docs for the release, regardless of query match.
        try:
            _add_hits(await es_recent_by_release(release_name, size=candidate_size))
        except Exception as e:
            print(f"[WARN] stage1 release-recent fallback failed: {e}")
    # Stage 2: relevance rerank and trim
    q_tokens = _query_tokens(query)
    ranked: List[Dict[str, Any]] = []
    for h in candidates:
        src = h.get("_source", {}) or {}
        s_tokens = _query_tokens(_source_text_for_match(src))
        overlap = len(q_tokens.intersection(s_tokens)) if q_tokens else 0
        base_score = float(h.get("_score") or 0.0)
        ranked.append({"hit": h, "overlap": overlap, "base_score": base_score})
    # Overlap dominates; ES score breaks ties among equally-overlapping hits.
    ranked.sort(key=lambda x: (x["overlap"], x["base_score"]), reverse=True)
    relevant = []
    for x in ranked:
        src = x["hit"].get("_source", {}) or {}
        if _is_strong_source_match(query, src):
            relevant.append(x)
    if relevant:
        return [x["hit"] for x in relevant[:max_sources]]
    # Explicitly return no sources instead of attaching weak matches.
    return []
def extract_pending_tasks_from_source(src: Dict[str, Any]) -> List[Dict[str, Any]]: def extract_pending_tasks_from_source(src: Dict[str, Any]) -> List[Dict[str, Any]]:
text = str(src.get("text") or src.get("description") or src.get("summary") or "").strip() text = str(src.get("text") or src.get("description") or src.get("summary") or "").strip()
if not text: if not text:
@ -1129,11 +1257,16 @@ def build_chat_prompt(
) )
context = "\n\n---\n\n".join(context_chunks) if context_chunks else "No retrieved context." context = "\n\n---\n\n".join(context_chunks) if context_chunks else "No retrieved context."
hist = "\n".join(history_lines) if history_lines else "(none)" hist = "\n".join(history_lines) if history_lines else "(none)"
now_local = datetime.now().astimezone()
now_line = now_local.strftime("%A, %Y-%m-%d %H:%M:%S %Z")
return ( return (
"You are a practical personal assistant. Be concise, factual, and useful.\n" "You are a practical personal assistant. Be concise, factual, and useful.\n"
"Use retrieved context when available. If uncertain, say so briefly and ask one clarifying question.\n" "Use both conversation history and retrieved context when available.\n"
"Treat prior turns in this session as valid context unless the user explicitly asks to reset/forget.\n"
"If uncertain, say so briefly and ask one clarifying question.\n"
"Do not claim external actions were already performed.\n\n" "Do not claim external actions were already performed.\n\n"
f"Current local datetime: {now_line}\n"
f"Release filter: {release_name or '(none)'}\n" f"Release filter: {release_name or '(none)'}\n"
f"Conversation history:\n{hist}\n\n" f"Conversation history:\n{hist}\n\n"
f"Retrieved context:\n{context}\n\n" f"Retrieved context:\n{context}\n\n"
@ -1149,6 +1282,68 @@ def _append_chat_turn(session_id: str, role: str, content: str) -> None:
if len(turns) > max_items: if len(turns) > max_items:
turns = turns[-max_items:] turns = turns[-max_items:]
ASSISTANT_CHAT_SESSIONS[session_id] = turns ASSISTANT_CHAT_SESSIONS[session_id] = turns
_save_chat_sessions()
def _load_chat_sessions() -> Dict[str, List[Dict[str, str]]]:
    """Load persisted chat sessions from disk, dropping malformed entries."""
    store = Path(ASSISTANT_CHAT_STORE_PATH)
    if not store.exists() or not store.is_file():
        return {}
    try:
        parsed = json.loads(store.read_text(encoding="utf-8", errors="replace"))
    except Exception:
        # Unreadable or corrupt store file: start with empty memory.
        return {}
    if not isinstance(parsed, dict):
        return {}
    sessions: Dict[str, List[Dict[str, str]]] = {}
    for session_id, raw_turns in parsed.items():
        if not isinstance(session_id, str) or not isinstance(raw_turns, list):
            continue
        kept: List[Dict[str, str]] = []
        # Only keep the most recent window of well-formed user/assistant turns.
        for turn in raw_turns[-ASSISTANT_CHAT_MAX_TURNS * 2 :]:
            if not isinstance(turn, dict):
                continue
            role = str(turn.get("role") or "").strip()
            content = str(turn.get("content") or "")
            if role in ("user", "assistant") and content:
                kept.append({"role": role, "content": content})
        if kept:
            sessions[session_id] = kept
    return sessions
def _save_chat_sessions() -> None:
    """Persist in-memory chat sessions to disk atomically (best effort)."""
    store = Path(ASSISTANT_CHAT_STORE_PATH)
    try:
        snapshot: Dict[str, List[Dict[str, str]]] = {}
        for session_id, turns in ASSISTANT_CHAT_SESSIONS.items():
            if not isinstance(session_id, str) or not isinstance(turns, list):
                continue
            snapshot[session_id] = [
                {"role": str(turn.get("role") or ""), "content": str(turn.get("content") or "")}
                for turn in turns[-ASSISTANT_CHAT_MAX_TURNS * 2 :]
                if isinstance(turn, dict)
            ]
        # Write to a temp file and rename so readers never see a partial file.
        tmp_path = store.with_suffix(store.suffix + ".tmp")
        tmp_path.write_text(json.dumps(snapshot, ensure_ascii=False), encoding="utf-8")
        tmp_path.replace(store)
    except Exception as e:
        print(f"[WARN] failed saving chat sessions: {e}")
# Initialize persisted chat memory at startup.
ASSISTANT_CHAT_SESSIONS = _load_chat_sessions()
def _build_chat_retrieval_query(message: str, history: List[Dict[str, str]]) -> str:
# Blend latest question with a small slice of recent user context.
parts: List[str] = [message.strip()]
user_turns = [t.get("content", "").strip() for t in history if t.get("role") == "user" and t.get("content")]
for t in user_turns[-3:]:
if t and t.lower() != message.strip().lower():
parts.append(t[:200])
return " ".join(p for p in parts if p).strip()
def _slugify(text: str) -> str: def _slugify(text: str) -> str:
@ -1297,6 +1492,136 @@ async def ollama_generate(prompt: str) -> str:
return (r.json().get("response") or "").strip() return (r.json().get("response") or "").strip()
def _redact_for_cloud(text: str) -> Dict[str, Any]:
out = text or ""
mapping: Dict[str, str] = {}
counters: Dict[str, int] = {}
stats: Dict[str, int] = {}
patterns = [
("EMAIL", re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")),
("PHONE", re.compile(r"\b(?:\+?\d[\d().\-\s]{6,}\d)\b")),
("IPV4", re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")),
("URL", re.compile(r"\bhttps?://[^\s]+")),
("INTERNAL_HOST", re.compile(r"\b[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.(?:lan|local|internal)\b")),
("OPENAI_KEY", re.compile(r"\bsk-[A-Za-z0-9\-_]{20,}\b")),
]
for label, pat in patterns:
counters[label] = 0
def repl(match: re.Match[str], _label: str = label) -> str:
counters[_label] += 1
placeholder = f"[[{_label}_{counters[_label]}]]"
mapping[placeholder] = match.group(0)
return placeholder
out = pat.sub(repl, out)
if counters[label] > 0:
stats[label.lower()] = counters[label]
stats["total"] = sum(v for k, v in stats.items() if k != "total")
return {"text": out, "mapping": mapping, "stats": stats}
def _rehydrate_from_cloud(text: str, mapping: Dict[str, str]) -> str:
out = text or ""
for placeholder, original in mapping.items():
out = out.replace(placeholder, original)
return out
def _looks_uncertain(text: str) -> bool:
t = (text or "").strip().lower()
if not t:
return True
cues = [
"i'm unsure",
"i am unsure",
"i don't have enough context",
"i do not have enough context",
"could you provide more",
"could you clarify",
"can you clarify",
"unable to provide",
"please provide more information",
"i need more context",
"i need more information",
]
if any(c in t for c in cues):
return True
return len(t) < 40
async def openai_generate(prompt: str) -> str:
    """Send a single-turn prompt to the OpenAI chat completions API.

    Raises RuntimeError when no API key is configured. Returns the first
    choice's message content (stripped), or "" when no choices come back.
    """
    if not OPENAI_API_KEY:
        raise RuntimeError("OPENAI_API_KEY is not configured")
    request_headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json",
    }
    request_body = {
        "model": OPENAI_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        # Low temperature keeps drafts/answers deterministic-ish.
        "temperature": 0.2,
    }
    async with httpx.AsyncClient(timeout=90) as client:
        response = await client.post(
            "https://api.openai.com/v1/chat/completions",
            headers=request_headers,
            json=request_body,
        )
        response.raise_for_status()
        data = response.json()
    choices = data.get("choices") or []
    if not choices:
        return ""
    message = choices[0].get("message") or {}
    return str(message.get("content") or "").strip()
async def llm_generate_with_router(prompt: str) -> Dict[str, Any]:
    """Route a prompt to local Ollama and/or OpenAI according to LLM_PROVIDER.

    Returns a dict: {"text", "provider_used", "escalated", "redaction_stats"}.
      - "openai": cloud first (with redaction); falls back to local on error.
      - "local": local only; a local failure is re-raised to the caller.
      - "hybrid": local first; escalates to cloud only when the local answer
        is empty or looks uncertain, keeping the local result if cloud fails.
    """
    # Unknown LLM_PROVIDER values degrade safely to "local".
    provider = LLM_PROVIDER if LLM_PROVIDER in ("local", "hybrid", "openai") else "local"

    async def _cloud_call() -> Dict[str, Any]:
        # Redact sensitive spans before the prompt leaves the box, then
        # rehydrate placeholders in the cloud reply.
        if not ALLOW_CLOUD_ESCALATION:
            raise RuntimeError("Cloud escalation is disabled")
        redacted = _redact_for_cloud(prompt)
        cloud_text = await openai_generate(redacted["text"])
        return {
            "text": _rehydrate_from_cloud(cloud_text, redacted["mapping"]),
            "provider_used": "openai",
            "escalated": True,
            "redaction_stats": redacted["stats"],
        }

    if provider == "openai":
        try:
            return await _cloud_call()
        except Exception as e:
            # Cloud-first mode still degrades to local rather than failing.
            print(f"[WARN] openai provider failed, falling back to local: {e}")
            local_text = await ollama_generate(prompt)
            return {"text": local_text, "provider_used": "local", "escalated": False, "redaction_stats": {}}
    local_text = ""
    local_err: Optional[Exception] = None
    try:
        local_text = await ollama_generate(prompt)
    except Exception as e:
        # Remember the error: "local" mode re-raises it, "hybrid" may recover.
        local_err = e
        print(f"[WARN] local generation failed: {e}")
    if provider == "local":
        if local_err:
            raise local_err
        return {"text": local_text, "provider_used": "local", "escalated": False, "redaction_stats": {}}
    # hybrid: local first, escalate only on weak/uncertain outputs.
    if local_text and not _looks_uncertain(local_text):
        return {"text": local_text, "provider_used": "local", "escalated": False, "redaction_stats": {}}
    try:
        return await _cloud_call()
    except Exception as e:
        # Best effort: keep whatever local produced (possibly "") over erroring.
        print(f"[WARN] hybrid cloud escalation failed, keeping local result: {e}")
        return {"text": local_text, "provider_used": "local", "escalated": False, "redaction_stats": {}}
def fallback_draft_text(payload: AssistantDraftPayload) -> str: def fallback_draft_text(payload: AssistantDraftPayload) -> str:
recipient = payload.recipient or "there" recipient = payload.recipient or "there"
tone = (payload.tone or "").lower() tone = (payload.tone or "").lower()
@ -2639,33 +2964,36 @@ async def assistant_learn(payload: AssistantLearnPayload, x_admin_api_key: Optio
async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optional[str] = Header(default=None)): async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optional[str] = Header(default=None)):
check_admin_api_key(x_admin_api_key) check_admin_api_key(x_admin_api_key)
session_id = (payload.session_id or str(uuid.uuid4())).strip() session_id = (payload.session_id or str(uuid.uuid4())).strip()
msg_norm = (payload.message or "").strip().lower()
if re.search(r"\b(what day is it|what date is it|what day is it today|what date is it today)\b", msg_norm):
now_local = datetime.now().astimezone()
answer = f"Today is {now_local.strftime('%A, %B %d, %Y')}."
_append_chat_turn(session_id, "user", payload.message)
_append_chat_turn(session_id, "assistant", answer)
return AssistantChatResponse(
session_id=session_id,
answer=answer,
sources=[],
confidence=0.95,
release_name=payload.release_name,
provider_used="local",
escalated=False,
redaction_stats={},
)
history: List[Dict[str, str]] = [] history: List[Dict[str, str]] = []
if payload.history: if payload.history:
history = [{"role": h.role, "content": h.content} for h in payload.history if h.content.strip()] history = [{"role": h.role, "content": h.content} for h in payload.history if h.content.strip()]
else: else:
history = ASSISTANT_CHAT_SESSIONS.get(session_id, []) history = ASSISTANT_CHAT_SESSIONS.get(session_id, [])
retrieval_query = _build_chat_retrieval_query(payload.message, history)
hits: List[Dict[str, Any]] = [] hits: List[Dict[str, Any]] = await _retrieve_sources_two_stage(
try: query=retrieval_query,
hits = await es_search_hits( release_name=payload.release_name,
q=payload.message, max_sources=payload.max_sources,
size=payload.max_sources, include_release_recent_fallback=True,
release_name=payload.release_name, )
)
except Exception as e:
print(f"[WARN] assistant_chat retrieval failed: {e}")
hits = []
if not hits and payload.release_name:
try:
hits = await es_recent_by_release(payload.release_name, size=payload.max_sources)
except Exception as e:
print(f"[WARN] assistant_chat release fallback failed: {e}")
if not hits:
try:
hits = await es_recent_messages(size=payload.max_sources, release_name=payload.release_name, q=None)
except Exception as e:
print(f"[WARN] assistant_chat inbox fallback failed: {e}")
prompt = build_chat_prompt( prompt = build_chat_prompt(
user_message=payload.message, user_message=payload.message,
@ -2673,9 +3001,16 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
source_docs=hits, source_docs=hits,
release_name=payload.release_name, release_name=payload.release_name,
) )
provider_used: Literal["local", "openai"] = "local"
escalated = False
redaction_stats: Dict[str, int] = {}
try: try:
answer = await ollama_generate(prompt) llm = await llm_generate_with_router(prompt)
if not answer.strip(): answer = str(llm.get("text") or "").strip()
provider_used = "openai" if llm.get("provider_used") == "openai" else "local"
escalated = bool(llm.get("escalated", False))
redaction_stats = llm.get("redaction_stats") or {}
if not answer:
answer = "I don't have enough context to answer confidently. Can you share one more detail?" answer = "I don't have enough context to answer confidently. Can you share one more detail?"
except Exception as e: except Exception as e:
print(f"[WARN] assistant_chat generation failed: {e}") print(f"[WARN] assistant_chat generation failed: {e}")
@ -2693,7 +3028,8 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
score=float(h.get("_score")) if h.get("_score") is not None else None, score=float(h.get("_score")) if h.get("_score") is not None else None,
) )
) )
source_count = len([s for s in sources if s.concept_id]) strong_sources = [s for s in sources if s.concept_id]
source_count = len(strong_sources)
confidence = 0.35 confidence = 0.35
if source_count >= 5: if source_count >= 5:
confidence = 0.85 confidence = 0.85
@ -2703,6 +3039,9 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
confidence = 0.6 confidence = 0.6
if len(answer.strip()) < 30: if len(answer.strip()) < 30:
confidence = min(confidence, 0.45) confidence = min(confidence, 0.45)
if _looks_uncertain(answer):
strong_sources = []
confidence = min(confidence, 0.4)
_append_chat_turn(session_id, "user", payload.message) _append_chat_turn(session_id, "user", payload.message)
_append_chat_turn(session_id, "assistant", answer) _append_chat_turn(session_id, "assistant", answer)
@ -2710,9 +3049,12 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
return AssistantChatResponse( return AssistantChatResponse(
session_id=session_id, session_id=session_id,
answer=answer, answer=answer,
sources=[s for s in sources if s.concept_id], sources=strong_sources,
confidence=confidence, confidence=confidence,
release_name=payload.release_name, release_name=payload.release_name,
provider_used=provider_used,
escalated=escalated,
redaction_stats=redaction_stats,
) )
@ -3110,43 +3452,24 @@ async def assistant_draft(payload: AssistantDraftPayload, x_admin_api_key: Optio
" ".join(payload.constraints), " ".join(payload.constraints),
] ]
).strip() ).strip()
try: hits = await _retrieve_sources_two_stage(
hits = await es_search_hits( query=retrieval_query,
q=retrieval_query, release_name=payload.release_name,
size=payload.max_sources, max_sources=payload.max_sources,
release_name=payload.release_name, include_release_recent_fallback=True,
) )
except Exception as e:
print(f"[WARN] assistant_draft retrieval failed: {e}")
hits = []
if not hits and payload.release_name:
try:
hits = await es_recent_by_release(payload.release_name, size=payload.max_sources)
except Exception as e:
print(f"[WARN] assistant_draft release fallback retrieval failed: {e}")
if not hits and payload.release_name:
try:
hits = await es_recent_messages(
size=payload.max_sources,
release_name=payload.release_name,
q=None,
)
except Exception as e:
print(f"[WARN] assistant_draft inbox fallback retrieval failed: {e}")
if not hits:
try:
hits = await es_recent_messages(
size=payload.max_sources,
release_name=None,
q=None,
)
except Exception as e:
print(f"[WARN] assistant_draft global fallback retrieval failed: {e}")
prompt = build_assistant_prompt(payload, hits) prompt = build_assistant_prompt(payload, hits)
used_fallback = False used_fallback = False
provider_used: Literal["local", "openai"] = "local"
escalated = False
redaction_stats: Dict[str, int] = {}
try: try:
draft = await ollama_generate(prompt) llm = await llm_generate_with_router(prompt)
draft = str(llm.get("text") or "").strip()
provider_used = "openai" if llm.get("provider_used") == "openai" else "local"
escalated = bool(llm.get("escalated", False))
redaction_stats = llm.get("redaction_stats") or {}
if not draft.strip(): if not draft.strip():
used_fallback = True used_fallback = True
draft = fallback_draft_text(payload) draft = fallback_draft_text(payload)
@ -3198,6 +3521,9 @@ async def assistant_draft(payload: AssistantDraftPayload, x_admin_api_key: Optio
confidence=confidence, confidence=confidence,
needs_review=True, needs_review=True,
release_name=payload.release_name, release_name=payload.release_name,
provider_used=provider_used,
escalated=escalated,
redaction_stats=redaction_stats,
) )

View file

@ -146,7 +146,13 @@ async function makeDraft() {
max_sources: 5, max_sources: 5,
}); });
const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 5).join(", "); const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 5).join(", ");
out.textContent = `${data.draft || ""}\n\nconfidence=${data.confidence}\nneeds_review=${data.needs_review}\nsources=${sourceLine}`; out.textContent =
`${data.draft || ""}\n\n` +
`confidence=${data.confidence}\n` +
`needs_review=${data.needs_review}\n` +
`provider=${data.provider_used || "local"}\n` +
`escalated=${Boolean(data.escalated)}\n` +
`sources=${sourceLine}`;
} catch (e) { } catch (e) {
out.textContent = `Error: ${String(e)}`; out.textContent = `Error: ${String(e)}`;
} }
@ -261,7 +267,11 @@ async function sendChat() {
max_sources: 6, max_sources: 6,
}); });
const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 4).join(", "); const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 4).join(", ");
appendChat("assistant", data.answer || "", `confidence=${data.confidence} | sources=${sourceLine || "-"}`); appendChat(
"assistant",
data.answer || "",
`confidence=${data.confidence} | provider=${data.provider_used || "local"} | escalated=${Boolean(data.escalated)} | sources=${sourceLine || "-"}`
);
} catch (e) { } catch (e) {
appendChat("assistant", `Error: ${String(e)}`, ""); appendChat("assistant", `Error: ${String(e)}`, "");
} }