diff --git a/.assistant_chat_sessions.json b/.assistant_chat_sessions.json
new file mode 100644
index 0000000..bef83c3
--- /dev/null
+++ b/.assistant_chat_sessions.json
@@ -0,0 +1 @@
+{"main": [{"role": "user", "content": "Wow! that was serious. Chat was only in RAM.\nI think we are persisting it now."}, {"role": "assistant", "content": "We are indeed using a persistence method to store our conversation history for the session. This is why you can recall previous conversations and I can access the context from our last interaction. Would you like me to review or continue with the current conversation?"}, {"role": "user", "content": "Yes please."}, {"role": "assistant", "content": "Let's review the conversation history then. As it turns out, we don't have any prior conversation history to recall since this is a new session and our previous conversation was only in RAM. Would you like me to start fresh or proceed with a blank slate?"}, {"role": "user", "content": "I have fixed that issue now."}, {"role": "assistant", "content": "You've successfully resolved the issue from our previous conversation, which was still in RAM. We can continue with a clean slate then. What would you like to discuss next?"}, {"role": "user", "content": "You are really quick. CAn you see how fast you are?"}, {"role": "assistant", "content": "I'm glad I could respond promptly for our conversation. Yes, I can process and respond at high speeds due to my design and infrastructure. Is there something specific you'd like me to assist with now that we've cleared the previous issue?"}, {"role": "user", "content": "Im going to Colombia late March."}, {"role": "assistant", "content": "Colombia is a wonderful country. Are you traveling for business or pleasure? Do you have any specific destinations in mind, such as cities like Bogotá or Medellín, or perhaps the coffee region of Zona Cafetera?"}, {"role": "user", "content": "Pleasure. To north east, somewhere. I forgot the name."}, {"role": "assistant", "content": "It sounds like you're looking for a relaxing trip to Colombia's northeastern region. Which department in northeastern Colombia are you thinking of visiting? Maybe we can jog your memory with some suggestions like La Guajira, Cesar, or Magdalena?"}, {"role": "user", "content": "Starts with a P."}, {"role": "assistant", "content": "Piedras or another department starting with \"P\" isn't a common destination in northeastern Colombia. However, there is one possible match: the Catatumbo lightning region in La Guajira's neighboring department, Cesar, and then there's also the Magdalena department which has some points of interest, but none that I found that directly start with \"P\". Is it likely you're thinking of a specific place like Santa Marta or Cartagena?"}, {"role": "user", "content": "Three hours from Santa Marta. By the coast."}, {"role": "assistant", "content": "Since you mentioned traveling to a location 3 hours from Santa Marta, by the coast, I'm going to take a guess that you're thinking of Taganga or perhaps Tayrona National Park? Both are popular coastal destinations near Santa Marta and fit your time frame. Am I correct?"}, {"role": "user", "content": "No, Palamino."}, {"role": "assistant", "content": "Palamino is indeed a coastal town in the Magdalena department of Colombia, about 3 hours from Santa Marta. Is there anything specific you'd like to know or do while planning your trip to Palamino?"}, {"role": "user", "content": "What should I bring?"}, {"role": "assistant", "content": "When traveling to Palamino, it's a good idea to pack accordingly for the coastal climate and any activities you plan on doing. You may want to consider bringing:\n\n* Lightweight and breathable clothing\n* Comfortable shoes or sandals for water activities\n* Sunscreen and sun hat for sun protection\n* Insect repellent to prevent bug bites\n* A swimsuit for beach time or swimming in nearby natural pools\n* Power adapter for charging your devices, as Colombia's power outlets are Type A and B\n* Water bottle to stay hydrated\n\nKeep in mind that Palamino is a rural area, so you may not have access to certain amenities. It's always a good idea to check with local authorities or tour operators for specific recommendations on what to bring.\n\nWould you like me to suggest any activities or accommodations in Palamino?"}, {"role": "user", "content": "My friend is already there. He said to bring his bo-staf. I dont know how expensive that would be."}, {"role": "assistant", "content": "It seems your friend wants you to bring a portable stove, not \"bo-staf\". The cost of a portable stove can vary depending on the brand and quality, but you can find basic ones for around $20-$50 USD in local outdoor gear stores or online. If you're planning to buy one, I'd recommend checking with your friend about their specific needs and preferences. Would you like me to help you estimate travel costs or provide more information on activities in Palamino?"}, {"role": "user", "content": "No, bo staff. It is a 1,9 m oak staff. For matrial arts."}, {"role": "assistant", "content": "It seems your friend wants you to bring their Bo Staff, a 1.9m long wooden staff used for martial arts training. The cost of transporting such an item depends on various factors like the airline's baggage policy, shipping costs, and your mode of transportation.\n\nTo give you a better estimate, I'd need to know:\n\n* Are you flying or traveling by land?\n* Does your friend have any specific instructions or requirements for transporting the Bo Staff?\n* Have you checked with the airline or transportation provider about their policies on carrying martial arts equipment?"}, {"role": "user", "content": "From copenhagen. by air."}, {"role": "assistant", "content": "Since you're traveling from Copenhagen and flying, I'd recommend checking with your airline's baggage policy regarding oversized items like a 1.9m Bo Staff. Some airlines might consider it as checked luggage or special equipment, while others may not allow it at all.\n\nYou can expect to pay an extra fee for transporting the Bo Staff, which could range from $50-$200 USD or more depending on the airline and its policies. I'd suggest contacting your airline directly to confirm their requirements and costs.\n\nAdditionally, you might also want to check with your friend if they have any specific instructions for handling the Bo Staff during transport or storage."}, {"role": "user", "content": "The staff is sturdy, I just need to wrap it well. I have no ticket yet so its all very unclear but if I plan for it when I find a flight I might pull it of."}, {"role": "assistant", "content": "It sounds like you're planning ahead and considering the logistics of transporting the Bo Staff with your airline ticket pending. Since you'll be wrapping the staff well, you might not need to worry too much about its sturdiness during transport.\n\nWhen you book your flight, I recommend confirming the airline's baggage policy regarding oversized items as I mentioned earlier. This will give you a clearer idea of what to expect and how to proceed with transporting the Bo Staff.\n\nKeep me updated on any developments or clarifications from your friend or the airline, and I'll be here to help if needed!"}, {"role": "user", "content": "Do I have any new mails?"}, {"role": "assistant", "content": "I'm not aware of any previous conversations about checking emails. However, as we are in a conversation session, you haven't mentioned receiving any new mail or having email checked recently. If you'd like to check your email or know if there's anything specific to follow up on, I can try to help with that.\n\nWould you like me to suggest how to check your email or is there something else I can assist you with?"}, {"role": "user", "content": "The emails are in your knowledge database."}, {"role": "assistant", "content": "I have access to information about our previous conversation, but it appears we didn't discuss specific details about your email account or any new mail notifications. However, I can try to retrieve that information for you if you'd like.\n\nLet me check... (Retrieving context)\n\nAh, yes! According to the conversation history, your friend mentioned bringing a Bo Staff, which is a 1.9m long wooden staff used for martial arts training. And, earlier in our conversation, I suggested packing essentials for Palamino, Colombia.\n\nIf you'd like me to check on any new mail notifications or provide more information about your trip to Palamino, please let me know!"}, {"role": "user", "content": "Never mind. All my emails are scam anyway. Mostly some aliexpress scam shit."}, {"role": "assistant", "content": "I've taken note that you don't plan on checking your email or responding to any notifications. It sounds like you're focusing on planning your trip to Palamino and transporting the Bo Staff.\n\nTo confirm, I won't assume any action regarding your emails. If anything else comes up related to your trip, feel free to let me know!\n\nWould you like to discuss more about activities or accommodations in Palamino once you arrive?"}, {"role": "user", "content": "Can you see this message:55.B6.20744.3F7E1796@i-0197e7997f094d6a0.mta1vrest.sd.prd.sparkpost"}, {"role": "assistant", "content": "I can process and acknowledge the message you provided, but I don't have any context or relevance to this message. It appears to be a system-generated email notification.\n\nAs we were previously discussing your trip to Palamino, Colombia, and transporting the Bo Staff, if there's anything related to these topics that you'd like to discuss further, please feel free to share.\n\nIf not, I can summarize our conversation so far:\n\n* We discussed planning a trip to Palamino, Colombia.\n* You mentioned bringing your friend's 1.9m Bo Staff for martial arts training.\n* We touched on packing essentials and airline baggage policies regarding oversized items like the Bo Staff.\n* You decided not to check your email due to spam notifications.\n\nIs there anything else you'd like to discuss or any next steps you'd like to take?"}]}
\ No newline at end of file
diff --git a/app.py b/app.py
index 916ccfe..6d6b92a 100644
--- a/app.py
+++ b/app.py
@@ -40,6 +40,10 @@ IPFS_API = os.getenv("IPFS_API", "http://localhost:5001")  # Kubo HTTP API
 OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
 OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
 OLLAMA_EMBED_MODEL = os.getenv("OLLAMA_EMBED_MODEL", "nomic-embed-text")
+LLM_PROVIDER = os.getenv("LLM_PROVIDER", "local").strip().lower()  # local|hybrid|openai
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "").strip()
+OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini").strip()
+ALLOW_CLOUD_ESCALATION = os.getenv("ALLOW_CLOUD_ESCALATION", "0").strip().lower() in ("1", "true", "yes", "on")
 PROJECTOR_SSH_HOST = os.getenv("PROJECTOR_SSH_HOST", "lakehouse-core.rakeroots.lan")
 PROJECTOR_REMOTE_DIR = os.getenv("PROJECTOR_REMOTE_DIR", "/tmp/jecio")
 PROJECTOR_REMOTE_SCRIPT = os.getenv("PROJECTOR_REMOTE_SCRIPT", "./run-projector-standard.sh")
@@ -208,6 +212,9 @@ class AssistantDraftResponse(BaseModel):
     confidence: float
     needs_review: bool
     release_name: Optional[str] = None
+    provider_used: Literal["local", "openai"] = "local"
+    escalated: bool = False
+    redaction_stats: Dict[str, int] = Field(default_factory=dict)


 class AssistantPlanPayload(BaseModel):
@@ -302,6 +309,9 @@ class AssistantChatResponse(BaseModel):
     sources: List[AssistantDraftSource]
     confidence: float
     release_name: Optional[str] = None
+    provider_used: Literal["local", "openai"] = "local"
+    escalated: bool = False
+    redaction_stats: Dict[str, int] = Field(default_factory=dict)


 class AssistantSelfImprovePayload(BaseModel):
@@ -906,7 +916,16 @@ TASK_AI_CACHE_TTL_SEC = int(os.getenv("TASK_AI_CACHE_TTL_SEC", "3600"))
 TASK_AI_CACHE_MAX_SIZE = int(os.getenv("TASK_AI_CACHE_MAX_SIZE", "5000"))
 TASK_AI_CACHE: Dict[str, Dict[str, Any]] = {}
 ASSISTANT_CHAT_MAX_TURNS = int(os.getenv("ASSISTANT_CHAT_MAX_TURNS", "20"))
+ASSISTANT_CHAT_STORE_PATH = os.getenv("ASSISTANT_CHAT_STORE_PATH", ".assistant_chat_sessions.json")
 ASSISTANT_CHAT_SESSIONS: Dict[str, List[Dict[str, str]]] = {}
+ASSISTANT_CHAT_MIN_TOKEN_OVERLAP = int(os.getenv("ASSISTANT_CHAT_MIN_TOKEN_OVERLAP", "1"))
+ASSISTANT_SOURCE_CANDIDATE_MULTIPLIER = int(os.getenv("ASSISTANT_SOURCE_CANDIDATE_MULTIPLIER", "4"))
+ASSISTANT_SOURCE_MIN_COVERAGE = float(os.getenv("ASSISTANT_SOURCE_MIN_COVERAGE", "0.6"))
+ASSISTANT_QUERY_STOPWORDS = {
+    "the", "and", "for", "with", "that", "this", "from", "have", "has", "had", "are", "was", "were", "will",
+    "would", "should", "could", "can", "you", "your", "about", "what", "when", "where", "which", "who", "whom",
+    "why", "how", "tomorrow", "today", "please", "any", "there", "need", "want", "know", "does", "did", "done",
+}


 def _split_sentences(text: str) -> List[str]:
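Review note: the four new knobs above gate all cloud traffic, and ALLOW_CLOUD_ESCALATION must be truthy before any OpenAI call is attempted. A minimal sketch of exercising them locally, assuming the defaults shown in the hunk (the values below are illustrative, not recommendations):

    # Illustrative only — mirrors the env parsing above; set these before importing app.
    import os

    os.environ["LLM_PROVIDER"] = "hybrid"        # one of: local | hybrid | openai
    os.environ["ALLOW_CLOUD_ESCALATION"] = "1"   # any of "1", "true", "yes", "on" enables cloud calls
    os.environ["OPENAI_MODEL"] = "gpt-4o-mini"
    # OPENAI_API_KEY must come from the environment; with it unset, every cloud
    # path raises in openai_generate and the router falls back to local Ollama.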
@@ -928,6 +947,115 @@ def _extract_who(text: str, default_sender: Optional[str]) -> Optional[str]:
     return default_sender or None


+def _query_tokens(text: str) -> set[str]:
+    return {
+        t for t in re.findall(r"[a-z0-9]{3,}", (text or "").lower())
+        if t not in ASSISTANT_QUERY_STOPWORDS
+    }
+
+
+def _source_text_for_match(src: Dict[str, Any]) -> str:
+    return " ".join(
+        [
+            str(src.get("text") or ""),
+            str(src.get("description") or ""),
+            str(src.get("summary") or ""),
+            str(src.get("display_name") or ""),
+            str(src.get("canonical_name") or ""),
+        ]
+    )
+
+
+def _filter_relevant_hits_for_query(hits: List[Dict[str, Any]], query: str) -> List[Dict[str, Any]]:
+    q_tokens = _query_tokens(query)
+    if not q_tokens:
+        return hits
+    out: List[Dict[str, Any]] = []
+    for h in hits:
+        src = h.get("_source", {}) or {}
+        s_tokens = _query_tokens(_source_text_for_match(src))
+        overlap = len(q_tokens.intersection(s_tokens))
+        if overlap >= ASSISTANT_CHAT_MIN_TOKEN_OVERLAP:
+            out.append(h)
+    return out
+
+
+def _is_strong_source_match(query: str, src: Dict[str, Any]) -> bool:
+    q_tokens = _query_tokens(query)
+    if not q_tokens:
+        return True
+    s_tokens = _query_tokens(_source_text_for_match(src))
+    overlap = len(q_tokens.intersection(s_tokens))
+    q_len = max(1, len(q_tokens))
+    coverage = overlap / q_len
+    min_overlap = ASSISTANT_CHAT_MIN_TOKEN_OVERLAP
+    # For multi-term questions, require at least 2 matched terms by default.
+    if q_len >= 2:
+        min_overlap = max(min_overlap, 2)
+    return overlap >= min_overlap and coverage >= ASSISTANT_SOURCE_MIN_COVERAGE
+
+
+async def _retrieve_sources_two_stage(
+    query: str,
+    release_name: Optional[str],
+    max_sources: int,
+    include_release_recent_fallback: bool = True,
+) -> List[Dict[str, Any]]:
+    candidate_size = max(max_sources, max_sources * max(2, ASSISTANT_SOURCE_CANDIDATE_MULTIPLIER))
+    seen_keys: set[str] = set()
+    candidates: List[Dict[str, Any]] = []
+
+    def _add_hits(hs: List[Dict[str, Any]]) -> None:
+        for h in hs:
+            src = h.get("_source", {}) or {}
+            key = str(src.get("concept_id") or src.get("source_pk") or "")
+            if not key:
+                key = hashlib.sha256(
+                    json.dumps(src, ensure_ascii=False, sort_keys=True).encode("utf-8")
+                ).hexdigest()[:20]
+            if key in seen_keys:
+                continue
+            seen_keys.add(key)
+            candidates.append(h)
+
+    # Stage 1: broad retrieval
+    try:
+        _add_hits(await es_search_hits(q=query, size=candidate_size, release_name=release_name))
+    except Exception as e:
+        print(f"[WARN] stage1 release search failed: {e}")
+    if len(candidates) < max_sources:
+        try:
+            _add_hits(await es_search_hits(q=query, size=candidate_size, release_name=None))
+        except Exception as e:
+            print(f"[WARN] stage1 global search failed: {e}")
+    if len(candidates) < max_sources and include_release_recent_fallback and release_name:
+        try:
+            _add_hits(await es_recent_by_release(release_name, size=candidate_size))
+        except Exception as e:
+            print(f"[WARN] stage1 release-recent fallback failed: {e}")
+
+    # Stage 2: relevance rerank and trim
+    q_tokens = _query_tokens(query)
+    ranked: List[Dict[str, Any]] = []
+    for h in candidates:
+        src = h.get("_source", {}) or {}
+        s_tokens = _query_tokens(_source_text_for_match(src))
+        overlap = len(q_tokens.intersection(s_tokens)) if q_tokens else 0
+        base_score = float(h.get("_score") or 0.0)
+        ranked.append({"hit": h, "overlap": overlap, "base_score": base_score})
+
+    ranked.sort(key=lambda x: (x["overlap"], x["base_score"]), reverse=True)
+    relevant = []
+    for x in ranked:
+        src = x["hit"].get("_source", {}) or {}
+        if _is_strong_source_match(query, src):
+            relevant.append(x)
+    if relevant:
+        return [x["hit"] for x in relevant[:max_sources]]
+    # Explicitly return no sources instead of attaching weak matches.
+    return []


 def extract_pending_tasks_from_source(src: Dict[str, Any]) -> List[Dict[str, Any]]:
     text = str(src.get("text") or src.get("description") or src.get("summary") or "").strip()
     if not text:
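Review note: the overlap/coverage gate above is easy to trace by hand. A self-contained worked example under the token rules in this hunk (the strings and the small stopword set are invented stand-ins):

    # Stand-alone trace of the _query_tokens + _is_strong_source_match logic.
    import re

    STOPWORDS = {"for", "the", "with"}  # stand-in for ASSISTANT_QUERY_STOPWORDS

    def toks(text: str) -> set[str]:
        return {t for t in re.findall(r"[a-z0-9]{3,}", text.lower()) if t not in STOPWORDS}

    q = toks("bo staff baggage fee")                    # {"staff", "baggage", "fee"}; "bo" fails the {3,} floor
    s = toks("airline baggage fee for oversized staff") # {"airline", "baggage", "fee", "oversized", "staff"}
    overlap = len(q & s)                                # 3
    coverage = overlap / max(1, len(q))                 # 1.0
    # len(q) >= 2 bumps min_overlap to 2; 3 >= 2 and 1.0 >= 0.6 → strong match.
    # A hit about, say, dinner plans would score overlap 0 and be dropped entirely.

One side effect worth flagging: the `{3,}` length floor means one- and two-character terms ("bo", short IDs) never participate in matching at all.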
+ return [x["hit"] for x in relevant[:max_sources]] + # Explicitly return no sources instead of attaching weak matches. + return [] + + def extract_pending_tasks_from_source(src: Dict[str, Any]) -> List[Dict[str, Any]]: text = str(src.get("text") or src.get("description") or src.get("summary") or "").strip() if not text: @@ -1129,11 +1257,16 @@ def build_chat_prompt( ) context = "\n\n---\n\n".join(context_chunks) if context_chunks else "No retrieved context." hist = "\n".join(history_lines) if history_lines else "(none)" + now_local = datetime.now().astimezone() + now_line = now_local.strftime("%A, %Y-%m-%d %H:%M:%S %Z") return ( "You are a practical personal assistant. Be concise, factual, and useful.\n" - "Use retrieved context when available. If uncertain, say so briefly and ask one clarifying question.\n" + "Use both conversation history and retrieved context when available.\n" + "Treat prior turns in this session as valid context unless the user explicitly asks to reset/forget.\n" + "If uncertain, say so briefly and ask one clarifying question.\n" "Do not claim external actions were already performed.\n\n" + f"Current local datetime: {now_line}\n" f"Release filter: {release_name or '(none)'}\n" f"Conversation history:\n{hist}\n\n" f"Retrieved context:\n{context}\n\n" @@ -1149,6 +1282,68 @@ def _append_chat_turn(session_id: str, role: str, content: str) -> None: if len(turns) > max_items: turns = turns[-max_items:] ASSISTANT_CHAT_SESSIONS[session_id] = turns + _save_chat_sessions() + + +def _load_chat_sessions() -> Dict[str, List[Dict[str, str]]]: + p = Path(ASSISTANT_CHAT_STORE_PATH) + if not p.exists() or not p.is_file(): + return {} + try: + raw = p.read_text(encoding="utf-8", errors="replace") + obj = json.loads(raw) + except Exception: + return {} + if not isinstance(obj, dict): + return {} + out: Dict[str, List[Dict[str, str]]] = {} + for sid, turns in obj.items(): + if not isinstance(sid, str) or not isinstance(turns, list): + continue + clean_turns: List[Dict[str, str]] = [] + for t in turns[-ASSISTANT_CHAT_MAX_TURNS * 2 :]: + if not isinstance(t, dict): + continue + role = str(t.get("role") or "").strip() + content = str(t.get("content") or "") + if role in ("user", "assistant") and content: + clean_turns.append({"role": role, "content": content}) + if clean_turns: + out[sid] = clean_turns + return out + + +def _save_chat_sessions() -> None: + p = Path(ASSISTANT_CHAT_STORE_PATH) + try: + payload: Dict[str, List[Dict[str, str]]] = {} + for sid, turns in ASSISTANT_CHAT_SESSIONS.items(): + if not isinstance(sid, str) or not isinstance(turns, list): + continue + payload[sid] = [ + {"role": str(t.get("role") or ""), "content": str(t.get("content") or "")} + for t in turns[-ASSISTANT_CHAT_MAX_TURNS * 2 :] + if isinstance(t, dict) + ] + tmp = p.with_suffix(p.suffix + ".tmp") + tmp.write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8") + tmp.replace(p) + except Exception as e: + print(f"[WARN] failed saving chat sessions: {e}") + + +# Initialize persisted chat memory at startup. +ASSISTANT_CHAT_SESSIONS = _load_chat_sessions() + + +def _build_chat_retrieval_query(message: str, history: List[Dict[str, str]]) -> str: + # Blend latest question with a small slice of recent user context. 
@@ -1297,6 +1492,136 @@ async def ollama_generate(prompt: str) -> str:
     return (r.json().get("response") or "").strip()


+def _redact_for_cloud(text: str) -> Dict[str, Any]:
+    out = text or ""
+    mapping: Dict[str, str] = {}
+    counters: Dict[str, int] = {}
+    stats: Dict[str, int] = {}
+
+    patterns = [
+        ("EMAIL", re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")),
+        ("PHONE", re.compile(r"\b(?:\+?\d[\d().\-\s]{6,}\d)\b")),
+        ("IPV4", re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")),
+        ("URL", re.compile(r"\bhttps?://[^\s]+")),
+        ("INTERNAL_HOST", re.compile(r"\b[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.(?:lan|local|internal)\b")),
+        ("OPENAI_KEY", re.compile(r"\bsk-[A-Za-z0-9\-_]{20,}\b")),
+    ]
+
+    for label, pat in patterns:
+        counters[label] = 0
+
+        def repl(match: re.Match[str], _label: str = label) -> str:
+            counters[_label] += 1
+            placeholder = f"[[{_label}_{counters[_label]}]]"
+            mapping[placeholder] = match.group(0)
+            return placeholder
+
+        out = pat.sub(repl, out)
+        if counters[label] > 0:
+            stats[label.lower()] = counters[label]
+
+    stats["total"] = sum(v for k, v in stats.items() if k != "total")
+    return {"text": out, "mapping": mapping, "stats": stats}
+
+
+def _rehydrate_from_cloud(text: str, mapping: Dict[str, str]) -> str:
+    out = text or ""
+    for placeholder, original in mapping.items():
+        out = out.replace(placeholder, original)
+    return out
+
+
+def _looks_uncertain(text: str) -> bool:
+    t = (text or "").strip().lower()
+    if not t:
+        return True
+    cues = [
+        "i'm unsure",
+        "i am unsure",
+        "i don't have enough context",
+        "i do not have enough context",
+        "could you provide more",
+        "could you clarify",
+        "can you clarify",
+        "unable to provide",
+        "please provide more information",
+        "i need more context",
+        "i need more information",
+    ]
+    if any(c in t for c in cues):
+        return True
+    return len(t) < 40
+
+
+async def openai_generate(prompt: str) -> str:
+    if not OPENAI_API_KEY:
+        raise RuntimeError("OPENAI_API_KEY is not configured")
+    headers = {
+        "Authorization": f"Bearer {OPENAI_API_KEY}",
+        "Content-Type": "application/json",
+    }
+    body = {
+        "model": OPENAI_MODEL,
+        "messages": [{"role": "user", "content": prompt}],
+        "temperature": 0.2,
+    }
+    async with httpx.AsyncClient(timeout=90) as h:
+        r = await h.post("https://api.openai.com/v1/chat/completions", headers=headers, json=body)
+        r.raise_for_status()
+        obj = r.json()
+    choices = obj.get("choices") or []
+    if not choices:
+        return ""
+    msg = choices[0].get("message") or {}
+    return str(msg.get("content") or "").strip()
+
+
+async def llm_generate_with_router(prompt: str) -> Dict[str, Any]:
+    provider = LLM_PROVIDER if LLM_PROVIDER in ("local", "hybrid", "openai") else "local"
+
+    async def _cloud_call() -> Dict[str, Any]:
+        if not ALLOW_CLOUD_ESCALATION:
+            raise RuntimeError("Cloud escalation is disabled")
+        redacted = _redact_for_cloud(prompt)
+        cloud_text = await openai_generate(redacted["text"])
+        return {
+            "text": _rehydrate_from_cloud(cloud_text, redacted["mapping"]),
+            "provider_used": "openai",
+            "escalated": True,
+            "redaction_stats": redacted["stats"],
+        }
+
+    if provider == "openai":
+        try:
+            return await _cloud_call()
+        except Exception as e:
+            print(f"[WARN] openai provider failed, falling back to local: {e}")
+        local_text = await ollama_generate(prompt)
+        return {"text": local_text, "provider_used": "local", "escalated": False, "redaction_stats": {}}
+
+    local_text = ""
+    local_err: Optional[Exception] = None
+    try:
+        local_text = await ollama_generate(prompt)
+    except Exception as e:
+        local_err = e
+        print(f"[WARN] local generation failed: {e}")
+
+    if provider == "local":
+        if local_err:
+            raise local_err
+        return {"text": local_text, "provider_used": "local", "escalated": False, "redaction_stats": {}}
+
+    # hybrid: local first, escalate only on weak/uncertain outputs.
+    if local_text and not _looks_uncertain(local_text):
+        return {"text": local_text, "provider_used": "local", "escalated": False, "redaction_stats": {}}
+    try:
+        return await _cloud_call()
+    except Exception as e:
+        print(f"[WARN] hybrid cloud escalation failed, keeping local result: {e}")
+        return {"text": local_text, "provider_used": "local", "escalated": False, "redaction_stats": {}}


 def fallback_draft_text(payload: AssistantDraftPayload) -> str:
     recipient = payload.recipient or "there"
     tone = (payload.tone or "").lower()
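Review note: escalation never ships raw identifiers to the cloud. A worked trace of the redact → call → rehydrate cycle, assuming the helpers above are importable from the `app` module (the input string and cloud reply are invented):

    # Placeholder numbering follows the per-label counters in _redact_for_cloud.
    from app import _redact_for_cloud, _rehydrate_from_cloud  # module name assumed

    red = _redact_for_cloud("Mail admin@rakeroots.lan about lakehouse-core.rakeroots.lan")
    print(red["text"])   # → "Mail [[EMAIL_1]] about [[INTERNAL_HOST_1]]"
    print(red["stats"])  # → {"email": 1, "internal_host": 1, "total": 2}

    cloud_reply = "Forwarded to [[EMAIL_1]] as requested."  # what the cloud model might echo back
    print(_rehydrate_from_cloud(cloud_reply, red["mapping"]))
    # → "Forwarded to admin@rakeroots.lan as requested."

Pattern order matters here: EMAIL runs before INTERNAL_HOST, so an address on a `.lan` domain is captured once as an email rather than matched twice.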
@@ -2639,33 +2964,36 @@ async def assistant_learn(payload: AssistantLearnPayload, x_admin_api_key: Optio
 async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optional[str] = Header(default=None)):
     check_admin_api_key(x_admin_api_key)
     session_id = (payload.session_id or str(uuid.uuid4())).strip()
+    msg_norm = (payload.message or "").strip().lower()
+    if re.search(r"\b(what day is it|what date is it|what day is it today|what date is it today)\b", msg_norm):
+        now_local = datetime.now().astimezone()
+        answer = f"Today is {now_local.strftime('%A, %B %d, %Y')}."
+        _append_chat_turn(session_id, "user", payload.message)
+        _append_chat_turn(session_id, "assistant", answer)
+        return AssistantChatResponse(
+            session_id=session_id,
+            answer=answer,
+            sources=[],
+            confidence=0.95,
+            release_name=payload.release_name,
+            provider_used="local",
+            escalated=False,
+            redaction_stats={},
+        )
     history: List[Dict[str, str]] = []
     if payload.history:
         history = [{"role": h.role, "content": h.content} for h in payload.history if h.content.strip()]
     else:
         history = ASSISTANT_CHAT_SESSIONS.get(session_id, [])
+    retrieval_query = _build_chat_retrieval_query(payload.message, history)

-    hits: List[Dict[str, Any]] = []
-    try:
-        hits = await es_search_hits(
-            q=payload.message,
-            size=payload.max_sources,
-            release_name=payload.release_name,
-        )
-    except Exception as e:
-        print(f"[WARN] assistant_chat retrieval failed: {e}")
-        hits = []
-    if not hits and payload.release_name:
-        try:
-            hits = await es_recent_by_release(payload.release_name, size=payload.max_sources)
-        except Exception as e:
-            print(f"[WARN] assistant_chat release fallback failed: {e}")
-    if not hits:
-        try:
-            hits = await es_recent_messages(size=payload.max_sources, release_name=payload.release_name, q=None)
-        except Exception as e:
-            print(f"[WARN] assistant_chat inbox fallback failed: {e}")
+    hits: List[Dict[str, Any]] = await _retrieve_sources_two_stage(
+        query=retrieval_query,
+        release_name=payload.release_name,
+        max_sources=payload.max_sources,
+        include_release_recent_fallback=True,
+    )

     prompt = build_chat_prompt(
         user_message=payload.message,
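Review note: retrieval now searches on a blended query rather than the bare message, which is what lets a terse follow-up like "What should I bring?" still land on trip-related documents. A trace of `_build_chat_retrieval_query`, assuming it is importable from `app` (the history is invented):

    from app import _build_chat_retrieval_query  # module name assumed

    history = [
        {"role": "user", "content": "Im going to Colombia late March."},
        {"role": "assistant", "content": "Colombia is a wonderful country..."},
        {"role": "user", "content": "What should I bring?"},
    ]
    q = _build_chat_retrieval_query("How big can checked baggage be?", history)
    # → "How big can checked baggage be? Im going to Colombia late March. What should I bring?"
    # Only the last three *user* turns are blended, each truncated to 200 chars,
    # and an exact (case-insensitive) repeat of the current message is skipped.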
@@ -2673,9 +3001,16 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
         source_docs=hits,
         release_name=payload.release_name,
     )
+    provider_used: Literal["local", "openai"] = "local"
+    escalated = False
+    redaction_stats: Dict[str, int] = {}
     try:
-        answer = await ollama_generate(prompt)
-        if not answer.strip():
+        llm = await llm_generate_with_router(prompt)
+        answer = str(llm.get("text") or "").strip()
+        provider_used = "openai" if llm.get("provider_used") == "openai" else "local"
+        escalated = bool(llm.get("escalated", False))
+        redaction_stats = llm.get("redaction_stats") or {}
+        if not answer:
             answer = "I don't have enough context to answer confidently. Can you share one more detail?"
     except Exception as e:
         print(f"[WARN] assistant_chat generation failed: {e}")
@@ -2693,7 +3028,8 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
                 score=float(h.get("_score")) if h.get("_score") is not None else None,
             )
         )
-    source_count = len([s for s in sources if s.concept_id])
+    strong_sources = [s for s in sources if s.concept_id]
+    source_count = len(strong_sources)
     confidence = 0.35
     if source_count >= 5:
         confidence = 0.85
@@ -2703,6 +3039,9 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
         confidence = 0.6
     if len(answer.strip()) < 30:
         confidence = min(confidence, 0.45)
+    if _looks_uncertain(answer):
+        strong_sources = []
+        confidence = min(confidence, 0.4)

     _append_chat_turn(session_id, "user", payload.message)
     _append_chat_turn(session_id, "assistant", answer)
@@ -2710,9 +3049,12 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
     return AssistantChatResponse(
         session_id=session_id,
         answer=answer,
-        sources=[s for s in sources if s.concept_id],
+        sources=strong_sources,
         confidence=confidence,
         release_name=payload.release_name,
+        provider_used=provider_used,
+        escalated=escalated,
+        redaction_stats=redaction_stats,
    )
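Review note: one behavioral consequence of the hunks above is that a hedged answer can never cite sources — `_looks_uncertain` both caps confidence at 0.4 and empties `strong_sources` before the response is built. For reviewers, an illustrative response body after this change (all values invented; the field set comes from AssistantChatResponse):

    # Shape sketch only — not code to merge.
    example = {
        "session_id": "main",
        "answer": "Pack light, breathable clothing ...",
        "sources": [],               # only concept_id-bearing ("strong") sources are returned now
        "confidence": 0.6,
        "release_name": None,
        "provider_used": "local",    # "openai" when hybrid escalation fired
        "escalated": False,
        "redaction_stats": {},       # e.g. {"email": 1, "total": 1} after a cloud call
    }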
@@ -3110,43 +3452,24 @@ async def assistant_draft(payload: AssistantDraftPayload, x_admin_api_key: Optio
             " ".join(payload.constraints),
         ]
     ).strip()
-    try:
-        hits = await es_search_hits(
-            q=retrieval_query,
-            size=payload.max_sources,
-            release_name=payload.release_name,
-        )
-    except Exception as e:
-        print(f"[WARN] assistant_draft retrieval failed: {e}")
-        hits = []
-    if not hits and payload.release_name:
-        try:
-            hits = await es_recent_by_release(payload.release_name, size=payload.max_sources)
-        except Exception as e:
-            print(f"[WARN] assistant_draft release fallback retrieval failed: {e}")
-    if not hits and payload.release_name:
-        try:
-            hits = await es_recent_messages(
-                size=payload.max_sources,
-                release_name=payload.release_name,
-                q=None,
-            )
-        except Exception as e:
-            print(f"[WARN] assistant_draft inbox fallback retrieval failed: {e}")
-    if not hits:
-        try:
-            hits = await es_recent_messages(
-                size=payload.max_sources,
-                release_name=None,
-                q=None,
-            )
-        except Exception as e:
-            print(f"[WARN] assistant_draft global fallback retrieval failed: {e}")
+    hits = await _retrieve_sources_two_stage(
+        query=retrieval_query,
+        release_name=payload.release_name,
+        max_sources=payload.max_sources,
+        include_release_recent_fallback=True,
+    )

     prompt = build_assistant_prompt(payload, hits)
     used_fallback = False
+    provider_used: Literal["local", "openai"] = "local"
+    escalated = False
+    redaction_stats: Dict[str, int] = {}
     try:
-        draft = await ollama_generate(prompt)
+        llm = await llm_generate_with_router(prompt)
+        draft = str(llm.get("text") or "").strip()
+        provider_used = "openai" if llm.get("provider_used") == "openai" else "local"
+        escalated = bool(llm.get("escalated", False))
+        redaction_stats = llm.get("redaction_stats") or {}
         if not draft.strip():
             used_fallback = True
             draft = fallback_draft_text(payload)
@@ -3198,6 +3521,9 @@ async def assistant_draft(payload: AssistantDraftPayload, x_admin_api_key: Optio
         confidence=confidence,
         needs_review=True,
         release_name=payload.release_name,
+        provider_used=provider_used,
+        escalated=escalated,
+        redaction_stats=redaction_stats,
     )
diff --git a/ui/assets/app.js b/ui/assets/app.js
index 964bf87..4081383 100644
--- a/ui/assets/app.js
+++ b/ui/assets/app.js
@@ -146,7 +146,13 @@ async function makeDraft() {
       max_sources: 5,
     });
     const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 5).join(", ");
-    out.textContent = `${data.draft || ""}\n\nconfidence=${data.confidence}\nneeds_review=${data.needs_review}\nsources=${sourceLine}`;
+    out.textContent =
+      `${data.draft || ""}\n\n` +
+      `confidence=${data.confidence}\n` +
+      `needs_review=${data.needs_review}\n` +
+      `provider=${data.provider_used || "local"}\n` +
+      `escalated=${Boolean(data.escalated)}\n` +
+      `sources=${sourceLine}`;
   } catch (e) {
     out.textContent = `Error: ${String(e)}`;
   }
@@ -261,7 +267,11 @@ async function sendChat() {
       max_sources: 6,
     });
     const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 4).join(", ");
-    appendChat("assistant", data.answer || "", `confidence=${data.confidence} | sources=${sourceLine || "-"}`);
+    appendChat(
+      "assistant",
+      data.answer || "",
+      `confidence=${data.confidence} | provider=${data.provider_used || "local"} | escalated=${Boolean(data.escalated)} | sources=${sourceLine || "-"}`
+    );
   } catch (e) {
     appendChat("assistant", `Error: ${String(e)}`, "");
   }
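Review note: the UI now surfaces `provider` and `escalated` alongside confidence, so an end-to-end check is straightforward from a script as well. A hypothetical client call for smoke-testing the chat endpoint — the route path, header spelling, and port are assumptions, so check the FastAPI decorators in app.py for the real values:

    import httpx

    resp = httpx.post(
        "http://localhost:8000/assistant/chat",   # assumed route
        headers={"x-admin-api-key": "changeme"},  # assumed header name (from the x_admin_api_key Header param)
        json={"message": "What should I pack?", "session_id": "main", "max_sources": 6},
        timeout=120,
    )
    data = resp.json()
    # The three new fields added by this change:
    print(data["provider_used"], data["escalated"], data["redaction_stats"])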