Some tweaks.
This commit is contained in:
parent
f3e3f4f586
commit
89955662bf
1
.assistant_chat_sessions.json
Normal file
1
.assistant_chat_sessions.json
Normal file
File diff suppressed because one or more lines are too long
442
app.py
442
app.py
|
|
@ -40,6 +40,10 @@ IPFS_API = os.getenv("IPFS_API", "http://localhost:5001") # Kubo HTTP API
|
||||||
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
|
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
|
||||||
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
|
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
|
||||||
OLLAMA_EMBED_MODEL = os.getenv("OLLAMA_EMBED_MODEL", "nomic-embed-text")
|
OLLAMA_EMBED_MODEL = os.getenv("OLLAMA_EMBED_MODEL", "nomic-embed-text")
|
||||||
|
LLM_PROVIDER = os.getenv("LLM_PROVIDER", "local").strip().lower() # local|hybrid|openai
|
||||||
|
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "").strip()
|
||||||
|
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini").strip()
|
||||||
|
ALLOW_CLOUD_ESCALATION = os.getenv("ALLOW_CLOUD_ESCALATION", "0").strip().lower() in ("1", "true", "yes", "on")
|
||||||
PROJECTOR_SSH_HOST = os.getenv("PROJECTOR_SSH_HOST", "lakehouse-core.rakeroots.lan")
|
PROJECTOR_SSH_HOST = os.getenv("PROJECTOR_SSH_HOST", "lakehouse-core.rakeroots.lan")
|
||||||
PROJECTOR_REMOTE_DIR = os.getenv("PROJECTOR_REMOTE_DIR", "/tmp/jecio")
|
PROJECTOR_REMOTE_DIR = os.getenv("PROJECTOR_REMOTE_DIR", "/tmp/jecio")
|
||||||
PROJECTOR_REMOTE_SCRIPT = os.getenv("PROJECTOR_REMOTE_SCRIPT", "./run-projector-standard.sh")
|
PROJECTOR_REMOTE_SCRIPT = os.getenv("PROJECTOR_REMOTE_SCRIPT", "./run-projector-standard.sh")
|
||||||
|
|
@ -208,6 +212,9 @@ class AssistantDraftResponse(BaseModel):
|
||||||
confidence: float
|
confidence: float
|
||||||
needs_review: bool
|
needs_review: bool
|
||||||
release_name: Optional[str] = None
|
release_name: Optional[str] = None
|
||||||
|
provider_used: Literal["local", "openai"] = "local"
|
||||||
|
escalated: bool = False
|
||||||
|
redaction_stats: Dict[str, int] = Field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
class AssistantPlanPayload(BaseModel):
|
class AssistantPlanPayload(BaseModel):
|
||||||
|
|
@ -302,6 +309,9 @@ class AssistantChatResponse(BaseModel):
|
||||||
sources: List[AssistantDraftSource]
|
sources: List[AssistantDraftSource]
|
||||||
confidence: float
|
confidence: float
|
||||||
release_name: Optional[str] = None
|
release_name: Optional[str] = None
|
||||||
|
provider_used: Literal["local", "openai"] = "local"
|
||||||
|
escalated: bool = False
|
||||||
|
redaction_stats: Dict[str, int] = Field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
class AssistantSelfImprovePayload(BaseModel):
|
class AssistantSelfImprovePayload(BaseModel):
|
||||||
|
|
@ -906,7 +916,16 @@ TASK_AI_CACHE_TTL_SEC = int(os.getenv("TASK_AI_CACHE_TTL_SEC", "3600"))
|
||||||
TASK_AI_CACHE_MAX_SIZE = int(os.getenv("TASK_AI_CACHE_MAX_SIZE", "5000"))
|
TASK_AI_CACHE_MAX_SIZE = int(os.getenv("TASK_AI_CACHE_MAX_SIZE", "5000"))
|
||||||
TASK_AI_CACHE: Dict[str, Dict[str, Any]] = {}
|
TASK_AI_CACHE: Dict[str, Dict[str, Any]] = {}
|
||||||
ASSISTANT_CHAT_MAX_TURNS = int(os.getenv("ASSISTANT_CHAT_MAX_TURNS", "20"))
|
ASSISTANT_CHAT_MAX_TURNS = int(os.getenv("ASSISTANT_CHAT_MAX_TURNS", "20"))
|
||||||
|
ASSISTANT_CHAT_STORE_PATH = os.getenv("ASSISTANT_CHAT_STORE_PATH", ".assistant_chat_sessions.json")
|
||||||
ASSISTANT_CHAT_SESSIONS: Dict[str, List[Dict[str, str]]] = {}
|
ASSISTANT_CHAT_SESSIONS: Dict[str, List[Dict[str, str]]] = {}
|
||||||
|
# Minimum number of query tokens a source must share with the query to count
# as a match (see _filter_relevant_hits_for_query / _is_strong_source_match).
ASSISTANT_CHAT_MIN_TOKEN_OVERLAP = int(os.getenv("ASSISTANT_CHAT_MIN_TOKEN_OVERLAP", "1"))
# Stage-1 retrieval over-fetch factor: _retrieve_sources_two_stage requests
# max_sources * max(2, this value) candidates before reranking.
ASSISTANT_SOURCE_CANDIDATE_MULTIPLIER = int(os.getenv("ASSISTANT_SOURCE_CANDIDATE_MULTIPLIER", "4"))
# Minimum fraction of the query's tokens that must appear in a source for
# _is_strong_source_match to accept it.
ASSISTANT_SOURCE_MIN_COVERAGE = float(os.getenv("ASSISTANT_SOURCE_MIN_COVERAGE", "0.6"))
# Lowercase words that _query_tokens discards before overlap is computed.
ASSISTANT_QUERY_STOPWORDS = {
    "the", "and", "for", "with", "that", "this", "from", "have", "has", "had", "are", "was", "were", "will",
    "would", "should", "could", "can", "you", "your", "about", "what", "when", "where", "which", "who", "whom",
    "why", "how", "tomorrow", "today", "please", "any", "there", "need", "want", "know", "does", "did", "done",
}
|
||||||
|
|
||||||
|
|
||||||
def _split_sentences(text: str) -> List[str]:
|
def _split_sentences(text: str) -> List[str]:
|
||||||
|
|
@ -928,6 +947,115 @@ def _extract_who(text: str, default_sender: Optional[str]) -> Optional[str]:
|
||||||
return default_sender or None
|
return default_sender or None
|
||||||
|
|
||||||
|
|
||||||
|
def _query_tokens(text: str) -> set[str]:
    """Return the distinct matching tokens found in *text*.

    A token is a lowercase run of three or more ASCII letters/digits; tokens
    listed in ``ASSISTANT_QUERY_STOPWORDS`` are removed. ``None`` or an empty
    string yields an empty set.
    """
    lowered = (text or "").lower()
    words = re.findall(r"[a-z0-9]{3,}", lowered)
    return set(words).difference(ASSISTANT_QUERY_STOPWORDS)
|
||||||
|
|
||||||
|
|
||||||
|
def _source_text_for_match(src: Dict[str, Any]) -> str:
|
||||||
|
return " ".join(
|
||||||
|
[
|
||||||
|
str(src.get("text") or ""),
|
||||||
|
str(src.get("description") or ""),
|
||||||
|
str(src.get("summary") or ""),
|
||||||
|
str(src.get("display_name") or ""),
|
||||||
|
str(src.get("canonical_name") or ""),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _filter_relevant_hits_for_query(hits: List[Dict[str, Any]], query: str) -> List[Dict[str, Any]]:
    """Keep only the hits that share enough tokens with *query*.

    A hit is kept when the number of query tokens also present in its
    ``_source`` text is at least ``ASSISTANT_CHAT_MIN_TOKEN_OVERLAP``. When the
    query has no usable tokens the input list is returned unchanged.
    """
    wanted = _query_tokens(query)
    if not wanted:
        return hits

    def _shared_count(hit: Dict[str, Any]) -> int:
        # Count query tokens that also occur in this hit's searchable text.
        doc = hit.get("_source", {}) or {}
        return len(wanted.intersection(_query_tokens(_source_text_for_match(doc))))

    return [hit for hit in hits if _shared_count(hit) >= ASSISTANT_CHAT_MIN_TOKEN_OVERLAP]
|
||||||
|
|
||||||
|
|
||||||
|
def _is_strong_source_match(query: str, src: Dict[str, Any]) -> bool:
    """Decide whether *src* matches *query* strongly enough to be cited.

    Returns ``True`` when the query has no usable tokens. Otherwise the source
    must share at least the required number of tokens with the query and cover
    at least ``ASSISTANT_SOURCE_MIN_COVERAGE`` of the query's tokens.
    """
    wanted = _query_tokens(query)
    if not wanted:
        return True

    found = _query_tokens(_source_text_for_match(src))
    matched = len(wanted.intersection(found))
    total = max(1, len(wanted))

    required = ASSISTANT_CHAT_MIN_TOKEN_OVERLAP
    if total >= 2:
        # For multi-term questions, require at least 2 matched terms by default.
        required = max(required, 2)

    if matched < required:
        return False
    return matched / total >= ASSISTANT_SOURCE_MIN_COVERAGE
|
||||||
|
|
||||||
|
|
||||||
|
async def _retrieve_sources_two_stage(
    query: str,
    release_name: Optional[str],
    max_sources: int,
    include_release_recent_fallback: bool = True,
) -> List[Dict[str, Any]]:
    """Retrieve candidate sources broadly, then keep only strong matches.

    Stage 1 gathers de-duplicated candidates: a release-scoped search, then a
    global search and (optionally) the release's recent items while fewer than
    ``max_sources`` candidates have been collected. Each lookup is best-effort;
    a failure is logged and the next step still runs.

    Stage 2 orders candidates by token overlap with the query, then by the
    search score, and returns at most ``max_sources`` hits that pass
    ``_is_strong_source_match``. An empty list is returned when nothing passes.
    """
    fanout = max(2, ASSISTANT_SOURCE_CANDIDATE_MULTIPLIER)
    candidate_size = max(max_sources, max_sources * fanout)
    known_keys: set[str] = set()
    pool: List[Dict[str, Any]] = []

    def _collect(batch: List[Dict[str, Any]]) -> None:
        # De-duplicate by concept_id / source_pk, else by a hash of the document.
        for hit in batch:
            doc = hit.get("_source", {}) or {}
            ident = str(doc.get("concept_id") or doc.get("source_pk") or "")
            if not ident:
                serialized = json.dumps(doc, ensure_ascii=False, sort_keys=True)
                ident = hashlib.sha256(serialized.encode("utf-8")).hexdigest()[:20]
            if ident not in known_keys:
                known_keys.add(ident)
                pool.append(hit)

    # Stage 1: broad retrieval
    try:
        _collect(await es_search_hits(q=query, size=candidate_size, release_name=release_name))
    except Exception as e:
        print(f"[WARN] stage1 release search failed: {e}")

    if len(pool) < max_sources:
        try:
            _collect(await es_search_hits(q=query, size=candidate_size, release_name=None))
        except Exception as e:
            print(f"[WARN] stage1 global search failed: {e}")

    if len(pool) < max_sources and include_release_recent_fallback and release_name:
        try:
            _collect(await es_recent_by_release(release_name, size=candidate_size))
        except Exception as e:
            print(f"[WARN] stage1 release-recent fallback failed: {e}")

    # Stage 2: relevance rerank and trim
    query_terms = _query_tokens(query)
    scored = []
    for hit in pool:
        doc = hit.get("_source", {}) or {}
        doc_terms = _query_tokens(_source_text_for_match(doc))
        shared = len(query_terms.intersection(doc_terms)) if query_terms else 0
        scored.append((shared, float(hit.get("_score") or 0.0), hit))

    ordered = sorted(scored, key=lambda item: (item[0], item[1]), reverse=True)
    strong = [
        hit
        for _shared, _score, hit in ordered
        if _is_strong_source_match(query, hit.get("_source", {}) or {})
    ]
    # Explicitly return no sources instead of attaching weak matches.
    return strong[:max_sources]
|
||||||
|
|
||||||
|
|
||||||
def extract_pending_tasks_from_source(src: Dict[str, Any]) -> List[Dict[str, Any]]:
|
def extract_pending_tasks_from_source(src: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||||
text = str(src.get("text") or src.get("description") or src.get("summary") or "").strip()
|
text = str(src.get("text") or src.get("description") or src.get("summary") or "").strip()
|
||||||
if not text:
|
if not text:
|
||||||
|
|
@ -1129,11 +1257,16 @@ def build_chat_prompt(
|
||||||
)
|
)
|
||||||
context = "\n\n---\n\n".join(context_chunks) if context_chunks else "No retrieved context."
|
context = "\n\n---\n\n".join(context_chunks) if context_chunks else "No retrieved context."
|
||||||
hist = "\n".join(history_lines) if history_lines else "(none)"
|
hist = "\n".join(history_lines) if history_lines else "(none)"
|
||||||
|
now_local = datetime.now().astimezone()
|
||||||
|
now_line = now_local.strftime("%A, %Y-%m-%d %H:%M:%S %Z")
|
||||||
|
|
||||||
return (
|
return (
|
||||||
"You are a practical personal assistant. Be concise, factual, and useful.\n"
|
"You are a practical personal assistant. Be concise, factual, and useful.\n"
|
||||||
"Use retrieved context when available. If uncertain, say so briefly and ask one clarifying question.\n"
|
"Use both conversation history and retrieved context when available.\n"
|
||||||
|
"Treat prior turns in this session as valid context unless the user explicitly asks to reset/forget.\n"
|
||||||
|
"If uncertain, say so briefly and ask one clarifying question.\n"
|
||||||
"Do not claim external actions were already performed.\n\n"
|
"Do not claim external actions were already performed.\n\n"
|
||||||
|
f"Current local datetime: {now_line}\n"
|
||||||
f"Release filter: {release_name or '(none)'}\n"
|
f"Release filter: {release_name or '(none)'}\n"
|
||||||
f"Conversation history:\n{hist}\n\n"
|
f"Conversation history:\n{hist}\n\n"
|
||||||
f"Retrieved context:\n{context}\n\n"
|
f"Retrieved context:\n{context}\n\n"
|
||||||
|
|
@ -1149,6 +1282,68 @@ def _append_chat_turn(session_id: str, role: str, content: str) -> None:
|
||||||
if len(turns) > max_items:
|
if len(turns) > max_items:
|
||||||
turns = turns[-max_items:]
|
turns = turns[-max_items:]
|
||||||
ASSISTANT_CHAT_SESSIONS[session_id] = turns
|
ASSISTANT_CHAT_SESSIONS[session_id] = turns
|
||||||
|
_save_chat_sessions()
|
||||||
|
|
||||||
|
|
||||||
|
def _load_chat_sessions() -> Dict[str, List[Dict[str, str]]]:
    """Load persisted chat sessions from ``ASSISTANT_CHAT_STORE_PATH``.

    Returns a mapping of session id to its cleaned turns. Only dict turns with
    a role of ``"user"`` or ``"assistant"`` and non-empty content are kept, and
    only the last ``ASSISTANT_CHAT_MAX_TURNS * 2`` stored entries of each
    session are considered. Sessions with no valid turns are dropped.

    Loading is best-effort: a missing file, an unreadable file, invalid JSON,
    or a non-object top level all yield ``{}``. Read/parse failures are logged
    so that a corrupted store does not disappear silently.
    """
    p = Path(ASSISTANT_CHAT_STORE_PATH)
    if not p.is_file():
        return {}
    try:
        raw = p.read_text(encoding="utf-8", errors="replace")
        obj = json.loads(raw)
    except Exception as e:
        # Keep startup alive, but leave a trace: the next save overwrites the file.
        print(f"[WARN] failed loading chat sessions: {e}")
        return {}
    if not isinstance(obj, dict):
        return {}

    out: Dict[str, List[Dict[str, str]]] = {}
    for sid, turns in obj.items():
        if not isinstance(sid, str) or not isinstance(turns, list):
            continue
        clean_turns: List[Dict[str, str]] = []
        for t in turns[-ASSISTANT_CHAT_MAX_TURNS * 2 :]:
            if not isinstance(t, dict):
                continue
            role = str(t.get("role") or "").strip()
            content = str(t.get("content") or "")
            if role in ("user", "assistant") and content:
                clean_turns.append({"role": role, "content": content})
        if clean_turns:
            out[sid] = clean_turns
    return out
|
||||||
|
|
||||||
|
|
||||||
|
def _save_chat_sessions() -> None:
    """Persist ``ASSISTANT_CHAT_SESSIONS`` to ``ASSISTANT_CHAT_STORE_PATH``.

    Each session is trimmed to its last ``ASSISTANT_CHAT_MAX_TURNS * 2``
    entries and reduced to ``role`` / ``content`` strings. The JSON is written
    to a sibling ``.tmp`` file which then replaces the store file. Any failure
    is logged and swallowed.
    """
    target = Path(ASSISTANT_CHAT_STORE_PATH)
    try:
        snapshot: Dict[str, List[Dict[str, str]]] = {}
        for sid, turns in ASSISTANT_CHAT_SESSIONS.items():
            if isinstance(sid, str) and isinstance(turns, list):
                rows: List[Dict[str, str]] = []
                for turn in turns[-ASSISTANT_CHAT_MAX_TURNS * 2 :]:
                    if isinstance(turn, dict):
                        rows.append(
                            {
                                "role": str(turn.get("role") or ""),
                                "content": str(turn.get("content") or ""),
                            }
                        )
                snapshot[sid] = rows
        serialized = json.dumps(snapshot, ensure_ascii=False)
        # Write next to the target first, then swap it into place.
        scratch = target.with_suffix(target.suffix + ".tmp")
        scratch.write_text(serialized, encoding="utf-8")
        scratch.replace(target)
    except Exception as e:
        print(f"[WARN] failed saving chat sessions: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
# Initialize persisted chat memory at startup.
|
||||||
|
ASSISTANT_CHAT_SESSIONS = _load_chat_sessions()
|
||||||
|
|
||||||
|
|
||||||
|
def _build_chat_retrieval_query(message: str, history: List[Dict[str, str]]) -> str:
|
||||||
|
# Blend latest question with a small slice of recent user context.
|
||||||
|
parts: List[str] = [message.strip()]
|
||||||
|
user_turns = [t.get("content", "").strip() for t in history if t.get("role") == "user" and t.get("content")]
|
||||||
|
for t in user_turns[-3:]:
|
||||||
|
if t and t.lower() != message.strip().lower():
|
||||||
|
parts.append(t[:200])
|
||||||
|
return " ".join(p for p in parts if p).strip()
|
||||||
|
|
||||||
|
|
||||||
def _slugify(text: str) -> str:
|
def _slugify(text: str) -> str:
|
||||||
|
|
@ -1297,6 +1492,136 @@ async def ollama_generate(prompt: str) -> str:
|
||||||
return (r.json().get("response") or "").strip()
|
return (r.json().get("response") or "").strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _redact_for_cloud(text: str) -> Dict[str, Any]:
|
||||||
|
out = text or ""
|
||||||
|
mapping: Dict[str, str] = {}
|
||||||
|
counters: Dict[str, int] = {}
|
||||||
|
stats: Dict[str, int] = {}
|
||||||
|
|
||||||
|
patterns = [
|
||||||
|
("EMAIL", re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")),
|
||||||
|
("PHONE", re.compile(r"\b(?:\+?\d[\d().\-\s]{6,}\d)\b")),
|
||||||
|
("IPV4", re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")),
|
||||||
|
("URL", re.compile(r"\bhttps?://[^\s]+")),
|
||||||
|
("INTERNAL_HOST", re.compile(r"\b[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.(?:lan|local|internal)\b")),
|
||||||
|
("OPENAI_KEY", re.compile(r"\bsk-[A-Za-z0-9\-_]{20,}\b")),
|
||||||
|
]
|
||||||
|
|
||||||
|
for label, pat in patterns:
|
||||||
|
counters[label] = 0
|
||||||
|
|
||||||
|
def repl(match: re.Match[str], _label: str = label) -> str:
|
||||||
|
counters[_label] += 1
|
||||||
|
placeholder = f"[[{_label}_{counters[_label]}]]"
|
||||||
|
mapping[placeholder] = match.group(0)
|
||||||
|
return placeholder
|
||||||
|
|
||||||
|
out = pat.sub(repl, out)
|
||||||
|
if counters[label] > 0:
|
||||||
|
stats[label.lower()] = counters[label]
|
||||||
|
|
||||||
|
stats["total"] = sum(v for k, v in stats.items() if k != "total")
|
||||||
|
return {"text": out, "mapping": mapping, "stats": stats}
|
||||||
|
|
||||||
|
|
||||||
|
def _rehydrate_from_cloud(text: str, mapping: Dict[str, str]) -> str:
|
||||||
|
out = text or ""
|
||||||
|
for placeholder, original in mapping.items():
|
||||||
|
out = out.replace(placeholder, original)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_uncertain(text: str) -> bool:
|
||||||
|
t = (text or "").strip().lower()
|
||||||
|
if not t:
|
||||||
|
return True
|
||||||
|
cues = [
|
||||||
|
"i'm unsure",
|
||||||
|
"i am unsure",
|
||||||
|
"i don't have enough context",
|
||||||
|
"i do not have enough context",
|
||||||
|
"could you provide more",
|
||||||
|
"could you clarify",
|
||||||
|
"can you clarify",
|
||||||
|
"unable to provide",
|
||||||
|
"please provide more information",
|
||||||
|
"i need more context",
|
||||||
|
"i need more information",
|
||||||
|
]
|
||||||
|
if any(c in t for c in cues):
|
||||||
|
return True
|
||||||
|
return len(t) < 40
|
||||||
|
|
||||||
|
|
||||||
|
async def openai_generate(prompt: str) -> str:
    """Send *prompt* as a single user message to the OpenAI chat completions API.

    Returns the stripped content of the first choice, or an empty string when
    the response carries no choices or no content.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is empty.
        Exception: whatever the HTTP client raises, including the error from
            ``raise_for_status()`` on a non-success response.
    """
    if not OPENAI_API_KEY:
        raise RuntimeError("OPENAI_API_KEY is not configured")

    request_body = {
        "model": OPENAI_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.2,
    }
    request_headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json",
    }

    async with httpx.AsyncClient(timeout=90) as client:
        response = await client.post(
            "https://api.openai.com/v1/chat/completions",
            headers=request_headers,
            json=request_body,
        )
        response.raise_for_status()
        data = response.json()
        candidates = data.get("choices") or []
        if candidates:
            first_message = candidates[0].get("message") or {}
            return str(first_message.get("content") or "").strip()
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
async def llm_generate_with_router(prompt: str) -> Dict[str, Any]:
    """Generate text for *prompt* using the provider chosen by ``LLM_PROVIDER``.

    Modes (an unrecognized value is treated as ``"local"``):
      - ``"openai"``: try the cloud call; on any failure log it and use the
        local model instead.
      - ``"local"``: use the local model only; a local failure is re-raised.
      - ``"hybrid"``: use the local model first and escalate to the cloud only
        when the local text is empty or ``_looks_uncertain``; if the cloud call
        fails, the local text (possibly empty) is returned.

    The cloud call requires ``ALLOW_CLOUD_ESCALATION``; the prompt is redacted
    before sending and placeholders are restored in the reply.

    Returns a dict with ``text``, ``provider_used`` (``"local"`` or
    ``"openai"``), ``escalated`` and ``redaction_stats``.
    """
    mode = LLM_PROVIDER if LLM_PROVIDER in ("local", "hybrid", "openai") else "local"

    def _local_result(text: str) -> Dict[str, Any]:
        # Result envelope for any answer produced by the local model.
        return {"text": text, "provider_used": "local", "escalated": False, "redaction_stats": {}}

    async def _ask_cloud() -> Dict[str, Any]:
        # Redact, call OpenAI, then restore the redacted values in the reply.
        if not ALLOW_CLOUD_ESCALATION:
            raise RuntimeError("Cloud escalation is disabled")
        scrubbed = _redact_for_cloud(prompt)
        reply = await openai_generate(scrubbed["text"])
        return {
            "text": _rehydrate_from_cloud(reply, scrubbed["mapping"]),
            "provider_used": "openai",
            "escalated": True,
            "redaction_stats": scrubbed["stats"],
        }

    if mode == "openai":
        try:
            return await _ask_cloud()
        except Exception as e:
            print(f"[WARN] openai provider failed, falling back to local: {e}")
            return _local_result(await ollama_generate(prompt))

    answer = ""
    local_failure: Optional[Exception] = None
    try:
        answer = await ollama_generate(prompt)
    except Exception as e:
        local_failure = e
        print(f"[WARN] local generation failed: {e}")

    if mode == "local":
        if local_failure:
            raise local_failure
        return _local_result(answer)

    # hybrid: local first, escalate only on weak/uncertain outputs.
    confident = bool(answer) and not _looks_uncertain(answer)
    if confident:
        return _local_result(answer)
    try:
        return await _ask_cloud()
    except Exception as e:
        print(f"[WARN] hybrid cloud escalation failed, keeping local result: {e}")
        return _local_result(answer)
|
||||||
|
|
||||||
|
|
||||||
def fallback_draft_text(payload: AssistantDraftPayload) -> str:
|
def fallback_draft_text(payload: AssistantDraftPayload) -> str:
|
||||||
recipient = payload.recipient or "there"
|
recipient = payload.recipient or "there"
|
||||||
tone = (payload.tone or "").lower()
|
tone = (payload.tone or "").lower()
|
||||||
|
|
@ -2639,33 +2964,36 @@ async def assistant_learn(payload: AssistantLearnPayload, x_admin_api_key: Optio
|
||||||
async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optional[str] = Header(default=None)):
|
async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optional[str] = Header(default=None)):
|
||||||
check_admin_api_key(x_admin_api_key)
|
check_admin_api_key(x_admin_api_key)
|
||||||
session_id = (payload.session_id or str(uuid.uuid4())).strip()
|
session_id = (payload.session_id or str(uuid.uuid4())).strip()
|
||||||
|
msg_norm = (payload.message or "").strip().lower()
|
||||||
|
if re.search(r"\b(what day is it|what date is it|what day is it today|what date is it today)\b", msg_norm):
|
||||||
|
now_local = datetime.now().astimezone()
|
||||||
|
answer = f"Today is {now_local.strftime('%A, %B %d, %Y')}."
|
||||||
|
_append_chat_turn(session_id, "user", payload.message)
|
||||||
|
_append_chat_turn(session_id, "assistant", answer)
|
||||||
|
return AssistantChatResponse(
|
||||||
|
session_id=session_id,
|
||||||
|
answer=answer,
|
||||||
|
sources=[],
|
||||||
|
confidence=0.95,
|
||||||
|
release_name=payload.release_name,
|
||||||
|
provider_used="local",
|
||||||
|
escalated=False,
|
||||||
|
redaction_stats={},
|
||||||
|
)
|
||||||
|
|
||||||
history: List[Dict[str, str]] = []
|
history: List[Dict[str, str]] = []
|
||||||
if payload.history:
|
if payload.history:
|
||||||
history = [{"role": h.role, "content": h.content} for h in payload.history if h.content.strip()]
|
history = [{"role": h.role, "content": h.content} for h in payload.history if h.content.strip()]
|
||||||
else:
|
else:
|
||||||
history = ASSISTANT_CHAT_SESSIONS.get(session_id, [])
|
history = ASSISTANT_CHAT_SESSIONS.get(session_id, [])
|
||||||
|
retrieval_query = _build_chat_retrieval_query(payload.message, history)
|
||||||
|
|
||||||
hits: List[Dict[str, Any]] = []
|
hits: List[Dict[str, Any]] = await _retrieve_sources_two_stage(
|
||||||
try:
|
query=retrieval_query,
|
||||||
hits = await es_search_hits(
|
release_name=payload.release_name,
|
||||||
q=payload.message,
|
max_sources=payload.max_sources,
|
||||||
size=payload.max_sources,
|
include_release_recent_fallback=True,
|
||||||
release_name=payload.release_name,
|
)
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[WARN] assistant_chat retrieval failed: {e}")
|
|
||||||
hits = []
|
|
||||||
if not hits and payload.release_name:
|
|
||||||
try:
|
|
||||||
hits = await es_recent_by_release(payload.release_name, size=payload.max_sources)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[WARN] assistant_chat release fallback failed: {e}")
|
|
||||||
if not hits:
|
|
||||||
try:
|
|
||||||
hits = await es_recent_messages(size=payload.max_sources, release_name=payload.release_name, q=None)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[WARN] assistant_chat inbox fallback failed: {e}")
|
|
||||||
|
|
||||||
prompt = build_chat_prompt(
|
prompt = build_chat_prompt(
|
||||||
user_message=payload.message,
|
user_message=payload.message,
|
||||||
|
|
@ -2673,9 +3001,16 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
|
||||||
source_docs=hits,
|
source_docs=hits,
|
||||||
release_name=payload.release_name,
|
release_name=payload.release_name,
|
||||||
)
|
)
|
||||||
|
provider_used: Literal["local", "openai"] = "local"
|
||||||
|
escalated = False
|
||||||
|
redaction_stats: Dict[str, int] = {}
|
||||||
try:
|
try:
|
||||||
answer = await ollama_generate(prompt)
|
llm = await llm_generate_with_router(prompt)
|
||||||
if not answer.strip():
|
answer = str(llm.get("text") or "").strip()
|
||||||
|
provider_used = "openai" if llm.get("provider_used") == "openai" else "local"
|
||||||
|
escalated = bool(llm.get("escalated", False))
|
||||||
|
redaction_stats = llm.get("redaction_stats") or {}
|
||||||
|
if not answer:
|
||||||
answer = "I don't have enough context to answer confidently. Can you share one more detail?"
|
answer = "I don't have enough context to answer confidently. Can you share one more detail?"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[WARN] assistant_chat generation failed: {e}")
|
print(f"[WARN] assistant_chat generation failed: {e}")
|
||||||
|
|
@ -2693,7 +3028,8 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
|
||||||
score=float(h.get("_score")) if h.get("_score") is not None else None,
|
score=float(h.get("_score")) if h.get("_score") is not None else None,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
source_count = len([s for s in sources if s.concept_id])
|
strong_sources = [s for s in sources if s.concept_id]
|
||||||
|
source_count = len(strong_sources)
|
||||||
confidence = 0.35
|
confidence = 0.35
|
||||||
if source_count >= 5:
|
if source_count >= 5:
|
||||||
confidence = 0.85
|
confidence = 0.85
|
||||||
|
|
@ -2703,6 +3039,9 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
|
||||||
confidence = 0.6
|
confidence = 0.6
|
||||||
if len(answer.strip()) < 30:
|
if len(answer.strip()) < 30:
|
||||||
confidence = min(confidence, 0.45)
|
confidence = min(confidence, 0.45)
|
||||||
|
if _looks_uncertain(answer):
|
||||||
|
strong_sources = []
|
||||||
|
confidence = min(confidence, 0.4)
|
||||||
|
|
||||||
_append_chat_turn(session_id, "user", payload.message)
|
_append_chat_turn(session_id, "user", payload.message)
|
||||||
_append_chat_turn(session_id, "assistant", answer)
|
_append_chat_turn(session_id, "assistant", answer)
|
||||||
|
|
@ -2710,9 +3049,12 @@ async def assistant_chat(payload: AssistantChatPayload, x_admin_api_key: Optiona
|
||||||
return AssistantChatResponse(
|
return AssistantChatResponse(
|
||||||
session_id=session_id,
|
session_id=session_id,
|
||||||
answer=answer,
|
answer=answer,
|
||||||
sources=[s for s in sources if s.concept_id],
|
sources=strong_sources,
|
||||||
confidence=confidence,
|
confidence=confidence,
|
||||||
release_name=payload.release_name,
|
release_name=payload.release_name,
|
||||||
|
provider_used=provider_used,
|
||||||
|
escalated=escalated,
|
||||||
|
redaction_stats=redaction_stats,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -3110,43 +3452,24 @@ async def assistant_draft(payload: AssistantDraftPayload, x_admin_api_key: Optio
|
||||||
" ".join(payload.constraints),
|
" ".join(payload.constraints),
|
||||||
]
|
]
|
||||||
).strip()
|
).strip()
|
||||||
try:
|
hits = await _retrieve_sources_two_stage(
|
||||||
hits = await es_search_hits(
|
query=retrieval_query,
|
||||||
q=retrieval_query,
|
release_name=payload.release_name,
|
||||||
size=payload.max_sources,
|
max_sources=payload.max_sources,
|
||||||
release_name=payload.release_name,
|
include_release_recent_fallback=True,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
|
||||||
print(f"[WARN] assistant_draft retrieval failed: {e}")
|
|
||||||
hits = []
|
|
||||||
if not hits and payload.release_name:
|
|
||||||
try:
|
|
||||||
hits = await es_recent_by_release(payload.release_name, size=payload.max_sources)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[WARN] assistant_draft release fallback retrieval failed: {e}")
|
|
||||||
if not hits and payload.release_name:
|
|
||||||
try:
|
|
||||||
hits = await es_recent_messages(
|
|
||||||
size=payload.max_sources,
|
|
||||||
release_name=payload.release_name,
|
|
||||||
q=None,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[WARN] assistant_draft inbox fallback retrieval failed: {e}")
|
|
||||||
if not hits:
|
|
||||||
try:
|
|
||||||
hits = await es_recent_messages(
|
|
||||||
size=payload.max_sources,
|
|
||||||
release_name=None,
|
|
||||||
q=None,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[WARN] assistant_draft global fallback retrieval failed: {e}")
|
|
||||||
|
|
||||||
prompt = build_assistant_prompt(payload, hits)
|
prompt = build_assistant_prompt(payload, hits)
|
||||||
used_fallback = False
|
used_fallback = False
|
||||||
|
provider_used: Literal["local", "openai"] = "local"
|
||||||
|
escalated = False
|
||||||
|
redaction_stats: Dict[str, int] = {}
|
||||||
try:
|
try:
|
||||||
draft = await ollama_generate(prompt)
|
llm = await llm_generate_with_router(prompt)
|
||||||
|
draft = str(llm.get("text") or "").strip()
|
||||||
|
provider_used = "openai" if llm.get("provider_used") == "openai" else "local"
|
||||||
|
escalated = bool(llm.get("escalated", False))
|
||||||
|
redaction_stats = llm.get("redaction_stats") or {}
|
||||||
if not draft.strip():
|
if not draft.strip():
|
||||||
used_fallback = True
|
used_fallback = True
|
||||||
draft = fallback_draft_text(payload)
|
draft = fallback_draft_text(payload)
|
||||||
|
|
@ -3198,6 +3521,9 @@ async def assistant_draft(payload: AssistantDraftPayload, x_admin_api_key: Optio
|
||||||
confidence=confidence,
|
confidence=confidence,
|
||||||
needs_review=True,
|
needs_review=True,
|
||||||
release_name=payload.release_name,
|
release_name=payload.release_name,
|
||||||
|
provider_used=provider_used,
|
||||||
|
escalated=escalated,
|
||||||
|
redaction_stats=redaction_stats,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -146,7 +146,13 @@ async function makeDraft() {
|
||||||
max_sources: 5,
|
max_sources: 5,
|
||||||
});
|
});
|
||||||
const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 5).join(", ");
|
const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 5).join(", ");
|
||||||
out.textContent = `${data.draft || ""}\n\nconfidence=${data.confidence}\nneeds_review=${data.needs_review}\nsources=${sourceLine}`;
|
out.textContent =
|
||||||
|
`${data.draft || ""}\n\n` +
|
||||||
|
`confidence=${data.confidence}\n` +
|
||||||
|
`needs_review=${data.needs_review}\n` +
|
||||||
|
`provider=${data.provider_used || "local"}\n` +
|
||||||
|
`escalated=${Boolean(data.escalated)}\n` +
|
||||||
|
`sources=${sourceLine}`;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
out.textContent = `Error: ${String(e)}`;
|
out.textContent = `Error: ${String(e)}`;
|
||||||
}
|
}
|
||||||
|
|
@ -261,7 +267,11 @@ async function sendChat() {
|
||||||
max_sources: 6,
|
max_sources: 6,
|
||||||
});
|
});
|
||||||
const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 4).join(", ");
|
const sourceLine = (data.sources || []).map((s) => s.concept_id).filter(Boolean).slice(0, 4).join(", ");
|
||||||
appendChat("assistant", data.answer || "", `confidence=${data.confidence} | sources=${sourceLine || "-"}`);
|
appendChat(
|
||||||
|
"assistant",
|
||||||
|
data.answer || "",
|
||||||
|
`confidence=${data.confidence} | provider=${data.provider_used || "local"} | escalated=${Boolean(data.escalated)} | sources=${sourceLine || "-"}`
|
||||||
|
);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
appendChat("assistant", `Error: ${String(e)}`, "");
|
appendChat("assistant", `Error: ${String(e)}`, "");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue