refactor(app): extract assistant retrieval/planning/remote/imap services

2026-02-15 01:07:29 +01:00 · 2026-02-15 01:07:29 +01:00 · 106983eee6
parent 89955662bf
commit 106983eee6
6 changed files with 1102 additions and 1050 deletions
--- a/app.py
+++ b/app.py
--- a/services/__init__.py
+++ b/services/__init__.py
@ -0,0 +1,2 @@
 """Service-layer modules for assistant backend."""
--- a/services/assistant_planning.py
+++ b/services/assistant_planning.py
@ -0,0 +1,140 @@
 from typing import Any, Awaitable, Callable, Dict, List, Optional
 def _get(obj: Any, name: str, default: Any = None) -> Any:
    """Read attribute ``name`` from ``obj``, yielding ``default`` when it is absent."""
    try:
        return getattr(obj, name)
    except AttributeError:
        return default
 def build_assistant_plan_prompt(payload: Any, source_docs: List[Dict[str, Any]]) -> str:
    """Compose the LLM prompt that asks for a JSON execution plan.

    The prompt embeds the payload's ``task_type``, ``objective``, ``constraints``
    and ``max_steps`` (3 when the attribute is missing), followed by one summary
    per retrieved document. Each summary is read from the hit's ``_source``
    mapping and its text excerpt is capped at 600 characters.
    """

    def render_doc(hit: Dict[str, Any]) -> str:
        fields = hit.get("_source", {}) or {}
        lines = [
            f"{key}: {fields.get(key, '')}"
            for key in ("concept_id", "source_pk", "source_table", "release_name")
        ]
        # The first non-empty of text / description / summary supplies the excerpt.
        excerpt = str(fields.get("text") or fields.get("description") or fields.get("summary") or "")[:600]
        lines.append(f"text: {excerpt}")
        return "\n".join(lines)

    constraints = _get(payload, "constraints", []) or []
    if constraints:
        constraint_lines = "\n".join(f"- {item}" for item in constraints)
    else:
        constraint_lines = "- None"

    summaries = [render_doc(hit) for hit in source_docs]
    context = "\n\n---\n\n".join(summaries) if summaries else "No retrieved context."

    plan_shape = (
        '{"plan": ['
        '{"step_id":"S1","title":"...","action_type":"research|draft|ask_user|prepare_data|review","requires_approval":true|false,"notes":"..."}'
        "]}\n"
    )
    sections = [
        "You are a cautious personal assistant planner. Produce an execution plan only; do not execute anything.\n",
        "Return valid JSON ONLY with this exact shape:\n",
        plan_shape,
        f"Use at most {_get(payload, 'max_steps', 3)} steps.\n",
        "Prefer safe read-only and draft actions first.\n\n",
        f"Task type: {_get(payload, 'task_type', '')}\n",
        f"Objective: {_get(payload, 'objective', '')}\n",
        f"Constraints:\n{constraint_lines}\n\n",
        "Retrieved context:\n",
        f"{context}\n",
    ]
    return "".join(sections)
 def fallback_plan(max_steps: int) -> List[Dict[str, Any]]:
    """Return the canned research -> draft -> ask_user plan, truncated to ``max_steps``.

    Args:
        max_steps: Upper bound on the number of steps returned. Values of zero
            or below yield an empty plan; ``None`` keeps all three steps.

    Returns:
        A fresh list of step dicts (``step_id``, ``title``, ``action_type``,
        ``requires_approval``, ``notes``), at most three long.
    """
    steps: List[Dict[str, Any]] = [
        {
            "step_id": "S1",
            "title": "Gather relevant facts and constraints",
            "action_type": "research",
            "requires_approval": False,
            "notes": "Review messages/concepts and identify required context.",
        },
        {
            "step_id": "S2",
            "title": "Draft a response or action proposal",
            "action_type": "draft",
            "requires_approval": False,
            "notes": "Produce a concise draft aligned with objective and constraints.",
        },
        {
            "step_id": "S3",
            "title": "Request user confirmation before any external action",
            "action_type": "ask_user",
            "requires_approval": True,
            "notes": "Do not send or execute changes until approved.",
        },
    ]
    if max_steps is None:
        return steps
    # A negative bound would slice from the end (e.g. -1 -> two steps), so clamp it.
    return steps[: max(0, max_steps)]
 def find_plan_step(plan: List[Any], step_id: str) -> Optional[Any]:
    """Return the first step in ``plan`` whose ``step_id`` attribute equals ``step_id``, else ``None``."""
    matches = (step for step in plan if _get(step, "step_id", "") == step_id)
    return next(matches, None)
 def is_high_risk_step(step: Any) -> bool:
    """Tell whether a step's title or notes mention a high-risk action keyword.

    Matching is a case-insensitive substring test, so a keyword also matches
    inside longer words (for example ``"pay"`` inside ``"display"``).
    """
    title = _get(step, "title", "")
    notes = _get(step, "notes", "") or ""
    haystack = f"{title} {notes}".lower()
    risky_terms = (
        "send",
        "submit",
        "pay",
        "payment",
        "transfer",
        "wire",
        "sign",
        "file",
        "delete",
        "close account",
        "change account",
    )
    for term in risky_terms:
        if term in haystack:
            return True
    return False
 def enforce_step_policy(payload: Any, step: Any) -> Optional[str]:
    """Check approval rules for ``step``; return a refusal message or ``None`` when allowed.

    A step flagged ``requires_approval`` needs ``payload.approved``. A step that
    ``is_high_risk_step`` classifies as high risk additionally needs a
    non-blank ``payload.manual_confirm_token``.
    """

    def is_approved() -> bool:
        return bool(_get(payload, "approved", False))

    if bool(_get(step, "requires_approval", False)) and not is_approved():
        return "Step requires approval but approved=false."
    if not is_high_risk_step(step):
        return None
    if not is_approved():
        return "High-risk step requires approved=true."
    confirm_token = str(_get(payload, "manual_confirm_token", "") or "")
    if confirm_token.strip():
        return None
    return "High-risk step requires manual_confirm_token."
 async def execute_plan_step(
    payload: Any,
    step: Any,
    generate_text: Callable[[str], Awaitable[str]],
 ) -> Dict[str, Any]:
    """Carry out one plan step and return its result payload.

    Only ``draft`` steps do real work: they ask ``generate_text`` for a draft
    and fall back to a placeholder sentence when the call fails or returns
    blank text. Every other action type maps to a fixed acknowledgement dict.
    """
    action_type = str(_get(step, "action_type", ""))
    title = str(_get(step, "title", ""))
    notes = str(_get(step, "notes", "") or "")

    if action_type != "draft":
        canned: Dict[str, Dict[str, Any]] = {
            "research": {
                "note": "Research step acknowledged. Use /search or /assistant/draft for grounded retrieval."
            },
            "prepare_data": {
                "note": "Prepare-data step acknowledged.",
                "checklist": ["Collect required inputs", "Normalize format", "Validate completeness"],
            },
            "review": {"note": "Review step requires human review before external action."},
            "ask_user": {"question": "Please confirm whether to proceed with the next high-impact action."},
        }
        return canned.get(action_type, {"note": "Step recognized but no executor implemented."})

    placeholder = f"Draft for step '{title}'."
    prompt = "\n".join(
        [
            "Draft concise text for this approved planning step.",
            f"Task type: {_get(payload, 'task_type', '')}",
            f"Objective: {_get(payload, 'objective', '')}",
            f"Step: {title}",
            f"Notes: {notes}",
            "Output only final draft text.",
        ]
    )
    try:
        generated = await generate_text(prompt)
        text = generated if generated.strip() else placeholder
    except Exception:
        # Best effort: any generator failure degrades to the placeholder draft.
        text = placeholder
    return {"draft": text}
--- a/services/assistant_retrieval.py
+++ b/services/assistant_retrieval.py
@ -0,0 +1,115 @@
 import hashlib
 import json
 import re
 from dataclasses import dataclass
 from typing import Any, Awaitable, Callable, Dict, List, Optional, Set
 # Common English filler words dropped from queries and source text before
 # token-overlap matching. Every entry has at least three characters because
 # the tokenizer in this module only emits tokens of length >= 3.
 DEFAULT_QUERY_STOPWORDS: Set[str] = {
    "the", "and", "for", "with", "that", "this", "from", "have", "has", "had", "are", "was", "were", "will",
    "would", "should", "could", "can", "you", "your", "about", "what", "when", "where", "which", "who", "whom",
    "why", "how", "tomorrow", "today", "please", "any", "there", "need", "want", "know", "does", "did", "done",
 }
@dataclass(frozen=True)
 class AssistantRetrievalConfig:
    """Immutable tuning knobs for source retrieval and relevance filtering."""

    # Minimum number of query tokens that must also occur in a source; the
    # matcher raises this to 2 whenever the query has two or more tokens.
    min_token_overlap: int = 1
    # Candidate pool size is max_sources times this factor (treated as at least 2).
    source_candidate_multiplier: int = 4
    # Minimum fraction of query tokens that must be present in a source.
    source_min_coverage: float = 0.6
    # Words ignored during tokenization; the default is an immutable copy of
    # DEFAULT_QUERY_STOPWORDS.
    query_stopwords: Set[str] = frozenset(DEFAULT_QUERY_STOPWORDS)
 def query_tokens(text: str, stopwords: Set[str]) -> Set[str]:
    """Tokenize ``text`` into distinct lowercase alphanumeric words of 3+ characters.

    Words contained in ``stopwords`` are dropped; a falsy ``text`` yields an
    empty set.
    """
    kept: Set[str] = set()
    for match in re.finditer(r"[a-z0-9]{3,}", (text or "").lower()):
        word = match.group(0)
        if word not in stopwords:
            kept.add(word)
    return kept
 def source_text_for_match(src: Dict[str, Any]) -> str:
    """Join a source's searchable text fields into one space-separated string.

    Missing or falsy fields contribute an empty string, so the result always
    contains exactly four separator spaces.
    """
    searchable_fields = ("text", "description", "summary", "display_name", "canonical_name")
    return " ".join(str(src.get(name) or "") for name in searchable_fields)
 def is_strong_source_match(query: str, src: Dict[str, Any], cfg: AssistantRetrievalConfig) -> bool:
    """Decide whether ``src`` shares enough query tokens to count as relevant.

    A query with no usable tokens matches everything. Otherwise the source
    must reach both the minimum overlap (at least 2 for multi-token queries)
    and the configured coverage ratio.
    """
    wanted = query_tokens(query, cfg.query_stopwords)
    if not wanted:
        return True

    available = query_tokens(source_text_for_match(src), cfg.query_stopwords)
    shared = len(wanted & available)
    total = max(1, len(wanted))

    required = cfg.min_token_overlap
    if total >= 2:
        required = max(required, 2)
    if shared < required:
        return False
    return shared / total >= cfg.source_min_coverage
 async def retrieve_sources_two_stage(
    query: str,
    release_name: Optional[str],
    max_sources: int,
    include_release_recent_fallback: bool,
    cfg: AssistantRetrievalConfig,
    es_search_hits: Callable[[str, int, Optional[str]], Awaitable[List[Dict[str, Any]]]],
    es_recent_by_release: Callable[[str, int], Awaitable[List[Dict[str, Any]]]],
 ) -> List[Dict[str, Any]]:
    """Gather candidate hits, rank them by token overlap, and keep the strong matches.

    Stage one collects a de-duplicated candidate pool: a release-scoped
    search, then a global search and (optionally) the release's recent
    documents while the pool is still smaller than ``max_sources``. A failing
    search is logged with ``print`` and skipped. Stage two orders the pool by
    (token overlap, search score) descending and returns up to ``max_sources``
    hits that pass ``is_strong_source_match``; the result may be empty.
    """
    pool_size = max(max_sources, max_sources * max(2, cfg.source_candidate_multiplier))
    seen: Set[str] = set()
    pool: List[Dict[str, Any]] = []

    def add_hits(hits: List[Dict[str, Any]]) -> None:
        for hit in hits:
            fields = hit.get("_source", {}) or {}
            ident = str(fields.get("concept_id") or fields.get("source_pk") or "")
            if not ident:
                # No natural key: fingerprint the whole document instead.
                blob = json.dumps(fields, ensure_ascii=False, sort_keys=True)
                ident = hashlib.sha256(blob.encode("utf-8")).hexdigest()[:20]
            if ident not in seen:
                seen.add(ident)
                pool.append(hit)

    try:
        add_hits(await es_search_hits(query, pool_size, release_name))
    except Exception as e:
        print(f"[WARN] stage1 release search failed: {e}")

    if len(pool) < max_sources:
        try:
            add_hits(await es_search_hits(query, pool_size, None))
        except Exception as e:
            print(f"[WARN] stage1 global search failed: {e}")

    if len(pool) < max_sources and include_release_recent_fallback and release_name:
        try:
            add_hits(await es_recent_by_release(release_name, pool_size))
        except Exception as e:
            print(f"[WARN] stage1 release-recent fallback failed: {e}")

    wanted = query_tokens(query, cfg.query_stopwords)
    scored = []
    for hit in pool:
        fields = hit.get("_source", {}) or {}
        tokens = query_tokens(source_text_for_match(fields), cfg.query_stopwords)
        shared = len(wanted.intersection(tokens)) if wanted else 0
        scored.append((shared, float(hit.get("_score") or 0.0), hit))
    # Sort on the numeric pair only; the hit dicts themselves are not comparable.
    scored.sort(key=lambda entry: (entry[0], entry[1]), reverse=True)

    strong = [
        hit
        for _, _, hit in scored
        if is_strong_source_match(query, hit.get("_source", {}) or {}, cfg)
    ]
    return strong[:max_sources]
--- a/services/imap_ingest.py
+++ b/services/imap_ingest.py
@ -0,0 +1,193 @@
 import email
 import hashlib
 import imaplib
 import os
 from datetime import timezone
 from email import policy
 from email.utils import parseaddr, parsedate_to_datetime
 from typing import Any, Dict, List, Optional
 def clean_header_id(v: Optional[str]) -> str:
    """Strip whitespace and enclosing angle brackets from a message-id style header value."""
    value = v or ""
    # Same three passes as a chained strip: whitespace, angle brackets, whitespace.
    for chars in (None, "<>", None):
        value = value.strip(chars)
    return value
 def normalize_thread_id(msg_id: str, refs: str, in_reply_to: str, subject: str, sender: str) -> str:
    """Derive a stable ``thread:...`` identifier for a message.

    Preference order: the last entry of the ``References`` header, then
    ``In-Reply-To``, then a SHA-256 digest (first 24 hex chars) of the
    lowercased ``subject|sender`` pair, or of ``msg_id`` when both are blank.

    Args:
        msg_id: Message id used only as the last-resort hash seed.
        refs: Raw ``References`` header value (whitespace-separated ids).
        in_reply_to: Raw ``In-Reply-To`` header value.
        subject: Message subject.
        sender: Sender address.

    Returns:
        A string starting with ``"thread:"``.
    """
    # split() on a whitespace-only header is empty, so guard before indexing.
    ref_ids = refs.split() if refs else []
    # NOTE(review): this keys on the *last* reference (the direct parent), so
    # replies at different depths get different ids - confirm this is intended.
    refs_clean = clean_header_id(ref_ids[-1] if ref_ids else "")
    in_reply_clean = clean_header_id(in_reply_to)
    if refs_clean:
        return f"thread:{refs_clean}"
    if in_reply_clean:
        return f"thread:{in_reply_clean}"
    seed = f"{subject.strip().lower()}|{sender.strip().lower()}"
    if not seed.strip("|"):
        seed = msg_id
    return "thread:" + hashlib.sha256(seed.encode("utf-8")).hexdigest()[:24]
 def extract_body_text(msg: email.message.Message) -> str:
    """Return the message's readable body, preferring ``text/plain`` over ``text/html``.

    For multipart messages the parts are walked twice: first for a
    ``text/plain`` part, then for a ``text/html`` part. Parts whose
    ``Content-Disposition`` marks them as attachments are skipped in both
    passes, so an attached ``.txt`` or ``.html`` file is never mistaken for
    the body. HTML is returned as-is (markup is not removed).

    Args:
        msg: Parsed message; ``get_content`` must be available on it (the
            modern ``EmailMessage`` API).

    Returns:
        The stripped body text, or ``""`` when nothing suitable is found or
        any error occurs while reading the message (deliberately best effort).
    """

    def part_text(part: Any) -> Optional[str]:
        # Decode one part to stripped text; None when the content is not textual.
        content = part.get_content()
        if isinstance(content, bytes):
            content = content.decode(part.get_content_charset() or "utf-8", errors="replace")
        if isinstance(content, str):
            return content.strip()
        return None

    try:
        if not msg.is_multipart():
            return part_text(msg) or ""
        for wanted_type in ("text/plain", "text/html"):
            for part in msg.walk():
                if (part.get_content_type() or "").lower() != wanted_type:
                    continue
                disposition = (part.get("Content-Disposition") or "").lower()
                if "attachment" in disposition:
                    continue
                text = part_text(part)
                if text is not None:
                    return text
        return ""
    except Exception:
        # Malformed mail must not abort ingestion; callers substitute a placeholder.
        return ""
 def fetch_imap_messages_blocking(
    payload: Any,
    effective_search_criteria: str,
    since_uid: Optional[int],
 ) -> List[Dict[str, Any]]:
    """Fetch messages from an IMAP mailbox and convert them to ingest records.

    This call blocks on network I/O. The mailbox is opened read-only.

    Args:
        payload: Object exposing ``host``, ``port``, ``use_ssl``, ``username``,
            ``mailbox``, ``max_messages`` and ``channel`` attributes, plus an
            optional ``password`` (the ``IMAP_PASSWORD`` environment variable
            is used when it is missing or empty).
        effective_search_criteria: IMAP search criteria used when no
            checkpoint is given.
        since_uid: When set, only messages with a UID greater than this value
            are fetched (oldest first); otherwise the newest ``max_messages``
            results of the criteria search are fetched.

    Returns:
        One dict per fetched message with the keys ``thread_id``,
        ``message_id``, ``sender``, ``channel``, ``sent_at`` (UTC ISO string
        or ``None``), ``body`` and ``metadata``. Empty when nothing matches or
        ``max_messages`` is not positive.

    Raises:
        ValueError: No password is available.
        RuntimeError: Login, mailbox selection, or search reports a non-OK status.
    """
    password = getattr(payload, "password", None) or os.getenv("IMAP_PASSWORD", "")
    if not password:
        raise ValueError("IMAP password missing: provide payload.password or set IMAP_PASSWORD")
    host = str(getattr(payload, "host"))
    port = int(getattr(payload, "port"))
    use_ssl = bool(getattr(payload, "use_ssl"))
    username = str(getattr(payload, "username"))
    mailbox = str(getattr(payload, "mailbox"))
    max_messages = int(getattr(payload, "max_messages"))
    channel = str(getattr(payload, "channel"))
    if use_ssl:
        client = imaplib.IMAP4_SSL(host, port)
    else:
        client = imaplib.IMAP4(host, port)
    try:
        status, _ = client.login(username, password)
        if status != "OK":
            raise RuntimeError("IMAP login failed")
        status, _ = client.select(mailbox, readonly=True)
        if status != "OK":
            raise RuntimeError(f"IMAP select mailbox failed: {mailbox}")
        if since_uid is not None:
            status, search_data = client.uid("search", None, "UID", f"{int(since_uid) + 1}:*")
        else:
            status, search_data = client.uid("search", None, effective_search_criteria)
        if status != "OK":
            raise RuntimeError(f"IMAP search failed: {effective_search_criteria}")
        uid_bytes = search_data[0] if search_data else b""
        uid_list = [u for u in uid_bytes.decode("utf-8", errors="replace").split() if u]
        if since_uid is not None:
            # An "n:*" range can still yield a UID at or below the checkpoint
            # (IMAP range semantics), so re-check every UID numerically.
            filtered: List[str] = []
            for u in uid_list:
                try:
                    if int(u) > int(since_uid):
                        filtered.append(u)
                except Exception:
                    continue
            uid_list = filtered
        # A zero limit would turn the tail slice below into [-0:], i.e. the
        # whole list, and a negative one would slice arbitrarily.
        if not uid_list or max_messages <= 0:
            return []
        is_uid_window = since_uid is not None
        if is_uid_window:
            # Resuming from a checkpoint: take the oldest unseen messages first.
            selected_uids = uid_list[:max_messages]
        else:
            # Fresh search: take the last (presumably newest) results.
            selected_uids = uid_list[-max_messages:]
        out: List[Dict[str, Any]] = []
        for uid in selected_uids:
            status, msg_data = client.uid("fetch", uid, "(RFC822)")
            if status != "OK" or not msg_data:
                continue
            # The fetch response mixes tuples and plain items; the raw message
            # is the second element of the first tuple carrying bytes.
            raw_bytes = None
            for part in msg_data:
                if isinstance(part, tuple) and len(part) >= 2 and isinstance(part[1], (bytes, bytearray)):
                    raw_bytes = bytes(part[1])
                    break
            if not raw_bytes:
                continue
            msg = email.message_from_bytes(raw_bytes, policy=policy.default)
            subject = str(msg.get("Subject") or "").strip()
            from_raw = str(msg.get("From") or "").strip()
            to_raw = str(msg.get("To") or "").strip()
            date_raw = str(msg.get("Date") or "").strip()
            msg_id_raw = str(msg.get("Message-Id") or msg.get("Message-ID") or "").strip()
            refs_raw = str(msg.get("References") or "").strip()
            in_reply_raw = str(msg.get("In-Reply-To") or "").strip()
            sender_email = parseaddr(from_raw)[1] or from_raw or "unknown"
            msg_id_clean = clean_header_id(msg_id_raw)
            if not msg_id_clean:
                # No Message-ID header: synthesize a deterministic id.
                seed = f"{uid}|{subject}|{sender_email}|{date_raw}"
                msg_id_clean = "imap-" + hashlib.sha256(seed.encode("utf-8")).hexdigest()[:24]
            thread_id = normalize_thread_id(
                msg_id=msg_id_clean,
                refs=refs_raw,
                in_reply_to=in_reply_raw,
                subject=subject,
                sender=sender_email,
            )
            sent_at_iso = None
            if date_raw:
                try:
                    dt = parsedate_to_datetime(date_raw)
                    if dt.tzinfo is None:
                        # Naive timestamps are treated as UTC.
                        dt = dt.replace(tzinfo=timezone.utc)
                    sent_at_iso = dt.astimezone(timezone.utc).isoformat()
                except Exception:
                    sent_at_iso = None
            body = extract_body_text(msg)
            if not body:
                body = f"(no body) {subject}".strip()
            metadata = {
                "subject": subject,
                "from": from_raw,
                "to": to_raw,
                "date": date_raw,
                "imap_uid": uid,
                "mailbox": mailbox,
                "host": host,
                "username": username,
            }
            out.append(
                {
                    "thread_id": thread_id,
                    "message_id": msg_id_clean,
                    "sender": sender_email,
                    "channel": channel,
                    "sent_at": sent_at_iso,
                    "body": body,
                    "metadata": metadata,
                }
            )
        return out
    finally:
        # Always try to end the session; a failing logout must not mask the result.
        try:
            client.logout()
        except Exception:
            pass
--- a/services/remote_ops.py
+++ b/services/remote_ops.py
@ -0,0 +1,555 @@
 import asyncio
 import base64
 import json
 import os
 import shlex
 import tempfile
 import uuid
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional
 from fastapi import HTTPException
@dataclass(frozen=True)
 class RemoteOpsConfig:
    """Immutable settings for running helper scripts on a remote host over SSH."""

    # SSH destination handed to the ssh binary and used as the scp target host.
    ssh_host: str
    # Directory on the remote host that every command changes into first.
    remote_dir: str
    # ssh executable and its extra options (one string, split shell-style).
    ssh_bin: str
    ssh_opts: str
    # scp executable and its extra options (one string, split shell-style).
    scp_bin: str
    scp_opts: str
    # Per-subprocess timeout in seconds for ssh and scp invocations.
    timeout_sec: int
    # Remote script names/paths, one per operation. Each is used as the first
    # word of the remote command line. (Presumably relative to remote_dir -
    # confirm against deployment.)
    projector_remote_script: str
    ingest_message_remote_script: str
    ingest_messages_batch_remote_script: str
    assistant_feedback_remote_script: str
    assistant_feedback_query_remote_script: str
    assistant_metrics_query_remote_script: str
    assistant_action_remote_script: str
    assistant_actions_query_remote_script: str
    assistant_proposals_remote_script: str
    assistant_proposals_query_remote_script: str
    runs_remote_script: str
    run_events_remote_script: str
    imap_checkpoint_remote_script: str
    create_messages_release_remote_script: str
 def _tail(text: str, max_chars: int = 8000) -> str:
    """Return at most the last ``max_chars`` characters of ``text``.

    A non-positive ``max_chars`` yields an empty string.
    """
    # text[-0:] would be the whole string, so zero needs its own branch.
    if max_chars <= 0:
        return ""
    return text if len(text) <= max_chars else text[-max_chars:]
 def _b64(s: str) -> str:
    """Encode ``s`` as UTF-8 and return its standard Base64 form as ASCII text."""
    encoded = base64.b64encode(bytes(s, "utf-8"))
    return str(encoded, "ascii")
 def _extract_json_array_from_text(text: str) -> List[Dict[str, Any]]:
    """Pull a JSON array of objects out of noisy command output.

    The span from the first ``[`` to the last ``]`` is tried first. If that
    span is not valid JSON (for example because bracketed log tags such as
    ``[INFO]`` surround the payload), every line that begins with ``[`` is
    tried as the start of a JSON array and the last one that decodes wins.

    Args:
        text: Captured output that should contain one JSON array.

    Returns:
        The array's dict items; non-dict items are dropped.

    Raises:
        ValueError: No brackets are present, or no JSON array can be decoded
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    start = text.find("[")
    end = text.rfind("]")
    if start == -1 or end == -1 or end < start:
        raise ValueError("No JSON array found in output")
    try:
        obj = json.loads(text[start : end + 1])
    except ValueError:
        decoder = json.JSONDecoder()
        recovered = None
        offset = 0
        resume_at = 0
        for line in text.splitlines(keepends=True):
            body = line.lstrip()
            # Only line-leading brackets qualify, so a truncated payload can
            # never be "recovered" from one of its nested arrays.
            if offset >= resume_at and body.startswith("["):
                try:
                    value, resume_at = decoder.raw_decode(text, offset + len(line) - len(body))
                except ValueError:
                    pass
                else:
                    recovered = value
            offset += len(line)
        if recovered is None:
            raise
        obj = recovered
    if not isinstance(obj, list):
        raise ValueError("Parsed value is not a JSON array")
    return [item for item in obj if isinstance(item, dict)]
 def _extract_json_object_from_text(text: str) -> Dict[str, Any]:
    """Pull a JSON object out of noisy command output.

    The span from the first ``{`` to the last ``}`` is tried first. If that
    span is not valid JSON (for example because other brace-containing lines
    surround the payload), every line that begins with ``{`` is tried as the
    start of a JSON object and the last one that decodes wins.

    Args:
        text: Captured output that should contain one JSON object.

    Returns:
        The decoded object.

    Raises:
        ValueError: No braces are present, or no JSON object can be decoded
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end == -1 or end < start:
        raise ValueError("No JSON object found in output")
    try:
        obj = json.loads(text[start : end + 1])
    except ValueError:
        decoder = json.JSONDecoder()
        recovered = None
        offset = 0
        resume_at = 0
        for line in text.splitlines(keepends=True):
            body = line.lstrip()
            # Only line-leading braces qualify, so a truncated payload can
            # never be "recovered" from one of its nested objects.
            if offset >= resume_at and body.startswith("{"):
                try:
                    value, resume_at = decoder.raw_decode(text, offset + len(line) - len(body))
                except ValueError:
                    pass
                else:
                    recovered = value
            offset += len(line)
        if recovered is None:
            raise
        obj = recovered
    if not isinstance(obj, dict):
        raise ValueError("Parsed value is not a JSON object")
    return obj
 class RemoteOps:
    def __init__(self, cfg: RemoteOpsConfig) -> None:
        """Store the configuration that all remote calls on this instance read."""
        self.cfg = cfg
    def _ssh_args(self, command: str) -> List[str]:
        return [self.cfg.ssh_bin, *shlex.split(self.cfg.ssh_opts), self.cfg.ssh_host, command]
    async def _run_ssh(self, parts: List[str], timeout_error: str) -> Dict[str, Any]:
        command = f"cd {shlex.quote(self.cfg.remote_dir)} && {' '.join(shlex.quote(p) for p in parts)}"
        proc = await asyncio.create_subprocess_exec(
            *self._ssh_args(command),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=self.cfg.timeout_sec)
        except asyncio.TimeoutError:
            proc.kill()
            await proc.wait()
            raise HTTPException(status_code=504, detail=timeout_error)
        out = stdout.decode("utf-8", errors="replace")
        err = stderr.decode("utf-8", errors="replace")
        return {"code": proc.returncode, "out": out, "err": err}
    def _error_detail(self, code: int, out: str, err: str) -> Dict[str, Any]:
        """Describe a finished remote command: target, exit code, and truncated output tails."""
        detail: Dict[str, Any] = {"host": self.cfg.ssh_host, "remote_dir": self.cfg.remote_dir}
        detail["exit_code"] = code
        detail["stdout_tail"] = _tail(out)
        detail["stderr_tail"] = _tail(err)
        return detail
    async def run_remote_query_imap_checkpoint(
        self,
        host: str,
        mailbox: str,
        username: str,
        table: str,
    ) -> Optional[int]:
        """Ask the remote checkpoint script for the highest ingested IMAP UID.

        Returns the ``max_uid`` value from the script's JSON output as an int,
        or ``None`` when it is null or absent. A non-zero exit or unparsable
        output is reported as an HTTP 502.
        """
        script_argv = [self.cfg.imap_checkpoint_remote_script, host, mailbox, username, table]
        res = await self._run_ssh(script_argv, "IMAP checkpoint query timed out")
        code, out, err = res["code"], res["out"], res["err"]
        if code != 0:
            raise HTTPException(status_code=502, detail=self._error_detail(code, out, err))
        try:
            max_uid = _extract_json_object_from_text(out).get("max_uid")
            return None if max_uid is None else int(max_uid)
        except Exception as e:
            failure = {
                "message": f"Unable to parse IMAP checkpoint output: {e}",
                "stdout_tail": _tail(out),
                "stderr_tail": _tail(err),
            }
            raise HTTPException(status_code=502, detail=failure)
    async def run_remote_create_messages_release(self, release_name: str) -> Dict[str, Any]:
        """Run the remote release-creation script for ``release_name``.

        Returns the command detail dict extended with ``release_name``; the
        same dict is the HTTP 502 detail when the script exits non-zero.
        """
        script_argv = [self.cfg.create_messages_release_remote_script, release_name]
        res = await self._run_ssh(script_argv, "Create messages release timed out")
        result = self._error_detail(res["code"], res["out"], res["err"])
        result["release_name"] = release_name
        if res["code"] == 0:
            return result
        raise HTTPException(status_code=502, detail=result)
    async def run_remote_projector(self, payload: Any) -> Dict[str, Any]:
        """Run the remote projector script with options taken from ``payload``.

        ``release_name`` and ``targets`` (default ``"both"``) are always passed;
        ``concept_table`` and ``dry_run`` only when set. The result adds two
        booleans telling whether the ``[STEP]`` progress markers appeared in
        stdout. A non-zero exit raises HTTP 502 with the same dict as detail.
        """
        release = str(getattr(payload, "release_name", ""))
        targets = str(getattr(payload, "targets", "both"))
        parts = [self.cfg.projector_remote_script, "--release-name", release, "--targets", targets]
        concept_table = getattr(payload, "concept_table", None)
        if concept_table:
            parts += ["--concept-table", str(concept_table)]
        if bool(getattr(payload, "dry_run", False)):
            parts.append("--dry-run")

        res = await self._run_ssh(parts, "Projector execution timed out")
        stdout_text = res["out"]
        result = self._error_detail(res["code"], stdout_text, res["err"])
        result["spark_read_done"] = "[STEP] spark_read_done" in stdout_text
        result["projection_done"] = "[STEP] projection_done" in stdout_text
        if res["code"] == 0:
            return result
        raise HTTPException(status_code=502, detail=result)
    async def run_remote_ingest_message(self, payload: Any) -> Dict[str, Any]:
        """Ingest one message through the remote single-message script.

        Identifier fields are passed as plain arguments; ``body`` and the
        JSON-encoded ``metadata`` are Base64-encoded. A non-zero exit raises
        HTTP 502 with the command detail.
        """
        plain_fields = ("table", "thread_id", "message_id", "sender", "channel")
        parts = [self.cfg.ingest_message_remote_script]
        parts.extend(str(getattr(payload, name)) for name in plain_fields)
        parts.append(str(getattr(payload, "sent_at") or ""))
        parts.append(_b64(str(getattr(payload, "body") or "")))
        metadata_json = json.dumps(getattr(payload, "metadata", {}) or {}, ensure_ascii=False)
        parts.append(_b64(metadata_json))

        res = await self._run_ssh(parts, "Message ingest execution timed out")
        result = self._error_detail(res["code"], res["out"], res["err"])
        if res["code"] == 0:
            return result
        raise HTTPException(status_code=502, detail=result)
    async def run_remote_ingest_messages_batch(self, payload: Any) -> Dict[str, Any]:
        rows = []
        for m in list(getattr(payload, "messages", []) or []):
            rows.append(
                {
                    "thread_id": getattr(m, "thread_id"),
                    "message_id": getattr(m, "message_id"),
                    "sender": getattr(m, "sender"),
                    "channel": getattr(m, "channel"),
                    "sent_at": getattr(m, "sent_at"),
                    "body": getattr(m, "body"),
                    "metadata": getattr(m, "metadata"),
                }
            )
        if not rows:
            return {
                "host": self.cfg.ssh_host,
                "remote_dir": self.cfg.remote_dir,
                "exit_code": 0,
                "rows": 0,
                "stdout_tail": "[INFO] No rows to ingest",
                "stderr_tail": "",
            }
        local_tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False, encoding="utf-8")
        remote_tmp = f"{self.cfg.remote_dir}/.ingest_messages_{uuid.uuid4().hex}.json"
        try:
            json.dump(rows, local_tmp, ensure_ascii=False)
            local_tmp.flush()
            local_tmp.close()
            scp_target = f"{self.cfg.ssh_host}:{remote_tmp}"
            scp_args = [self.cfg.scp_bin, *shlex.split(self.cfg.scp_opts), local_tmp.name, scp_target]
            scp_proc = await asyncio.create_subprocess_exec(
                *scp_args,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            try:
                scp_stdout, scp_stderr = await asyncio.wait_for(scp_proc.communicate(), timeout=self.cfg.timeout_sec)
            except asyncio.TimeoutError:
                scp_proc.kill()
                await scp_proc.wait()
                raise HTTPException(status_code=504, detail="Batch payload upload timed out")
            if scp_proc.returncode != 0:
                raise HTTPException(
                    status_code=502,
                    detail={
                        "host": self.cfg.ssh_host,
                        "remote_dir": self.cfg.remote_dir,
                        "exit_code": scp_proc.returncode,
                        "stdout_tail": _tail(scp_stdout.decode("utf-8", errors="replace")),
                        "stderr_tail": _tail(scp_stderr.decode("utf-8", errors="replace")),
                    },
                )
            payload_arg = f"@{remote_tmp}"
            parts = [
                self.cfg.ingest_messages_batch_remote_script,
                str(getattr(payload, "table")),
                str(getattr(payload, "dedupe_mode")),
                payload_arg,
            ]
            batch_cmd = " ".join(shlex.quote(p) for p in parts)
            command = (
                f"cd {shlex.quote(self.cfg.remote_dir)} && "
                f"({batch_cmd}); rc=$?; rm -f {shlex.quote(remote_tmp)}; exit $rc"
            )
            proc = await asyncio.create_subprocess_exec(
                *self._ssh_args(command),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            try:
                stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=self.cfg.timeout_sec)
            except asyncio.TimeoutError:
                proc.kill()
                await proc.wait()
                raise HTTPException(status_code=504, detail="Batch message ingest execution timed out")
        finally:
            try:
                os.unlink(local_tmp.name)
            except Exception:
                pass
        out = stdout.decode("utf-8", errors="replace")
        err = stderr.decode("utf-8", errors="replace")
        result = {
            "host": self.cfg.ssh_host,
            "remote_dir": self.cfg.remote_dir,
            "exit_code": proc.returncode,
            "rows": len(rows),
            "stdout_tail": _tail(out),
            "stderr_tail": _tail(err),
        }
        if proc.returncode != 0:
            raise HTTPException(status_code=502, detail=result)
        return result
    async def run_remote_assistant_feedback(self, feedback_id: str, payload: Any, created_at_utc: str) -> Dict[str, Any]:
        """Record one assistant feedback entry through the remote feedback script.

        Free-text fields and the JSON-encoded sources are Base64-encoded; a
        missing confidence is sent as ``0.0``. A non-zero exit raises HTTP 502
        with the command detail.
        """

        def encoded_attr(name: str) -> str:
            return _b64(str(getattr(payload, name) or ""))

        confidence = getattr(payload, "confidence", None)
        conf = 0.0 if confidence is None else confidence
        sources = [s.model_dump() for s in (getattr(payload, "sources", []) or [])]
        parts = [
            self.cfg.assistant_feedback_remote_script,
            feedback_id,
            created_at_utc,
            str(getattr(payload, "outcome")),
            str(getattr(payload, "task_type")),
            str(getattr(payload, "release_name") or ""),
            f"{conf}",
            "true" if bool(getattr(payload, "needs_review", False)) else "false",
            encoded_attr("goal"),
            encoded_attr("draft"),
            encoded_attr("final_text"),
            _b64(json.dumps(sources, ensure_ascii=False)),
            encoded_attr("notes"),
        ]
        res = await self._run_ssh(parts, "Assistant feedback execution timed out")
        result = self._error_detail(res["code"], res["out"], res["err"])
        if res["code"] == 0:
            return result
        raise HTTPException(status_code=502, detail=result)
    async def run_remote_query_assistant_feedback(
        self, outcome: Optional[str], task_type: Optional[str], release_name: Optional[str], limit: int
    ) -> Dict[str, Any]:
        """Query stored feedback rows through the remote feedback-query script.

        Unset filters are passed as empty strings. Returns the host, remote
        directory, and the rows parsed from the script's JSON array output; a
        non-zero exit or unparsable output raises HTTP 502.
        """
        parts = [self.cfg.assistant_feedback_query_remote_script]
        parts.extend(value or "" for value in (outcome, task_type, release_name))
        parts.append(str(limit))

        res = await self._run_ssh(parts, "Assistant feedback query timed out")
        code, out, err = res["code"], res["out"], res["err"]
        if code != 0:
            raise HTTPException(status_code=502, detail=self._error_detail(code, out, err))
        try:
            rows = _extract_json_array_from_text(out)
        except Exception as e:
            failure = {
                "message": f"Unable to parse feedback query output: {e}",
                "stdout_tail": _tail(out),
                "stderr_tail": _tail(err),
            }
            raise HTTPException(status_code=502, detail=failure)
        return {"host": self.cfg.ssh_host, "remote_dir": self.cfg.remote_dir, "rows": rows}
    async def run_remote_query_assistant_metrics(
        self, task_type: Optional[str], release_name: Optional[str], outcome: Optional[str], group_by: str, limit: int
    ) -> Dict[str, Any]:
        """Query aggregated assistant metrics through the remote metrics-query script.

        Unset filters are passed as empty strings. Returns the host, remote
        directory, and the rows parsed from the script's JSON array output; a
        non-zero exit or unparsable output raises HTTP 502.
        """
        parts = [self.cfg.assistant_metrics_query_remote_script]
        parts.extend(value or "" for value in (task_type, release_name, outcome))
        parts.extend([group_by, str(limit)])

        res = await self._run_ssh(parts, "Assistant metrics query timed out")
        code, out, err = res["code"], res["out"], res["err"]
        if code != 0:
            raise HTTPException(status_code=502, detail=self._error_detail(code, out, err))
        try:
            rows = _extract_json_array_from_text(out)
        except Exception as e:
            failure = {
                "message": f"Unable to parse metrics query output: {e}",
                "stdout_tail": _tail(out),
                "stderr_tail": _tail(err),
            }
            raise HTTPException(status_code=502, detail=failure)
        return {"host": self.cfg.ssh_host, "remote_dir": self.cfg.remote_dir, "rows": rows}
    async def run_remote_assistant_action(
        self, action_id: str, payload: Any, step: Any, status: str, output_json: Dict[str, Any], error_text: Optional[str], created_at_utc: str
    ) -> Dict[str, Any]:
        """Send one assistant action record to the configured remote action script.

        Args:
            action_id: Identifier passed as the first script argument.
            payload: Object providing ``task_type``, ``release_name`` and
                ``objective`` (required attributes) plus an optional
                ``approved`` flag (treated as false when absent).
            step: Object providing ``step_id``, ``title`` and ``action_type``
                (required attributes) plus an optional ``requires_approval``
                flag (treated as false when absent).
            status: Status string passed through unchanged.
            output_json: Serialised with ``json.dumps(..., ensure_ascii=False)``
                and then wrapped with ``_b64``.
            error_text: Optional error text; ``None`` is sent as an empty string.
            created_at_utc: Timestamp string passed as the second script argument.

        Returns:
            The value of ``self._error_detail(code, out, err)`` for the run.

        Raises:
            HTTPException: 502, carrying that same detail, when the script
                exits with a non-zero code.
            AttributeError: If ``payload`` or ``step`` lacks a required attribute.
        """
        # Free-text fields go through _b64 (presumably base64 -- confirm against
        # the helper) before becoming positional arguments.
        argv = [self.cfg.assistant_action_remote_script, action_id, created_at_utc]

        # Task-level fields.
        argv.append(str(payload.task_type))
        argv.append(str(payload.release_name or ""))
        argv.append(_b64(str(payload.objective or "")))

        # Step-level fields.
        argv.append(str(step.step_id))
        argv.append(_b64(str(step.title or "")))
        argv.append(str(step.action_type))

        # Boolean flags are spelled as lowercase words for the script.
        argv.append("true" if getattr(step, "requires_approval", False) else "false")
        argv.append("true" if getattr(payload, "approved", False) else "false")

        # Outcome of the action.
        argv.append(status)
        argv.append(_b64(json.dumps(output_json, ensure_ascii=False)))
        argv.append(_b64(error_text or ""))

        ssh_result = await self._run_ssh(argv, "Assistant action logging timed out")
        detail = self._error_detail(ssh_result["code"], ssh_result["out"], ssh_result["err"])
        if ssh_result["code"] == 0:
            return detail
        raise HTTPException(status_code=502, detail=detail)
    async def run_remote_query_assistant_actions(
        self,
        status: Optional[str],
        task_type: Optional[str],
        release_name: Optional[str],
        step_id: Optional[str],
        action_type: Optional[str],
        limit: int,
    ) -> Dict[str, Any]:
        """Run the configured actions-query script remotely and return its rows.

        Args:
            status: First script argument; a falsy value is sent as "".
            task_type: Second script argument; a falsy value is sent as "".
            release_name: Third script argument; a falsy value is sent as "".
            step_id: Fourth script argument; a falsy value is sent as "".
            action_type: Fifth script argument; a falsy value is sent as "".
            limit: Last script argument, converted with ``str()``.

        Returns:
            Dict with "host" and "remote_dir" (both read from ``self.cfg``) and
            "rows" (whatever ``_extract_json_array_from_text`` yields for the
            script's stdout).

        Raises:
            HTTPException: 502 when the script exits with a non-zero code, or
                when its stdout cannot be parsed.
        """
        # Unset filters are still sent (as "") so positions stay fixed.
        optional_filters = (status, task_type, release_name, step_id, action_type)
        argv = [self.cfg.assistant_actions_query_remote_script]
        argv.extend(value or "" for value in optional_filters)
        argv.append(str(limit))

        ssh_result = await self._run_ssh(argv, "Assistant actions query timed out")
        if ssh_result["code"] != 0:
            failure = self._error_detail(ssh_result["code"], ssh_result["out"], ssh_result["err"])
            raise HTTPException(status_code=502, detail=failure)

        try:
            rows = _extract_json_array_from_text(ssh_result["out"])
        except Exception as parse_error:
            # Include output tails so the caller can see what the script printed.
            parse_failure = {
                "message": f"Unable to parse actions query output: {parse_error}",
                "stdout_tail": _tail(ssh_result["out"]),
                "stderr_tail": _tail(ssh_result["err"]),
            }
            raise HTTPException(status_code=502, detail=parse_failure)

        return {
            "host": self.cfg.ssh_host,
            "remote_dir": self.cfg.remote_dir,
            "rows": rows,
        }
    async def run_remote_record_assistant_proposals(
        self,
        proposal_set_id: str,
        created_at_utc: str,
        objective: str,
        release_name: Optional[str],
        summary: str,
        signals: Dict[str, Any],
        proposals: List[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Send a proposal set to the configured remote proposals script.

        Args:
            proposal_set_id: Identifier passed as the first script argument.
            created_at_utc: Timestamp string passed as the second argument.
            objective: Free text; wrapped with ``_b64`` (falsy becomes "").
            release_name: Passed through as-is; a falsy value is sent as "".
            summary: Free text; wrapped with ``_b64`` (falsy becomes "").
            signals: JSON-serialised (falsy becomes ``{}``) and wrapped with ``_b64``.
            proposals: JSON-serialised (falsy becomes ``[]``) and wrapped with ``_b64``.

        Returns:
            The value of ``self._error_detail(code, out, err)`` for the run.

        Raises:
            HTTPException: 502, carrying that same detail, when the script
                exits with a non-zero code.
        """
        # Structured values are JSON-encoded without ASCII escaping before _b64.
        signals_blob = _b64(json.dumps(signals or {}, ensure_ascii=False))
        proposals_blob = _b64(json.dumps(proposals or [], ensure_ascii=False))

        argv = [
            self.cfg.assistant_proposals_remote_script,
            proposal_set_id,
            created_at_utc,
            _b64(objective or ""),
            release_name or "",
            _b64(summary or ""),
            signals_blob,
            proposals_blob,
        ]

        ssh_result = await self._run_ssh(argv, "Assistant proposals logging timed out")
        detail = self._error_detail(ssh_result["code"], ssh_result["out"], ssh_result["err"])
        if ssh_result["code"] == 0:
            return detail
        raise HTTPException(status_code=502, detail=detail)
    async def run_remote_query_assistant_proposals(
        self,
        release_name: Optional[str],
        proposal_set_id: Optional[str],
        limit: int,
    ) -> Dict[str, Any]:
        """Run the configured proposals-query script remotely and return its rows.

        Args:
            release_name: First script argument; a falsy value is sent as "".
            proposal_set_id: Second script argument; a falsy value is sent as "".
            limit: Last script argument, converted with ``str()``.

        Returns:
            Dict with "host" and "remote_dir" (both read from ``self.cfg``) and
            "rows" (whatever ``_extract_json_array_from_text`` yields for the
            script's stdout).

        Raises:
            HTTPException: 502 when the script exits with a non-zero code, or
                when its stdout cannot be parsed.
        """
        argv = [self.cfg.assistant_proposals_query_remote_script]
        # Unset filters are still sent (as "") so positions stay fixed.
        argv += [release_name or "", proposal_set_id or ""]
        argv.append(str(limit))

        ssh_result = await self._run_ssh(argv, "Assistant proposals query timed out")
        if ssh_result["code"] != 0:
            failure = self._error_detail(ssh_result["code"], ssh_result["out"], ssh_result["err"])
            raise HTTPException(status_code=502, detail=failure)

        try:
            rows = _extract_json_array_from_text(ssh_result["out"])
        except Exception as parse_error:
            # Include output tails so the caller can see what the script printed.
            parse_failure = {
                "message": f"Unable to parse proposals query output: {parse_error}",
                "stdout_tail": _tail(ssh_result["out"]),
                "stderr_tail": _tail(ssh_result["err"]),
            }
            raise HTTPException(status_code=502, detail=parse_failure)

        return {
            "host": self.cfg.ssh_host,
            "remote_dir": self.cfg.remote_dir,
            "rows": rows,
        }
    async def run_remote_record_run(
        self,
        run_id: str,
        run_type: str,
        status: str,
        started_at_utc: str,
        finished_at_utc: str,
        actor: str,
        input_json: Dict[str, Any],
        output_json: Optional[Dict[str, Any]],
        error_text: Optional[str],
    ) -> None:
        """Record a run by invoking the configured runs script on the remote host.

        The command is ``cd <remote_dir> && <script> <args...>`` with every
        token shell-quoted, executed through ``self._ssh_args``.

        Args:
            run_id: Run identifier (first script argument).
            run_type: Run type, passed through unchanged.
            status: Run status, passed through unchanged.
            started_at_utc: Start timestamp string, passed through unchanged.
            finished_at_utc: Finish timestamp string, passed through unchanged.
            actor: Actor string, passed through unchanged.
            input_json: JSON-serialised and wrapped with ``_b64``.
            output_json: JSON-serialised and wrapped with ``_b64``; ``None`` is
                sent as an empty payload.
            error_text: Optional error text; ``None`` is sent as an empty string.

        Raises:
            HTTPException: 502 when the remote command exits non-zero; the
                detail carries the exit code plus stdout/stderr tails.
            asyncio.TimeoutError: When the command exceeds
                ``self.cfg.timeout_sec``. The child process is killed and
                reaped before the error propagates.
        """
        parts = [
            self.cfg.runs_remote_script,
            run_id,
            run_type,
            status,
            started_at_utc,
            finished_at_utc,
            actor,
            _b64(json.dumps(input_json, ensure_ascii=False)),
            _b64(json.dumps(output_json, ensure_ascii=False) if output_json is not None else ""),
            _b64(error_text or ""),
        ]
        # Quote every token: the command string is interpreted by the remote shell.
        command = f"cd {shlex.quote(self.cfg.remote_dir)} && {' '.join(shlex.quote(p) for p in parts)}"
        proc = await asyncio.create_subprocess_exec(
            *self._ssh_args(command),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=self.cfg.timeout_sec)
        except asyncio.TimeoutError:
            # wait_for only cancels communicate(); without an explicit kill the
            # ssh child would keep running (and stay unreaped) after we give up.
            try:
                proc.kill()
            except ProcessLookupError:
                # The process exited between the timeout and the kill.
                pass
            await proc.wait()
            raise
        if proc.returncode != 0:
            raise HTTPException(
                status_code=502,
                detail={
                    "message": "Failed to record run in Iceberg",
                    "host": self.cfg.ssh_host,
                    "exit_code": proc.returncode,
                    "stdout_tail": _tail(stdout.decode("utf-8", errors="replace")),
                    "stderr_tail": _tail(stderr.decode("utf-8", errors="replace")),
                },
            )
    async def run_remote_record_event(self, run_id: str, event_type: str, detail_json: Dict[str, Any], created_at_utc: str) -> None:
        """Record a run event by invoking the configured run-events script remotely.

        The command is ``cd <remote_dir> && <script> <args...>`` with every
        token shell-quoted, executed through ``self._ssh_args``.

        Args:
            run_id: Run identifier (first script argument).
            event_type: Event type, passed through unchanged.
            detail_json: JSON-serialised and wrapped with ``_b64``.
            created_at_utc: Timestamp string, passed through unchanged.

        Raises:
            HTTPException: 502 when the remote command exits non-zero; the
                detail carries the exit code plus stdout/stderr tails.
            asyncio.TimeoutError: When the command exceeds
                ``self.cfg.timeout_sec``. The child process is killed and
                reaped before the error propagates.
        """
        parts = [
            self.cfg.run_events_remote_script,
            run_id,
            event_type,
            created_at_utc,
            _b64(json.dumps(detail_json, ensure_ascii=False)),
        ]
        # Quote every token: the command string is interpreted by the remote shell.
        command = f"cd {shlex.quote(self.cfg.remote_dir)} && {' '.join(shlex.quote(p) for p in parts)}"
        proc = await asyncio.create_subprocess_exec(
            *self._ssh_args(command),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=self.cfg.timeout_sec)
        except asyncio.TimeoutError:
            # wait_for only cancels communicate(); without an explicit kill the
            # ssh child would keep running (and stay unreaped) after we give up.
            try:
                proc.kill()
            except ProcessLookupError:
                # The process exited between the timeout and the kill.
                pass
            await proc.wait()
            raise
        if proc.returncode != 0:
            raise HTTPException(
                status_code=502,
                detail={
                    "message": "Failed to record run event in Iceberg",
                    "host": self.cfg.ssh_host,
                    "exit_code": proc.returncode,
                    "stdout_tail": _tail(stdout.decode("utf-8", errors="replace")),
                    "stderr_tail": _tail(stderr.decode("utf-8", errors="replace")),
                },
            )
    async def record_event_best_effort(self, run_id: str, event_type: str, detail_json: Dict[str, Any], created_at_utc: str) -> None:
        """Record a run event, never letting a logging failure reach the caller.

        Delegates to ``run_remote_record_event`` with the same arguments. Any
        ``Exception`` it raises is reported on stdout as a ``[WARN]`` line and
        then dropped, so this coroutine always returns ``None``.
        """
        try:
            await self.run_remote_record_event(run_id, event_type, detail_json, created_at_utc)
        except Exception as exc:
            # Deliberately swallowed: event logging must not fail the run itself.
            warning = f"[WARN] run event logging failed: run_id={run_id} event={event_type} error={exc}"
            print(warning)
        return None
		`@ -0,0 +1,2 @@`
							`"""Service-layer modules for assistant backend."""`