refactor(app): extract assistant retrieval/planning/remote/imap services
This commit is contained in:
parent
89955662bf
commit
106983eee6
2
services/__init__.py
Normal file
2
services/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
"""Service-layer modules for assistant backend."""
|
||||||
|
|
||||||
140
services/assistant_planning.py
Normal file
140
services/assistant_planning.py
Normal file
|
|
@ -0,0 +1,140 @@
|
||||||
|
from typing import Any, Awaitable, Callable, Dict, List, Optional
|
||||||
|
|
||||||
|
|
||||||
|
def _get(obj: Any, name: str, default: Any = None) -> Any:
    """Read field ``name`` from ``obj``, returning ``default`` when absent.

    Accepts both attribute-style objects and plain dicts. Dict support
    matters because ``fallback_plan`` produces dict steps: with a pure
    ``getattr`` lookup those steps never match in ``find_plan_step`` and
    their ``requires_approval`` flag silently reads as the default.

    Args:
        obj: Object or dict to read from.
        name: Attribute name / dict key.
        default: Value returned when the field is missing.

    Returns:
        The stored value, or ``default``.
    """
    # Only take the dict path when the key is really present, so every
    # lookup that worked before (attribute access) still behaves the same.
    if isinstance(obj, dict) and name in obj:
        return obj[name]
    return getattr(obj, name, default)
|
||||||
|
|
||||||
|
|
||||||
|
def build_assistant_plan_prompt(payload: Any, source_docs: List[Dict[str, Any]]) -> str:
    """Assemble the LLM prompt that asks for a JSON execution plan.

    Args:
        payload: Request object; ``constraints``, ``max_steps``,
            ``task_type`` and ``objective`` are read from it.
        source_docs: Search hits; each hit's ``_source`` mapping supplies
            the retrieved context.

    Returns:
        The full prompt text.
    """
    constraints = _get(payload, "constraints", []) or []
    if constraints:
        constraint_lines = "\n".join(f"- {item}" for item in constraints)
    else:
        constraint_lines = "- None"

    def render_doc(doc: Dict[str, Any]) -> str:
        # One context chunk per hit; the free text is capped at 600 chars.
        fields = doc.get("_source", {}) or {}
        snippet = str(
            fields.get("text") or fields.get("description") or fields.get("summary") or ""
        )[:600]
        return (
            f"concept_id: {fields.get('concept_id', '')}\n"
            f"source_pk: {fields.get('source_pk', '')}\n"
            f"source_table: {fields.get('source_table', '')}\n"
            f"release_name: {fields.get('release_name', '')}\n"
            f"text: {snippet}"
        )

    rendered = [render_doc(doc) for doc in source_docs]
    context = "\n\n---\n\n".join(rendered) if rendered else "No retrieved context."

    # JSON shape the model is told to follow.
    schema = (
        '{'
        '"plan": ['
        '{"step_id":"S1","title":"...","action_type":"research|draft|ask_user|prepare_data|review","requires_approval":true|false,"notes":"..."}'
        "]"
        "}\n"
    )
    max_steps = _get(payload, "max_steps", 3)
    task_type = _get(payload, "task_type", "")
    objective = _get(payload, "objective", "")

    sections = [
        "You are a cautious personal assistant planner. Produce an execution plan only; do not execute anything.\n",
        "Return valid JSON ONLY with this exact shape:\n",
        schema,
        f"Use at most {max_steps} steps.\n",
        "Prefer safe read-only and draft actions first.\n\n",
        f"Task type: {task_type}\n",
        f"Objective: {objective}\n",
        f"Constraints:\n{constraint_lines}\n\n",
        "Retrieved context:\n",
        f"{context}\n",
    ]
    return "".join(sections)
|
||||||
|
|
||||||
|
|
||||||
|
def fallback_plan(max_steps: int) -> List[Dict[str, Any]]:
    """Return the built-in research / draft / ask-user plan.

    Args:
        max_steps: Upper bound on the number of steps returned. Zero or a
            negative value yields an empty plan; ``None`` returns all steps.

    Returns:
        Up to three step dicts, in execution order.
    """
    steps: List[Dict[str, Any]] = [
        {
            "step_id": "S1",
            "title": "Gather relevant facts and constraints",
            "action_type": "research",
            "requires_approval": False,
            "notes": "Review messages/concepts and identify required context.",
        },
        {
            "step_id": "S2",
            "title": "Draft a response or action proposal",
            "action_type": "draft",
            "requires_approval": False,
            "notes": "Produce a concise draft aligned with objective and constraints.",
        },
        {
            "step_id": "S3",
            "title": "Request user confirmation before any external action",
            "action_type": "ask_user",
            "requires_approval": True,
            "notes": "Do not send or execute changes until approved.",
        },
    ]
    if max_steps is None:
        return steps
    # A negative bound would slice from the end (e.g. -1 -> two steps)
    # instead of meaning "no steps", so clamp it to zero.
    return steps[: max(0, max_steps)]
|
||||||
|
|
||||||
|
|
||||||
|
def find_plan_step(plan: List[Any], step_id: str) -> Optional[Any]:
    """Return the first step in ``plan`` whose ``step_id`` matches, else None."""
    return next(
        (candidate for candidate in plan if _get(candidate, "step_id", "") == step_id),
        None,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def is_high_risk_step(step: Any) -> bool:
    """Tell whether a step's title or notes mention a high-risk action.

    The check is a case-insensitive substring search, so a term also
    matches inside longer words (``"pay"`` matches ``"payload"``).
    """
    title = _get(step, "title", "")
    notes = _get(step, "notes", "") or ""
    haystack = f"{title} {notes}".lower()
    risky_terms = (
        "send",
        "submit",
        "pay",
        "payment",
        "transfer",
        "wire",
        "sign",
        "file",
        "delete",
        "close account",
        "change account",
    )
    for term in risky_terms:
        if term in haystack:
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def enforce_step_policy(payload: Any, step: Any) -> Optional[str]:
    """Check approval requirements for executing ``step``.

    Returns:
        ``None`` when the step may run, otherwise a message naming the
        missing approval or confirmation token.
    """
    approved = bool(_get(payload, "approved", False))
    if not approved and bool(_get(step, "requires_approval", False)):
        return "Step requires approval but approved=false."

    if not is_high_risk_step(step):
        return None

    # High-risk steps need both the approval flag and a non-blank token.
    if not approved:
        return "High-risk step requires approved=true."
    confirm_token = str(_get(payload, "manual_confirm_token", "") or "")
    if confirm_token.strip() == "":
        return "High-risk step requires manual_confirm_token."
    return None
|
||||||
|
|
||||||
|
|
||||||
|
async def execute_plan_step(
    payload: Any,
    step: Any,
    generate_text: Callable[[str], Awaitable[str]],
) -> Dict[str, Any]:
    """Run one plan step and return its result payload.

    Only ``draft`` steps call ``generate_text``; every other action type
    returns a fixed acknowledgement dict.

    Args:
        payload: Request object providing ``task_type`` and ``objective``.
        step: Plan step providing ``action_type``, ``title`` and ``notes``.
        generate_text: Async callable that turns a prompt into text.

    Returns:
        ``{"draft": ...}`` for draft steps, otherwise a note/question dict.
    """
    action_type = str(_get(step, "action_type", ""))
    title = str(_get(step, "title", ""))
    notes = str(_get(step, "notes", "") or "")

    if action_type != "draft":
        # Built per call so callers always receive fresh dict/list objects.
        canned: Dict[str, Dict[str, Any]] = {
            "research": {
                "note": "Research step acknowledged. Use /search or /assistant/draft for grounded retrieval."
            },
            "prepare_data": {
                "note": "Prepare-data step acknowledged.",
                "checklist": ["Collect required inputs", "Normalize format", "Validate completeness"],
            },
            "review": {"note": "Review step requires human review before external action."},
            "ask_user": {
                "question": "Please confirm whether to proceed with the next high-impact action."
            },
        }
        return canned.get(action_type, {"note": "Step recognized but no executor implemented."})

    placeholder = f"Draft for step '{title}'."
    prompt = "\n".join(
        [
            "Draft concise text for this approved planning step.",
            f"Task type: {_get(payload, 'task_type', '')}",
            f"Objective: {_get(payload, 'objective', '')}",
            f"Step: {title}",
            f"Notes: {notes}",
            "Output only final draft text.",
        ]
    )
    try:
        text = await generate_text(prompt)
        if not text.strip():
            text = placeholder
    except Exception:
        # Any generation failure degrades to the placeholder draft.
        text = placeholder
    return {"draft": text}
|
||||||
|
|
||||||
115
services/assistant_retrieval.py
Normal file
115
services/assistant_retrieval.py
Normal file
|
|
@ -0,0 +1,115 @@
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Awaitable, Callable, Dict, List, Optional, Set
|
||||||
|
|
||||||
|
|
||||||
|
# Lower-case words that carry no retrieval signal. Used as the default value
# of AssistantRetrievalConfig.query_stopwords; every entry has at least three
# characters, matching the minimum token length extracted by query_tokens.
DEFAULT_QUERY_STOPWORDS: Set[str] = {
    "the", "and", "for", "with", "that", "this", "from", "have", "has", "had", "are", "was", "were", "will",
    "would", "should", "could", "can", "you", "your", "about", "what", "when", "where", "which", "who", "whom",
    "why", "how", "tomorrow", "today", "please", "any", "there", "need", "want", "know", "does", "did", "done",
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class AssistantRetrievalConfig:
    """Immutable tuning knobs for source retrieval and relevance filtering."""

    # Minimum number of query tokens a source must share with the query
    # (raised to 2 by is_strong_source_match when the query has 2+ tokens).
    min_token_overlap: int = 1
    # Stage-1 candidate pool size is max_sources times this factor (at least 2x).
    source_candidate_multiplier: int = 4
    # Minimum fraction of query tokens that must appear in the source text.
    source_min_coverage: float = 0.6
    # Tokens ignored on both the query and the source side; the default is a
    # frozenset, so it is safe to share between instances.
    query_stopwords: Set[str] = frozenset(DEFAULT_QUERY_STOPWORDS)
|
||||||
|
|
||||||
|
|
||||||
|
def query_tokens(text: str, stopwords: Set[str]) -> Set[str]:
    """Extract the distinct lower-case match tokens from ``text``.

    A token is a run of three or more ASCII letters/digits; tokens listed
    in ``stopwords`` are dropped. ``None`` or empty text yields an empty set.
    """
    lowered = (text or "").lower()
    kept: Set[str] = set()
    for word in re.findall(r"[a-z0-9]{3,}", lowered):
        if word in stopwords:
            continue
        kept.add(word)
    return kept
|
||||||
|
|
||||||
|
|
||||||
|
def source_text_for_match(src: Dict[str, Any]) -> str:
    """Join the text-bearing fields of a source document with single spaces.

    Missing or falsy fields contribute an empty string, so the separators
    are always present.
    """
    field_names = ("text", "description", "summary", "display_name", "canonical_name")
    return " ".join(str(src.get(field) or "") for field in field_names)
|
||||||
|
|
||||||
|
|
||||||
|
def is_strong_source_match(query: str, src: Dict[str, Any], cfg: AssistantRetrievalConfig) -> bool:
    """Decide whether ``src`` shares enough tokens with ``query``.

    A query with no usable tokens matches everything. Otherwise the source
    must reach both the minimum overlap count (at least two for queries
    with two or more tokens) and the configured coverage ratio.
    """
    wanted = query_tokens(query, cfg.query_stopwords)
    if not wanted:
        return True

    found = query_tokens(source_text_for_match(src), cfg.query_stopwords)
    shared = len(wanted & found)

    required = cfg.min_token_overlap
    if len(wanted) >= 2:
        required = max(required, 2)

    if shared < required:
        return False
    return shared / len(wanted) >= cfg.source_min_coverage
|
||||||
|
|
||||||
|
|
||||||
|
async def retrieve_sources_two_stage(
    query: str,
    release_name: Optional[str],
    max_sources: int,
    include_release_recent_fallback: bool,
    cfg: AssistantRetrievalConfig,
    es_search_hits: Callable[[str, int, Optional[str]], Awaitable[List[Dict[str, Any]]]],
    es_recent_by_release: Callable[[str, int], Awaitable[List[Dict[str, Any]]]],
) -> List[Dict[str, Any]]:
    """Collect candidate hits, then keep only strong lexical matches.

    Stage 1 gathers de-duplicated candidates from a release-scoped search,
    then a global search, then (optionally) recent release documents; each
    later source is consulted only while fewer than ``max_sources``
    candidates exist. A failing search is reported on stdout and skipped.
    Stage 2 orders candidates by token overlap then search score and drops
    those that fail ``is_strong_source_match``.

    Returns:
        At most ``max_sources`` hits, best first; possibly empty.
    """
    candidate_size = max(max_sources, max_sources * max(2, cfg.source_candidate_multiplier))
    seen_keys: set[str] = set()
    candidates: List[Dict[str, Any]] = []

    def absorb(hits: List[Dict[str, Any]]) -> None:
        # De-duplicate on concept_id / source_pk, or on a content hash
        # when a hit carries neither identifier.
        for hit in hits:
            fields = hit.get("_source", {}) or {}
            dedupe_key = str(fields.get("concept_id") or fields.get("source_pk") or "")
            if not dedupe_key:
                serialized = json.dumps(fields, ensure_ascii=False, sort_keys=True)
                dedupe_key = hashlib.sha256(serialized.encode("utf-8")).hexdigest()[:20]
            if dedupe_key not in seen_keys:
                seen_keys.add(dedupe_key)
                candidates.append(hit)

    try:
        absorb(await es_search_hits(query, candidate_size, release_name))
    except Exception as e:
        print(f"[WARN] stage1 release search failed: {e}")

    if len(candidates) < max_sources:
        try:
            absorb(await es_search_hits(query, candidate_size, None))
        except Exception as e:
            print(f"[WARN] stage1 global search failed: {e}")

    if len(candidates) < max_sources and include_release_recent_fallback and release_name:
        try:
            absorb(await es_recent_by_release(release_name, candidate_size))
        except Exception as e:
            print(f"[WARN] stage1 release-recent fallback failed: {e}")

    q_tokens = query_tokens(query, cfg.query_stopwords)

    def rank_key(hit: Dict[str, Any]) -> Any:
        fields = hit.get("_source", {}) or {}
        s_tokens = query_tokens(source_text_for_match(fields), cfg.query_stopwords)
        overlap = len(q_tokens.intersection(s_tokens)) if q_tokens else 0
        return (overlap, float(hit.get("_score") or 0.0))

    # sorted() is stable, so ties keep their stage-1 discovery order.
    ordered = sorted(candidates, key=rank_key, reverse=True)
    strong = [
        hit
        for hit in ordered
        if is_strong_source_match(query, hit.get("_source", {}) or {}, cfg)
    ]
    return strong[:max_sources]
|
||||||
|
|
||||||
193
services/imap_ingest.py
Normal file
193
services/imap_ingest.py
Normal file
|
|
@ -0,0 +1,193 @@
|
||||||
|
import email
|
||||||
|
import hashlib
|
||||||
|
import imaplib
|
||||||
|
import os
|
||||||
|
from datetime import timezone
|
||||||
|
from email import policy
|
||||||
|
from email.utils import parseaddr, parsedate_to_datetime
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
|
||||||
|
def clean_header_id(v: Optional[str]) -> str:
    """Strip whitespace and surrounding angle brackets from a header id.

    ``None`` or an empty value yields ``""``.
    """
    return v.strip().strip("<>").strip() if v else ""
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_thread_id(msg_id: str, refs: str, in_reply_to: str, subject: str, sender: str) -> str:
    """Derive a ``thread:<id>`` key for a message.

    Precedence: last id in ``References``, then ``In-Reply-To``, then a
    hash of the lower-cased subject and sender, and finally a hash of
    ``msg_id`` when subject and sender are both blank.

    Args:
        msg_id: Cleaned message id, used only as the last-resort hash seed.
        refs: Raw ``References`` header value (whitespace-separated ids).
        in_reply_to: Raw ``In-Reply-To`` header value.
        subject: Message subject.
        sender: Sender address.

    Returns:
        A string starting with ``thread:``.
    """
    # A whitespace-only header is truthy but splits to an empty list, so
    # test the split result rather than the raw string before indexing.
    ref_ids = (refs or "").split()
    # NOTE(review): the *last* reference is the direct parent, so replies
    # at different depths of one conversation get different thread ids;
    # the first reference looks like the stable root. Confirm before
    # changing, since stored thread ids would shift.
    parent_ref = clean_header_id(ref_ids[-1]) if ref_ids else ""
    if parent_ref:
        return f"thread:{parent_ref}"

    reply_target = clean_header_id(in_reply_to)
    if reply_target:
        return f"thread:{reply_target}"

    seed = f"{subject.strip().lower()}|{sender.strip().lower()}"
    if not seed.strip("|"):
        seed = msg_id
    return "thread:" + hashlib.sha256(seed.encode("utf-8")).hexdigest()[:24]
|
||||||
|
|
||||||
|
|
||||||
|
def extract_body_text(msg: email.message.Message) -> str:
    """Return the message body as stripped text, or ``""`` on any failure.

    For multipart messages the first non-attachment ``text/plain`` part
    wins; if none yields text, the first ``text/html`` part is returned
    as raw markup. Non-multipart messages return their own content.
    """

    def as_text(part: Any) -> Optional[str]:
        # Normalise a part's content to stripped str; None when the
        # content is neither str nor bytes.
        content = part.get_content()
        if isinstance(content, bytes):
            content = content.decode(part.get_content_charset() or "utf-8", errors="replace")
        return content.strip() if isinstance(content, str) else None

    try:
        if not msg.is_multipart():
            text = as_text(msg)
            return "" if text is None else text

        for part in msg.walk():
            ctype = (part.get_content_type() or "").lower()
            disposition = (part.get("Content-Disposition") or "").lower()
            if ctype == "text/plain" and "attachment" not in disposition:
                text = as_text(part)
                if text is not None:
                    return text

        for part in msg.walk():
            if (part.get_content_type() or "").lower() == "text/html":
                text = as_text(part)
                if text is not None:
                    return text
        return ""
    except Exception:
        # Body extraction is best-effort: unparsable content means no body.
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_imap_messages_blocking(
    payload: Any,
    effective_search_criteria: str,
    since_uid: Optional[int],
) -> List[Dict[str, Any]]:
    """Fetch messages from an IMAP mailbox and convert them to ingest rows.

    This call blocks on network I/O. The mailbox is opened read-only.

    Args:
        payload: Object providing ``host``, ``port``, ``use_ssl``,
            ``username``, ``mailbox``, ``max_messages``, ``channel`` and
            optionally ``password`` (falls back to ``IMAP_PASSWORD``).
        effective_search_criteria: IMAP search expression, used only when
            ``since_uid`` is ``None``.
        since_uid: When given, only messages with a greater UID are
            fetched, oldest first; otherwise the newest matches are taken.

    Returns:
        One dict per fetched message with ``thread_id``, ``message_id``,
        ``sender``, ``channel``, ``sent_at``, ``body`` and ``metadata``.
        Empty when nothing matches or ``max_messages`` is not positive.

    Raises:
        ValueError: No password is available.
        RuntimeError: Login, mailbox selection or search did not return OK.
    """
    password = getattr(payload, "password", None) or os.getenv("IMAP_PASSWORD", "")
    if not password:
        raise ValueError("IMAP password missing: provide payload.password or set IMAP_PASSWORD")

    host = str(getattr(payload, "host"))
    port = int(getattr(payload, "port"))
    use_ssl = bool(getattr(payload, "use_ssl"))
    username = str(getattr(payload, "username"))
    mailbox = str(getattr(payload, "mailbox"))
    max_messages = int(getattr(payload, "max_messages"))
    channel = str(getattr(payload, "channel"))

    client = imaplib.IMAP4_SSL(host, port) if use_ssl else imaplib.IMAP4(host, port)
    try:
        status, _ = client.login(username, password)
        if status != "OK":
            raise RuntimeError("IMAP login failed")
        status, _ = client.select(mailbox, readonly=True)
        if status != "OK":
            raise RuntimeError(f"IMAP select mailbox failed: {mailbox}")

        is_uid_window = since_uid is not None
        if is_uid_window:
            floor_uid = int(since_uid)
            uid_range = f"{floor_uid + 1}:*"
            search_desc = f"UID {uid_range}"
            status, search_data = client.uid("search", None, "UID", uid_range)
        else:
            search_desc = effective_search_criteria
            status, search_data = client.uid("search", None, effective_search_criteria)
        if status != "OK":
            # Report the criteria that were actually sent to the server.
            raise RuntimeError(f"IMAP search failed: {search_desc}")

        # The first response item may be missing or None when nothing matched.
        uid_bytes = (search_data[0] if search_data else b"") or b""
        uid_list = uid_bytes.decode("utf-8", errors="replace").split()

        if is_uid_window:
            # "N:*" always matches the highest UID even when it is below N,
            # so re-check every UID against the checkpoint.
            newer: List[str] = []
            for u in uid_list:
                try:
                    if int(u) > floor_uid:
                        newer.append(u)
                except ValueError:
                    continue
            uid_list = newer

        # Guard the slice below: uid_list[-0:] would be the whole mailbox
        # and negative bounds would slice from the wrong end.
        if not uid_list or max_messages <= 0:
            return []

        if is_uid_window:
            selected_uids = uid_list[:max_messages]
        else:
            selected_uids = uid_list[-max_messages:]

        out: List[Dict[str, Any]] = []
        for uid in selected_uids:
            status, msg_data = client.uid("fetch", uid, "(RFC822)")
            if status != "OK" or not msg_data:
                continue
            raw_bytes = _first_rfc822_payload(msg_data)
            if not raw_bytes:
                continue
            out.append(
                _build_imap_record(
                    raw_bytes,
                    uid,
                    mailbox=mailbox,
                    host=host,
                    username=username,
                    channel=channel,
                )
            )
        return out
    finally:
        # Best-effort disconnect; a logout failure must not mask the result
        # or the original exception.
        try:
            client.logout()
        except Exception:
            pass


def _first_rfc822_payload(msg_data: List[Any]) -> Optional[bytes]:
    """Return the first bytes payload found in an IMAP FETCH response."""
    for part in msg_data:
        if isinstance(part, tuple) and len(part) >= 2 and isinstance(part[1], (bytes, bytearray)):
            return bytes(part[1])
    return None


def _build_imap_record(
    raw_bytes: bytes,
    uid: str,
    *,
    mailbox: str,
    host: str,
    username: str,
    channel: str,
) -> Dict[str, Any]:
    """Parse one raw RFC822 message into the ingest row dict."""
    msg = email.message_from_bytes(raw_bytes, policy=policy.default)

    subject = str(msg.get("Subject") or "").strip()
    from_raw = str(msg.get("From") or "").strip()
    to_raw = str(msg.get("To") or "").strip()
    date_raw = str(msg.get("Date") or "").strip()
    msg_id_raw = str(msg.get("Message-Id") or msg.get("Message-ID") or "").strip()
    refs_raw = str(msg.get("References") or "").strip()
    in_reply_raw = str(msg.get("In-Reply-To") or "").strip()

    sender_email = parseaddr(from_raw)[1] or from_raw or "unknown"
    msg_id_clean = clean_header_id(msg_id_raw)
    if not msg_id_clean:
        # No Message-Id header: synthesise an id from stable message traits.
        seed = f"{uid}|{subject}|{sender_email}|{date_raw}"
        msg_id_clean = "imap-" + hashlib.sha256(seed.encode("utf-8")).hexdigest()[:24]

    thread_id = normalize_thread_id(
        msg_id=msg_id_clean,
        refs=refs_raw,
        in_reply_to=in_reply_raw,
        subject=subject,
        sender=sender_email,
    )

    sent_at_iso = None
    if date_raw:
        try:
            dt = parsedate_to_datetime(date_raw)
            if dt.tzinfo is None:
                # Naive dates are treated as UTC.
                dt = dt.replace(tzinfo=timezone.utc)
            sent_at_iso = dt.astimezone(timezone.utc).isoformat()
        except Exception:
            # Unparsable Date header: keep the message, drop the timestamp.
            sent_at_iso = None

    body = extract_body_text(msg)
    if not body:
        body = f"(no body) {subject}".strip()

    return {
        "thread_id": thread_id,
        "message_id": msg_id_clean,
        "sender": sender_email,
        "channel": channel,
        "sent_at": sent_at_iso,
        "body": body,
        "metadata": {
            "subject": subject,
            "from": from_raw,
            "to": to_raw,
            "date": date_raw,
            "imap_uid": uid,
            "mailbox": mailbox,
            "host": host,
            "username": username,
        },
    }
|
||||||
|
|
||||||
555
services/remote_ops.py
Normal file
555
services/remote_ops.py
Normal file
|
|
@ -0,0 +1,555 @@
|
||||||
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import shlex
|
||||||
|
import tempfile
|
||||||
|
import uuid
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from fastapi import HTTPException
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class RemoteOpsConfig:
    """Immutable settings for running helper scripts on a remote host over SSH."""

    # SSH destination; also used as the host part of scp targets.
    ssh_host: str
    # Remote working directory: commands run after `cd` into it, and batch
    # payload files are uploaded beneath it.
    remote_dir: str
    # ssh executable and its extra options (a single string, split with shlex).
    ssh_bin: str
    ssh_opts: str
    # scp executable and its extra options (a single string, split with shlex).
    scp_bin: str
    scp_opts: str
    # Timeout in seconds applied to each ssh/scp subprocess.
    timeout_sec: int
    # Remote script paths, one per operation. Each is passed as the first
    # word of the remote command line.
    projector_remote_script: str
    ingest_message_remote_script: str
    ingest_messages_batch_remote_script: str
    assistant_feedback_remote_script: str
    assistant_feedback_query_remote_script: str
    assistant_metrics_query_remote_script: str
    assistant_action_remote_script: str
    assistant_actions_query_remote_script: str
    assistant_proposals_remote_script: str
    assistant_proposals_query_remote_script: str
    runs_remote_script: str
    run_events_remote_script: str
    imap_checkpoint_remote_script: str
    create_messages_release_remote_script: str
|
||||||
|
|
||||||
|
|
||||||
|
def _tail(text: str, max_chars: int = 8000) -> str:
    """Return at most the last ``max_chars`` characters of ``text``.

    A zero or negative ``max_chars`` yields ``""``.
    """
    # text[-0:] is the whole string and a negative bound slices from the
    # front, so handle non-positive limits explicitly.
    if max_chars <= 0:
        return ""
    return text[-max_chars:]
|
||||||
|
|
||||||
|
|
||||||
|
def _b64(s: str) -> str:
    """Encode ``s`` as UTF-8 and return its standard Base64 text form."""
    raw = s.encode("utf-8")
    return str(base64.b64encode(raw), "ascii")
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_json_array_from_text(text: str) -> List[Dict[str, Any]]:
    """Pull a JSON array of objects out of noisy command output.

    The span from the first ``[`` to the last ``]`` is tried first. If that
    is not valid JSON, for example because bracketed log markers such as
    ``[INFO]`` surround the payload, each ``[`` is tried in turn and the
    first one that starts a valid JSON array is used.

    Args:
        text: Raw output that contains a JSON array somewhere.

    Returns:
        The array's dict elements, in order; non-dict elements are dropped.

    Raises:
        ValueError: No JSON array could be decoded from ``text``.
    """
    start = text.find("[")
    end = text.rfind("]")
    if start == -1 or end == -1 or end < start:
        raise ValueError("No JSON array found in output")

    try:
        obj = json.loads(text[start : end + 1])
    except ValueError:
        # Fallback scan: raw_decode parses one JSON value at a position and
        # ignores whatever follows it.
        obj = None
        decoder = json.JSONDecoder()
        pos = start
        while pos != -1:
            try:
                candidate, _ = decoder.raw_decode(text, pos)
            except ValueError:
                candidate = None
            if isinstance(candidate, list):
                obj = candidate
                break
            pos = text.find("[", pos + 1)
        if obj is None:
            raise ValueError("No JSON array found in output")

    if not isinstance(obj, list):
        raise ValueError("Parsed value is not a JSON array")
    return [item for item in obj if isinstance(item, dict)]
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_json_object_from_text(text: str) -> Dict[str, Any]:
    """Pull a JSON object out of noisy command output.

    The span from the first ``{`` to the last ``}`` is tried first. If that
    is not valid JSON (stray braces in surrounding log lines), each ``{``
    is tried in turn and the first one that starts a valid JSON object is
    used.

    Args:
        text: Raw output that contains a JSON object somewhere.

    Returns:
        The decoded object.

    Raises:
        ValueError: No JSON object could be decoded from ``text``.
    """
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end == -1 or end < start:
        raise ValueError("No JSON object found in output")

    try:
        obj = json.loads(text[start : end + 1])
    except ValueError:
        # Fallback scan: raw_decode parses one JSON value at a position and
        # ignores whatever follows it.
        obj = None
        decoder = json.JSONDecoder()
        pos = start
        while pos != -1:
            try:
                candidate, _ = decoder.raw_decode(text, pos)
            except ValueError:
                candidate = None
            if isinstance(candidate, dict):
                obj = candidate
                break
            pos = text.find("{", pos + 1)
        if obj is None:
            raise ValueError("No JSON object found in output")

    if not isinstance(obj, dict):
        raise ValueError("Parsed value is not a JSON object")
    return obj
|
||||||
|
|
||||||
|
|
||||||
|
class RemoteOps:
|
||||||
|
def __init__(self, cfg: RemoteOpsConfig):
    """Store the remote-execution settings used by every operation."""
    self.cfg = cfg
|
||||||
|
|
||||||
|
def _ssh_args(self, command: str) -> List[str]:
    """Build the local argv that runs ``command`` on the configured host.

    Layout: ssh binary, shlex-split options, host, then the remote command
    as one argument.
    """
    argv = [self.cfg.ssh_bin]
    argv.extend(shlex.split(self.cfg.ssh_opts))
    argv.append(self.cfg.ssh_host)
    argv.append(command)
    return argv
|
||||||
|
|
||||||
|
async def _run_ssh(self, parts: List[str], timeout_error: str) -> Dict[str, Any]:
    """Run a shell-quoted command in the remote directory over SSH.

    Args:
        parts: Command words; each is shell-quoted before being joined.
        timeout_error: Detail text for the 504 raised on timeout.

    Returns:
        ``{"code": exit status, "out": stdout text, "err": stderr text}``.

    Raises:
        HTTPException: 504 when the command exceeds ``cfg.timeout_sec``;
            the subprocess is killed first.
    """
    workdir = shlex.quote(self.cfg.remote_dir)
    quoted_cmd = " ".join(shlex.quote(word) for word in parts)
    command = f"cd {workdir} && {quoted_cmd}"

    proc = await asyncio.create_subprocess_exec(
        *self._ssh_args(command),
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        raw_out, raw_err = await asyncio.wait_for(
            proc.communicate(), timeout=self.cfg.timeout_sec
        )
    except asyncio.TimeoutError:
        proc.kill()
        await proc.wait()
        raise HTTPException(status_code=504, detail=timeout_error)

    return {
        "code": proc.returncode,
        "out": raw_out.decode("utf-8", errors="replace"),
        "err": raw_err.decode("utf-8", errors="replace"),
    }
|
||||||
|
|
||||||
|
def _error_detail(self, code: int, out: str, err: str) -> Dict[str, Any]:
    """Describe a remote command outcome: target, exit code, output tails."""
    detail: Dict[str, Any] = {}
    detail["host"] = self.cfg.ssh_host
    detail["remote_dir"] = self.cfg.remote_dir
    detail["exit_code"] = code
    detail["stdout_tail"] = _tail(out)
    detail["stderr_tail"] = _tail(err)
    return detail
|
||||||
|
|
||||||
|
async def run_remote_query_imap_checkpoint(
    self,
    host: str,
    mailbox: str,
    username: str,
    table: str,
) -> Optional[int]:
    """Ask the remote checkpoint script for the highest ingested IMAP UID.

    Returns:
        The ``max_uid`` value as an int, or ``None`` when the script
        reports no value.

    Raises:
        HTTPException: 502 on a non-zero exit or unparsable output; 504 on
            timeout.
    """
    res = await self._run_ssh(
        [self.cfg.imap_checkpoint_remote_script, host, mailbox, username, table],
        "IMAP checkpoint query timed out",
    )
    code, out, err = res["code"], res["out"], res["err"]
    if code != 0:
        raise HTTPException(status_code=502, detail=self._error_detail(code, out, err))

    try:
        max_uid = _extract_json_object_from_text(out).get("max_uid")
        return None if max_uid is None else int(max_uid)
    except Exception as e:
        raise HTTPException(
            status_code=502,
            detail={
                "message": f"Unable to parse IMAP checkpoint output: {e}",
                "stdout_tail": _tail(out),
                "stderr_tail": _tail(err),
            },
        )
|
||||||
|
|
||||||
|
async def run_remote_create_messages_release(self, release_name: str) -> Dict[str, Any]:
    """Run the remote create-messages-release script for ``release_name``.

    Returns:
        The command outcome detail plus ``release_name``.

    Raises:
        HTTPException: 502 (same detail) on a non-zero exit; 504 on timeout.
    """
    script_args = [self.cfg.create_messages_release_remote_script, release_name]
    res = await self._run_ssh(script_args, "Create messages release timed out")

    result = dict(self._error_detail(res["code"], res["out"], res["err"]))
    result["release_name"] = release_name
    if res["code"] == 0:
        return result
    raise HTTPException(status_code=502, detail=result)
|
||||||
|
|
||||||
|
async def run_remote_projector(self, payload: Any) -> Dict[str, Any]:
    """Run the remote projector script with options taken from ``payload``.

    ``release_name`` and ``targets`` are always passed; ``concept_table``
    and ``dry_run`` add their flags only when set.

    Returns:
        The command outcome detail plus ``spark_read_done`` and
        ``projection_done``, which report whether the matching ``[STEP]``
        marker appeared on stdout.

    Raises:
        HTTPException: 502 (same detail) on a non-zero exit; 504 on timeout.
    """
    argv = [self.cfg.projector_remote_script]
    argv += ["--release-name", str(getattr(payload, "release_name", ""))]
    argv += ["--targets", str(getattr(payload, "targets", "both"))]
    concept_table = getattr(payload, "concept_table", None)
    if concept_table:
        argv += ["--concept-table", str(concept_table)]
    if bool(getattr(payload, "dry_run", False)):
        argv.append("--dry-run")

    res = await self._run_ssh(argv, "Projector execution timed out")
    stdout_text = res["out"]

    result = dict(self._error_detail(res["code"], stdout_text, res["err"]))
    result["spark_read_done"] = "[STEP] spark_read_done" in stdout_text
    result["projection_done"] = "[STEP] projection_done" in stdout_text
    if res["code"] == 0:
        return result
    raise HTTPException(status_code=502, detail=result)
|
||||||
|
|
||||||
|
async def run_remote_ingest_message(self, payload: Any) -> Dict[str, Any]:
    """Ingest one message through the remote ingest script.

    Plain fields are passed as positional arguments; the body and the
    JSON-encoded metadata are Base64-encoded first.

    Returns:
        The command outcome detail.

    Raises:
        HTTPException: 502 (same detail) on a non-zero exit; 504 on timeout.
    """
    plain_fields = ("table", "thread_id", "message_id", "sender", "channel")
    argv = [self.cfg.ingest_message_remote_script]
    argv.extend(str(getattr(payload, field)) for field in plain_fields)
    argv.append(str(getattr(payload, "sent_at") or ""))
    argv.append(_b64(str(getattr(payload, "body") or "")))
    metadata_json = json.dumps(getattr(payload, "metadata", {}) or {}, ensure_ascii=False)
    argv.append(_b64(metadata_json))

    res = await self._run_ssh(argv, "Message ingest execution timed out")
    result = self._error_detail(res["code"], res["out"], res["err"])
    if res["code"] == 0:
        return result
    raise HTTPException(status_code=502, detail=result)
|
||||||
|
|
||||||
|
async def run_remote_ingest_messages_batch(self, payload: Any) -> Dict[str, Any]:
    """Ingest many messages in one remote call.

    The rows are written to a local temporary JSON file, copied to the
    remote directory with scp, and handed to the batch ingest script as
    ``@<remote path>``. An empty message list returns a success-shaped
    result without touching the remote host.

    Returns:
        Dict with ``host``, ``remote_dir``, ``exit_code``, ``rows``,
        ``stdout_tail`` and ``stderr_tail``.

    Raises:
        HTTPException: 504 when the upload or the ingest command times
            out; 502 when scp or the ingest command exits non-zero.
    """
    rows = []
    for m in list(getattr(payload, "messages", []) or []):
        rows.append(
            {
                "thread_id": getattr(m, "thread_id"),
                "message_id": getattr(m, "message_id"),
                "sender": getattr(m, "sender"),
                "channel": getattr(m, "channel"),
                "sent_at": getattr(m, "sent_at"),
                "body": getattr(m, "body"),
                "metadata": getattr(m, "metadata"),
            }
        )
    if not rows:
        return {
            "host": self.cfg.ssh_host,
            "remote_dir": self.cfg.remote_dir,
            "exit_code": 0,
            "rows": 0,
            "stdout_tail": "[INFO] No rows to ingest",
            "stderr_tail": "",
        }

    # delete=False: the file must outlive close() so scp can read it; it is
    # removed in the finally block below.
    local_tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False, encoding="utf-8")
    # Random name so concurrent batches do not collide on the remote side.
    remote_tmp = f"{self.cfg.remote_dir}/.ingest_messages_{uuid.uuid4().hex}.json"
    try:
        # NOTE(review): if json.dump raises, the handle is never explicitly
        # closed (the file itself is still unlinked below).
        json.dump(rows, local_tmp, ensure_ascii=False)
        local_tmp.flush()
        local_tmp.close()

        # Step 1: upload the payload file.
        scp_target = f"{self.cfg.ssh_host}:{remote_tmp}"
        scp_args = [self.cfg.scp_bin, *shlex.split(self.cfg.scp_opts), local_tmp.name, scp_target]
        scp_proc = await asyncio.create_subprocess_exec(
            *scp_args,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            scp_stdout, scp_stderr = await asyncio.wait_for(scp_proc.communicate(), timeout=self.cfg.timeout_sec)
        except asyncio.TimeoutError:
            scp_proc.kill()
            await scp_proc.wait()
            raise HTTPException(status_code=504, detail="Batch payload upload timed out")
        if scp_proc.returncode != 0:
            raise HTTPException(
                status_code=502,
                detail={
                    "host": self.cfg.ssh_host,
                    "remote_dir": self.cfg.remote_dir,
                    "exit_code": scp_proc.returncode,
                    "stdout_tail": _tail(scp_stdout.decode("utf-8", errors="replace")),
                    "stderr_tail": _tail(scp_stderr.decode("utf-8", errors="replace")),
                },
            )

        # Step 2: run the batch script against the uploaded file.
        payload_arg = f"@{remote_tmp}"
        parts = [
            self.cfg.ingest_messages_batch_remote_script,
            str(getattr(payload, "table")),
            str(getattr(payload, "dedupe_mode")),
            payload_arg,
        ]
        batch_cmd = " ".join(shlex.quote(p) for p in parts)
        # Capture the script's exit status, delete the uploaded file, then
        # exit with the captured status so cleanup does not mask failures.
        command = (
            f"cd {shlex.quote(self.cfg.remote_dir)} && "
            f"({batch_cmd}); rc=$?; rm -f {shlex.quote(remote_tmp)}; exit $rc"
        )
        proc = await asyncio.create_subprocess_exec(
            *self._ssh_args(command),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=self.cfg.timeout_sec)
        except asyncio.TimeoutError:
            # NOTE(review): killing the local ssh process here may leave the
            # uploaded remote file behind — confirm whether that matters.
            proc.kill()
            await proc.wait()
            raise HTTPException(status_code=504, detail="Batch message ingest execution timed out")
    finally:
        # Always remove the local payload file; failures here are ignored.
        try:
            os.unlink(local_tmp.name)
        except Exception:
            pass

    out = stdout.decode("utf-8", errors="replace")
    err = stderr.decode("utf-8", errors="replace")
    result = {
        "host": self.cfg.ssh_host,
        "remote_dir": self.cfg.remote_dir,
        "exit_code": proc.returncode,
        "rows": len(rows),
        "stdout_tail": _tail(out),
        "stderr_tail": _tail(err),
    }
    if proc.returncode != 0:
        raise HTTPException(status_code=502, detail=result)
    return result
|
||||||
|
|
||||||
|
async def run_remote_assistant_feedback(self, feedback_id: str, payload: Any, created_at_utc: str) -> Dict[str, Any]:
    """Record one assistant feedback entry through the remote script.

    Free-text fields (goal, draft, final text, notes) and the JSON list of
    sources are Base64-encoded; a missing confidence is sent as ``0.0``.

    Returns:
        The command outcome detail.

    Raises:
        HTTPException: 502 (same detail) on a non-zero exit; 504 on timeout.
    """
    confidence = getattr(payload, "confidence", None)
    if confidence is None:
        confidence = 0.0
    source_dicts = [src.model_dump() for src in list(getattr(payload, "sources", []) or [])]

    def encoded_text(field: str) -> str:
        # Base64 form of an optional free-text payload field.
        return _b64(str(getattr(payload, field) or ""))

    argv = [
        self.cfg.assistant_feedback_remote_script,
        feedback_id,
        created_at_utc,
        str(getattr(payload, "outcome")),
        str(getattr(payload, "task_type")),
        str(getattr(payload, "release_name") or ""),
        f"{confidence}",
        "true" if bool(getattr(payload, "needs_review", False)) else "false",
        encoded_text("goal"),
        encoded_text("draft"),
        encoded_text("final_text"),
        _b64(json.dumps(source_dicts, ensure_ascii=False)),
        encoded_text("notes"),
    ]
    res = await self._run_ssh(argv, "Assistant feedback execution timed out")
    result = self._error_detail(res["code"], res["out"], res["err"])
    if res["code"] == 0:
        return result
    raise HTTPException(status_code=502, detail=result)
|
||||||
|
|
||||||
|
async def run_remote_query_assistant_feedback(
    self, outcome: Optional[str], task_type: Optional[str], release_name: Optional[str], limit: int
) -> Dict[str, Any]:
    """Query stored assistant feedback through the remote query script.

    Unset filters are passed to the script as empty strings. The script's
    stdout is parsed with ``_extract_json_array_from_text``.

    Returns:
        A dict with the SSH host, the remote directory and the parsed rows.

    Raises:
        HTTPException: 502 when the command exits non-zero or its stdout
            cannot be parsed.
    """
    argv = [self.cfg.assistant_feedback_query_remote_script]
    argv += [value or "" for value in (outcome, task_type, release_name)]
    argv.append(str(limit))

    reply = await self._run_ssh(argv, "Assistant feedback query timed out")
    if reply["code"] != 0:
        failure = self._error_detail(reply["code"], reply["out"], reply["err"])
        raise HTTPException(status_code=502, detail=failure)

    stdout_text = reply["out"]
    try:
        rows = _extract_json_array_from_text(stdout_text)
    except Exception as e:
        parse_failure = {
            "message": f"Unable to parse feedback query output: {e}",
            "stdout_tail": _tail(stdout_text),
            "stderr_tail": _tail(reply["err"]),
        }
        raise HTTPException(status_code=502, detail=parse_failure)

    return {
        "host": self.cfg.ssh_host,
        "remote_dir": self.cfg.remote_dir,
        "rows": rows,
    }
|
||||||
|
|
||||||
|
async def run_remote_query_assistant_metrics(
    self, task_type: Optional[str], release_name: Optional[str], outcome: Optional[str], group_by: str, limit: int
) -> Dict[str, Any]:
    """Query assistant metrics through the remote metrics script.

    Unset filters are passed as empty strings; ``group_by`` is forwarded
    unchanged. The script's stdout is parsed with
    ``_extract_json_array_from_text``.

    Returns:
        A dict with the SSH host, the remote directory and the parsed rows.

    Raises:
        HTTPException: 502 when the command exits non-zero or its stdout
            cannot be parsed.
    """
    argv = [
        self.cfg.assistant_metrics_query_remote_script,
        *[criterion or "" for criterion in (task_type, release_name, outcome)],
        group_by,
        str(limit),
    ]

    reply = await self._run_ssh(argv, "Assistant metrics query timed out")
    if reply["code"] != 0:
        failure = self._error_detail(reply["code"], reply["out"], reply["err"])
        raise HTTPException(status_code=502, detail=failure)

    stdout_text = reply["out"]
    try:
        rows = _extract_json_array_from_text(stdout_text)
    except Exception as e:
        parse_failure = {
            "message": f"Unable to parse metrics query output: {e}",
            "stdout_tail": _tail(stdout_text),
            "stderr_tail": _tail(reply["err"]),
        }
        raise HTTPException(status_code=502, detail=parse_failure)

    return {
        "host": self.cfg.ssh_host,
        "remote_dir": self.cfg.remote_dir,
        "rows": rows,
    }
|
||||||
|
|
||||||
|
async def run_remote_assistant_action(
    self, action_id: str, payload: Any, step: Any, status: str, output_json: Dict[str, Any], error_text: Optional[str], created_at_utc: str
) -> Dict[str, Any]:
    """Log one assistant action (a plan step and its result) remotely.

    Identifier-like fields are passed as plain arguments; the objective, the
    step title, the JSON output and the error text go through ``_b64``.

    Returns:
        The dict produced by ``self._error_detail`` from the command's exit
        code, stdout and stderr.

    Raises:
        HTTPException: 502 carrying that same dict when the exit code is
            non-zero.
    """

    def as_flag(value: Any) -> str:
        # The remote script receives booleans as the literals "true"/"false".
        return "true" if value else "false"

    argv = [
        self.cfg.assistant_action_remote_script,
        action_id,
        created_at_utc,
        str(payload.task_type),
        str(payload.release_name or ""),
        _b64(str(payload.objective or "")),
        str(step.step_id),
        _b64(str(step.title or "")),
        str(step.action_type),
        as_flag(getattr(step, "requires_approval", False)),
        as_flag(getattr(payload, "approved", False)),
        status,
        _b64(json.dumps(output_json, ensure_ascii=False)),
        _b64(error_text or ""),
    ]

    reply = await self._run_ssh(argv, "Assistant action logging timed out")
    exit_code = reply["code"]
    detail = self._error_detail(exit_code, reply["out"], reply["err"])
    if exit_code == 0:
        return detail
    raise HTTPException(status_code=502, detail=detail)
|
||||||
|
|
||||||
|
async def run_remote_query_assistant_actions(
    self,
    status: Optional[str],
    task_type: Optional[str],
    release_name: Optional[str],
    step_id: Optional[str],
    action_type: Optional[str],
    limit: int,
) -> Dict[str, Any]:
    """Query logged assistant actions through the remote query script.

    Unset filters are passed to the script as empty strings. The script's
    stdout is parsed with ``_extract_json_array_from_text``.

    Returns:
        A dict with the SSH host, the remote directory and the parsed rows.

    Raises:
        HTTPException: 502 when the command exits non-zero or its stdout
            cannot be parsed.
    """
    optional_filters = (status, task_type, release_name, step_id, action_type)
    argv = [self.cfg.assistant_actions_query_remote_script]
    for criterion in optional_filters:
        argv.append(criterion or "")
    argv.append(str(limit))

    reply = await self._run_ssh(argv, "Assistant actions query timed out")
    if reply["code"] != 0:
        failure = self._error_detail(reply["code"], reply["out"], reply["err"])
        raise HTTPException(status_code=502, detail=failure)

    stdout_text = reply["out"]
    try:
        rows = _extract_json_array_from_text(stdout_text)
    except Exception as e:
        parse_failure = {
            "message": f"Unable to parse actions query output: {e}",
            "stdout_tail": _tail(stdout_text),
            "stderr_tail": _tail(reply["err"]),
        }
        raise HTTPException(status_code=502, detail=parse_failure)

    return {
        "host": self.cfg.ssh_host,
        "remote_dir": self.cfg.remote_dir,
        "rows": rows,
    }
|
||||||
|
|
||||||
|
async def run_remote_record_assistant_proposals(
    self,
    proposal_set_id: str,
    created_at_utc: str,
    objective: str,
    release_name: Optional[str],
    summary: str,
    signals: Dict[str, Any],
    proposals: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """Log a set of assistant proposals through the remote proposals script.

    The objective, summary, signals and proposals go through ``_b64`` (the
    latter two as JSON); falsy values become an empty string, object or
    list respectively.

    Returns:
        The dict produced by ``self._error_detail`` from the command's exit
        code, stdout and stderr.

    Raises:
        HTTPException: 502 carrying that same dict when the exit code is
            non-zero.
    """

    def json_arg(value: Any) -> str:
        # Serialise without ASCII-escaping, then encode for the command line.
        return _b64(json.dumps(value, ensure_ascii=False))

    argv = [
        self.cfg.assistant_proposals_remote_script,
        proposal_set_id,
        created_at_utc,
        _b64(objective or ""),
        release_name or "",
        _b64(summary or ""),
        json_arg(signals or {}),
        json_arg(proposals or []),
    ]

    reply = await self._run_ssh(argv, "Assistant proposals logging timed out")
    exit_code = reply["code"]
    detail = self._error_detail(exit_code, reply["out"], reply["err"])
    if exit_code == 0:
        return detail
    raise HTTPException(status_code=502, detail=detail)
|
||||||
|
|
||||||
|
async def run_remote_query_assistant_proposals(
    self,
    release_name: Optional[str],
    proposal_set_id: Optional[str],
    limit: int,
) -> Dict[str, Any]:
    """Query logged assistant proposals through the remote query script.

    Unset filters are passed to the script as empty strings. The script's
    stdout is parsed with ``_extract_json_array_from_text``.

    Returns:
        A dict with the SSH host, the remote directory and the parsed rows.

    Raises:
        HTTPException: 502 when the command exits non-zero or its stdout
            cannot be parsed.
    """
    script = self.cfg.assistant_proposals_query_remote_script
    argv = [script, release_name or "", proposal_set_id or "", str(limit)]

    reply = await self._run_ssh(argv, "Assistant proposals query timed out")
    if reply["code"] != 0:
        failure = self._error_detail(reply["code"], reply["out"], reply["err"])
        raise HTTPException(status_code=502, detail=failure)

    stdout_text = reply["out"]
    try:
        rows = _extract_json_array_from_text(stdout_text)
    except Exception as e:
        parse_failure = {
            "message": f"Unable to parse proposals query output: {e}",
            "stdout_tail": _tail(stdout_text),
            "stderr_tail": _tail(reply["err"]),
        }
        raise HTTPException(status_code=502, detail=parse_failure)

    return {
        "host": self.cfg.ssh_host,
        "remote_dir": self.cfg.remote_dir,
        "rows": rows,
    }
|
||||||
|
|
||||||
|
async def run_remote_record_run(
    self,
    run_id: str,
    run_type: str,
    status: str,
    started_at_utc: str,
    finished_at_utc: str,
    actor: str,
    input_json: Dict[str, Any],
    output_json: Optional[Dict[str, Any]],
    error_text: Optional[str],
) -> None:
    """Record one run via the configured remote runs script over SSH.

    The command is executed from ``self.cfg.remote_dir`` with every argument
    shell-quoted. ``input_json``, ``output_json`` and ``error_text`` go
    through ``_b64``; a ``None`` output or error is sent as an encoded empty
    string.

    Raises:
        HTTPException: 504 when the command does not finish within
            ``self.cfg.timeout_sec``; 502 (with host, exit code and
            stdout/stderr tails) when it exits non-zero.
    """
    parts = [
        self.cfg.runs_remote_script,
        run_id,
        run_type,
        status,
        started_at_utc,
        finished_at_utc,
        actor,
        _b64(json.dumps(input_json, ensure_ascii=False)),
        _b64(json.dumps(output_json, ensure_ascii=False) if output_json is not None else ""),
        _b64(error_text or ""),
    ]
    command = f"cd {shlex.quote(self.cfg.remote_dir)} && {' '.join(shlex.quote(p) for p in parts)}"
    proc = await asyncio.create_subprocess_exec(
        *self._ssh_args(command),
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=self.cfg.timeout_sec)
    except asyncio.TimeoutError:
        # wait_for only cancels communicate(); without an explicit kill the
        # ssh child would keep running after the request has given up.
        try:
            proc.kill()
        except ProcessLookupError:
            # The process exited between the timeout and the kill.
            pass
        await proc.wait()
        raise HTTPException(status_code=504, detail="Run record execution timed out")
    if proc.returncode != 0:
        raise HTTPException(
            status_code=502,
            detail={
                "message": "Failed to record run in Iceberg",
                "host": self.cfg.ssh_host,
                "exit_code": proc.returncode,
                "stdout_tail": _tail(stdout.decode("utf-8", errors="replace")),
                "stderr_tail": _tail(stderr.decode("utf-8", errors="replace")),
            },
        )
|
||||||
|
|
||||||
|
async def run_remote_record_event(self, run_id: str, event_type: str, detail_json: Dict[str, Any], created_at_utc: str) -> None:
    """Record one run event via the configured remote events script over SSH.

    The command is executed from ``self.cfg.remote_dir`` with every argument
    shell-quoted; ``detail_json`` is JSON-serialised and passed through
    ``_b64``.

    Raises:
        HTTPException: 504 when the command does not finish within
            ``self.cfg.timeout_sec``; 502 (with host, exit code and
            stdout/stderr tails) when it exits non-zero.
    """
    parts = [
        self.cfg.run_events_remote_script,
        run_id,
        event_type,
        created_at_utc,
        _b64(json.dumps(detail_json, ensure_ascii=False)),
    ]
    command = f"cd {shlex.quote(self.cfg.remote_dir)} && {' '.join(shlex.quote(p) for p in parts)}"
    proc = await asyncio.create_subprocess_exec(
        *self._ssh_args(command),
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=self.cfg.timeout_sec)
    except asyncio.TimeoutError:
        # wait_for only cancels communicate(); without an explicit kill the
        # ssh child would keep running after the request has given up.
        try:
            proc.kill()
        except ProcessLookupError:
            # The process exited between the timeout and the kill.
            pass
        await proc.wait()
        raise HTTPException(status_code=504, detail="Run event record execution timed out")
    if proc.returncode != 0:
        raise HTTPException(
            status_code=502,
            detail={
                "message": "Failed to record run event in Iceberg",
                "host": self.cfg.ssh_host,
                "exit_code": proc.returncode,
                "stdout_tail": _tail(stdout.decode("utf-8", errors="replace")),
                "stderr_tail": _tail(stderr.decode("utf-8", errors="replace")),
            },
        )
|
||||||
|
|
||||||
|
async def record_event_best_effort(self, run_id: str, event_type: str, detail_json: Dict[str, Any], created_at_utc: str) -> None:
    """Record a run event, downgrading any failure to a printed warning.

    Delegates to ``run_remote_record_event``. Any ``Exception`` it raises is
    caught and reported on stdout, so a failure to log an event never
    reaches the caller.
    """
    try:
        await self.run_remote_record_event(
            run_id,
            event_type,
            detail_json,
            created_at_utc,
        )
    except Exception as e:
        # Deliberate best-effort: event logging must not break the caller.
        warning = f"[WARN] run event logging failed: run_id={run_id} event={event_type} error={e}"
        print(warning)
    return None
|
||||||
Loading…
Reference in a new issue