Excerpt boundary: Route binding + guarded control flow + retrieval/policy/refusal-or-answer branches. Imports and backend topology names are intentionally removed.
@router.post("/rag/query")
def rag_query(req: RagRequest):
    """Handle POST /rag/query by running the RAG pipeline with hard guards on.

    Args:
        req: Parsed request body, passed unchanged to ``run_rag_pipeline``.

    Returns:
        Whatever ``run_rag_pipeline`` returns for this request.

    Raises:
        HTTPException: Re-raised unchanged when the pipeline raises one.
            Any other exception is reported as a 500 whose detail is
            ``str(exc)``.
    """
    try:
        return run_rag_pipeline(req, bypass_hard_guards=False)
    except HTTPException:
        # An HTTPException raised further down already carries the status
        # code and detail its author intended; the blanket handler below
        # would otherwise flatten it into a generic 500.
        raise
    except Exception as exc:
        # NOTE(review): detail=str(exc) sends internal error text to the
        # client. Kept for backward compatibility; consider logging the
        # exception server-side and returning a generic message instead.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
# Run one RAG query: guard checks, topic resolution, retrieval, policy gate,
# then either a refusal response or a generated answer.
#
# Parameters:
#   req: request object; this function reads req.question, req.topic,
#        req.topk and req.user_id.
#   bypass_hard_guards: keyword-only flag; when true, the section guarded by
#        `if not bypass_hard_guards:` is skipped.
#
# Returns:
#   A dict with the keys "request_id", "answer", "policy", "citations" and
#   "latency_ms". Refusal responses additionally carry a "refusal" key; the
#   successful-answer response does not.
#
# NOTE(review): names and values written in angle brackets are redacted
# placeholders for helpers/values defined elsewhere; their exact semantics
# cannot be confirmed from this excerpt.
# NOTE(review): leading indentation is not preserved in this excerpt, so it
# cannot be confirmed from here whether the small-talk check below is nested
# inside `if not bypass_hard_guards:` or runs unconditionally - verify
# against the original file before restructuring.
def run_rag_pipeline(req: RagRequest, *, bypass_hard_guards: bool = False):
# The same request_id value is passed to every audit call and echoed in every
# response built below.
request_id = "<GENERATED_REQUEST_ID>"
if not bypass_hard_guards:
# Guard: prompt-injection check on the raw question. On a hit, a refusal is
# built with no chunks, audited with an empty chunk list, and returned
# immediately - retrieval and the policy gate are never reached.
if <PROMPT_INJECTION_CHECK>(req.question):
refusal_payload = <BUILD_REFUSAL>(
question=req.question,
topic="general",
risk_tier="LOW",
reason="Out of scope / security: [REDACTED_GUARD_REASON]",
chunks=[],
)
<AUDIT_CALL>(request_id, req.user_id, req.question, req.topk, [], refusal_payload["answer"], "<LATENCY_MS>")
# The policy here is a hard-coded refusal policy, not a policy-gate result.
return {
"request_id": request_id,
"answer": refusal_payload["answer"],
"policy": {"topic": "general", "risk_tier": "LOW", "allow_generation": False, "mode": "refusal"},
"citations": [],
"latency_ms": "<LATENCY_MS>",
"refusal": refusal_payload["refusal"],
}
# Guard: small-talk / non-SOP check. Same refusal shape as the branch above.
# NOTE(review): unlike the other refusal paths, this branch makes no audit
# call before returning - confirm whether that is intentional.
if <SMALLTALK_CHECK>(req.question):
refusal_payload = <BUILD_REFUSAL>(
question=req.question,
topic="general",
risk_tier="LOW",
reason="Out of scope: non-SOP query",
chunks=[],
)
return {
"request_id": request_id,
"answer": refusal_payload["answer"],
"policy": {"topic": "general", "risk_tier": "LOW", "allow_generation": False, "mode": "refusal"},
"citations": [],
"latency_ms": "<LATENCY_MS>",
"refusal": refusal_payload["refusal"],
}
# Topic resolution order: explicit req.topic, else the inferred topic, else
# "general". The trailing `or "general"` covers a value that is empty after
# strip() (e.g. whitespace only).
topic = (req.topic or <TOPIC_INFERENCE>(req.question) or "general").strip() or "general"
# Retrieve up to req.topk chunks filtered by the resolved topic, then obtain
# a policy decision for the question and those chunks. `policy` is the
# converted form of the decision that is placed in the response.
chunks = <RETRIEVAL_CALL>(req.question, req.topk, topic_filter=topic)
policy_decision = <POLICY_GATE_CALL>(req.question, chunks, topic_override=topic)
policy = <POLICY_TO_DICT>(policy_decision)
# Refuse when retrieval returned nothing, when the decision disallows
# generation, or when the decision's mode is "advice".
if (not chunks) or (not policy_decision.allow_generation) or (policy_decision.mode == "advice"):
# Falsy fields on the decision fall back to the resolved topic, "LOW", and
# "[REFUSED]" respectively.
refusal_payload = <BUILD_REFUSAL>(
question=req.question,
topic=policy_decision.topic or topic,
risk_tier=(policy_decision.risk_tier or "LOW"),
reason=(policy_decision.reason or "[REFUSED]"),
chunks=chunks,
)
# This refusal is audited together with the retrieved chunks.
<AUDIT_CALL>(request_id, req.user_id, req.question, req.topk, chunks, refusal_payload["answer"], "<LATENCY_MS>")
# Citations are read with .get, so a payload without that key yields [].
return {
"request_id": request_id,
"answer": refusal_payload["answer"],
"policy": policy,
"citations": refusal_payload.get("citations", []),
"latency_ms": "<LATENCY_MS>",
"refusal": refusal_payload["refusal"],
}
# Allowed path: generate an answer from the retrieved chunks, audit it, and
# return the chunks themselves as the citations.
answer = <ANSWER_GENERATION_CALL>(req.question, chunks)
<AUDIT_CALL>(request_id, req.user_id, req.question, req.topk, chunks, answer, "<LATENCY_MS>")
return {
"request_id": request_id,
"answer": answer,
"policy": policy,
"citations": chunks,
"latency_ms": "<LATENCY_MS>",
}