feat: enable SecurityMonitor guardrail + config cleanup
This commit is contained in:
31
backend/cx-relay-mcp.py
Executable file
31
backend/cx-relay-mcp.py
Executable file
@@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env python3
|
||||
import base64
import json
import os
import shlex
import sys
import urllib.parse
import urllib.request
|
||||
|
||||
def call_cx(command: str) -> str:
    """Run a shell command on the remote S204 host via the weval cx HTTP API.

    Args:
        command: Shell command line to execute remotely.

    Returns:
        The remote stdout concatenated with stderr (either may be empty).

    Raises:
        urllib.error.URLError: If the HTTPS request fails or times out.
    """
    # SECURITY: the API key is a shared secret. It can now be overridden via
    # the CX_API_KEY environment variable; the hardcoded value is kept only as
    # a backward-compatible default and should be rotated out of the source.
    api_key = os.environ.get("CX_API_KEY", "WEVADS2026")
    # Command is base64-encoded so arbitrary shell text survives URL encoding.
    encoded = base64.b64encode(command.encode()).decode()
    data = urllib.parse.urlencode({"k": api_key, "c": encoded}).encode()
    req = urllib.request.Request("https://weval-consulting.com/api/cx", data=data)
    # Long timeout: remote commands (builds, SSH hops) can be slow.
    with urllib.request.urlopen(req, timeout=120) as resp:
        result = json.loads(resp.read())
    return result.get("stdout", "") + result.get("stderr", "")
for line in sys.stdin:
|
||||
try:
|
||||
msg = json.loads(line)
|
||||
if msg.get("method") == "tools/list":
|
||||
print(json.dumps({"result": {"tools": [
|
||||
{"name": "cx_exec", "description": "Execute command on S204", "inputSchema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
|
||||
{"name": "s95_exec", "description": "Execute command on S95 via SSH", "inputSchema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}
|
||||
]}}))
|
||||
elif msg.get("method") == "tools/call":
|
||||
name = msg["params"]["name"]
|
||||
cmd = msg["params"].get("arguments", {}).get("command", "")
|
||||
if name == "s95_exec":
|
||||
result = call_cx("sshpass -p rspmk4J9i3fm ssh -o StrictHostKeyChecking=no -p 49222 root@10.1.0.3 " + repr(cmd))
|
||||
else:
|
||||
result = call_cx(cmd)
|
||||
print(json.dumps({"result": {"content": [{"type": "text", "text": result}]}}))
|
||||
sys.stdout.flush()
|
||||
except Exception as e:
|
||||
print(json.dumps({"error": str(e)}))
|
||||
sys.stdout.flush()
|
||||
1
backend/memory.json
Normal file
1
backend/memory.json
Normal file
@@ -0,0 +1 @@
|
||||
{"version": "1.0", "lastUpdated": "2026-03-28T22:30:00Z", "user": {"workContext": {"summary": "Yanis Mahboub est le fondateur de WEVAL Consulting (Casablanca), cabinet de conseil en management couvrant ERP/SAP, Cloud, Supply Chain, Life Sciences/Pharma, Manufacturing, Finance et Cybersécurité. WEVAL opère un écosystème souverain de 42+ produits SaaS IA. Plateformes clés: WEVIA PUBLIC (widget chat + Centre Commande, 71 modules), WEVCODE (assistant code IA souverain), WEDROID (agent diagnostic backend v5.0), WEVADS IA (SPA email intelligence, 41 pages, 49 APIs), Ethica (plateforme HCP outreach pharma pour le Maghreb). Infra: S204 (204.168.152.13, nginx/PHP/PG13, Ollama 9 modèles), S95 (95.216.167.89, Apache/PHP/PG13, PMTA+KumoMTA, ~7.35M contacts), S151 (151.80.235.110, OVH tracking relay). AI chain: Groq llama-3.3-70b → Cerebras → Mistral → SambaNova → Ollama local. DeerFlow 2.0 actif sur S204 avec 42 skills, 3 modèles.", "updatedAt": "2026-03-28T22:30:00Z"}, "personalContext": {"summary": "Fondateur tech Casablanca. Langue: français (concis, majuscules=urgence). Exige fixes complets sans régressions. Principes: scan avant création, GOLD backup, NonReg obligatoire, approche systémique globale. Partenaires: Vistex, Confluent Digital, Groupe Ethica, Huawei Cloud.", "updatedAt": "2026-03-28T22:30:00Z"}, "topOfMind": {"summary": "WEVADS IA v3.3 GO LIVE (28 mars). DeerFlow restauré. Pending: rDNS Hetzner, S88 cancel, Ethica pilote Kaouther, GitHub PAT renewal, DMARC quarantine, OVH SMS + WhatsApp Meta + AWS S3, Graph API tenants, Huawei escalation, Vistex scheduling.", "updatedAt": "2026-03-28T22:30:00Z"}}, "history": {"recentMonths": {"summary": "Mars 2026: GO LIVE WEVADS IA v3.3. Academy E-Learning Premium. DeerFlow 2.0 restauré. Huawei Cloud page refaite (14 produits). Corrections accents FR. Button Fix v2. Ethica 50K+ médecins.", "updatedAt": "2026-03-28T22:30:00Z"}, "earlierContext": {"summary": "Février 2026: Migration KumoMTA. Chatbot WEVIA consolidé. 
Twenty CRM déployé. n8n + Authentik + Plausible.", "updatedAt": "2026-03-28T22:30:00Z"}, "longTermBackground": {"summary": "WEVAL Consulting fondé à Casablanca. Écosystème IA souverain 42+ SaaS. Spécialisations: ERP/SAP, Life Sciences, Cybersécurité IA. Territoire: Maghreb.", "updatedAt": "2026-03-28T22:30:00Z"}}, "facts": [{"id": "4de4df80-4608-41d0-892b-990efc5d76a3", "content": "WEVAL Consulting siège à Casablanca, Maroc", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "0736ab67-3012-4fc2-bfd6-58e86710c89e", "content": "Fondateur: Yanis Mahboub", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "3534db78-872f-42c0-9adb-4c071b56f427", "content": "42+ produits SaaS IA souverains", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "10f5705e-15f1-4a56-a586-a5939fde0d68", "content": "S204 = 204.168.152.13 (nginx, Ollama, DeerFlow)", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "005f6647-7f33-4e78-a5d8-e452a83375cb", "content": "S95 = 95.216.167.89 (PMTA+KumoMTA, bases email)", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "682f3c2a-cc77-4797-b0af-bca95114e13b", "content": "S151 = 151.80.235.110 (OVH tracking relay)", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "29eff71f-56ff-4333-b10c-b60803162d24", "content": "AI chain: Groq → Cerebras → Mistral → SambaNova → Ollama", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "9c2a4a4f-fc35-4da9-92ab-77ddb76b338d", "content": "WEVADS IA v3.3 GO LIVE 28 mars 2026", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", 
"source": "manual-injection"}, {"id": "483f7afb-dd0e-46bc-a619-625ec8abaeca", "content": "Ethica: plateforme pharma HCP, 50K+ médecins Maghreb", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "879c3ae6-ab76-4739-ab92-afe2a209ea56", "content": "DeerFlow ports: frontend=3002, LangGraph=2024, gateway=8001", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "c528f9cd-6d24-42f8-94d5-4dc9154569c0", "content": "Vistex Partner Agreement signé, Referral Fee 15% ACV", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "a0345693-bcec-437b-8859-a78987b37bd2", "content": "KumoMTA port 587, PMTA port 25 (coexistence)", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}]}
|
||||
@@ -0,0 +1,516 @@
|
||||
from datetime import datetime
|
||||
|
||||
from deerflow.config.agents_config import load_agent_soul
|
||||
from deerflow.skills import load_skills
|
||||
|
||||
|
||||
def _build_subagent_section(max_concurrent: int) -> str:
    """Build the subagent system prompt section with dynamic concurrency limit.

    Args:
        max_concurrent: Maximum number of concurrent subagent calls allowed per response.

    Returns:
        Formatted subagent section string.
    """
    # Short alias keeps the many {n} interpolations in the template readable.
    n = max_concurrent
    return f"""<subagent_system>
**🚀 SUBAGENT MODE ACTIVE - DECOMPOSE, DELEGATE, SYNTHESIZE**

You are running with subagent capabilities enabled. Your role is to be a **task orchestrator**:
1. **DECOMPOSE**: Break complex tasks into parallel sub-tasks
2. **DELEGATE**: Launch multiple subagents simultaneously using parallel `task` calls
3. **SYNTHESIZE**: Collect and integrate results into a coherent answer

**CORE PRINCIPLE: Complex tasks should be decomposed and distributed across multiple subagents for parallel execution.**

**⛔ HARD CONCURRENCY LIMIT: MAXIMUM {n} `task` CALLS PER RESPONSE. THIS IS NOT OPTIONAL.**
- Each response, you may include **at most {n}** `task` tool calls. Any excess calls are **silently discarded** by the system — you will lose that work.
- **Before launching subagents, you MUST count your sub-tasks in your thinking:**
  - If count ≤ {n}: Launch all in this response.
  - If count > {n}: **Pick the {n} most important/foundational sub-tasks for this turn.** Save the rest for the next turn.
- **Multi-batch execution** (for >{n} sub-tasks):
  - Turn 1: Launch sub-tasks 1-{n} in parallel → wait for results
  - Turn 2: Launch next batch in parallel → wait for results
  - ... continue until all sub-tasks are complete
  - Final turn: Synthesize ALL results into a coherent answer
- **Example thinking pattern**: "I identified 6 sub-tasks. Since the limit is {n} per turn, I will launch the first {n} now, and the rest in the next turn."

**Available Subagents:**
- **general-purpose**: For ANY non-trivial task - web research, code exploration, file operations, analysis, etc.
- **bash**: For command execution (git, build, test, deploy operations)

**Your Orchestration Strategy:**

✅ **DECOMPOSE + PARALLEL EXECUTION (Preferred Approach):**

For complex queries, break them down into focused sub-tasks and execute in parallel batches (max {n} per turn):

**Example 1: "Why is Tencent's stock price declining?" (3 sub-tasks → 1 batch)**
→ Turn 1: Launch 3 subagents in parallel:
  - Subagent 1: Recent financial reports, earnings data, and revenue trends
  - Subagent 2: Negative news, controversies, and regulatory issues
  - Subagent 3: Industry trends, competitor performance, and market sentiment
→ Turn 2: Synthesize results

**Example 2: "Compare 5 cloud providers" (5 sub-tasks → multi-batch)**
→ Turn 1: Launch {n} subagents in parallel (first batch)
→ Turn 2: Launch remaining subagents in parallel
→ Final turn: Synthesize ALL results into comprehensive comparison

**Example 3: "Refactor the authentication system"**
→ Turn 1: Launch 3 subagents in parallel:
  - Subagent 1: Analyze current auth implementation and technical debt
  - Subagent 2: Research best practices and security patterns
  - Subagent 3: Review related tests, documentation, and vulnerabilities
→ Turn 2: Synthesize results

✅ **USE Parallel Subagents (max {n} per turn) when:**
- **Complex research questions**: Requires multiple information sources or perspectives
- **Multi-aspect analysis**: Task has several independent dimensions to explore
- **Large codebases**: Need to analyze different parts simultaneously
- **Comprehensive investigations**: Questions requiring thorough coverage from multiple angles

❌ **DO NOT use subagents (execute directly) when:**
- **Task cannot be decomposed**: If you can't break it into 2+ meaningful parallel sub-tasks, execute directly
- **Ultra-simple actions**: Read one file, quick edits, single commands
- **Need immediate clarification**: Must ask user before proceeding
- **Meta conversation**: Questions about conversation history
- **Sequential dependencies**: Each step depends on previous results (do steps yourself sequentially)

**CRITICAL WORKFLOW** (STRICTLY follow this before EVERY action):
1. **COUNT**: In your thinking, list all sub-tasks and count them explicitly: "I have N sub-tasks"
2. **PLAN BATCHES**: If N > {n}, explicitly plan which sub-tasks go in which batch:
   - "Batch 1 (this turn): first {n} sub-tasks"
   - "Batch 2 (next turn): next batch of sub-tasks"
3. **EXECUTE**: Launch ONLY the current batch (max {n} `task` calls). Do NOT launch sub-tasks from future batches.
4. **REPEAT**: After results return, launch the next batch. Continue until all batches complete.
5. **SYNTHESIZE**: After ALL batches are done, synthesize all results.
6. **Cannot decompose** → Execute directly using available tools (bash, read_file, web_search, etc.)

**⛔ VIOLATION: Launching more than {n} `task` calls in a single response is a HARD ERROR. The system WILL discard excess calls and you WILL lose work. Always batch.**

**Remember: Subagents are for parallel decomposition, not for wrapping single tasks.**

**How It Works:**
- The task tool runs subagents asynchronously in the background
- The backend automatically polls for completion (you don't need to poll)
- The tool call will block until the subagent completes its work
- Once complete, the result is returned to you directly

**Usage Example 1 - Single Batch (≤{n} sub-tasks):**

```python
# User asks: "Why is Tencent's stock price declining?"
# Thinking: 3 sub-tasks → fits in 1 batch

# Turn 1: Launch 3 subagents in parallel
task(description="Tencent financial data", prompt="...", subagent_type="general-purpose")
task(description="Tencent news & regulation", prompt="...", subagent_type="general-purpose")
task(description="Industry & market trends", prompt="...", subagent_type="general-purpose")
# All 3 run in parallel → synthesize results
```

**Usage Example 2 - Multiple Batches (>{n} sub-tasks):**

```python
# User asks: "Compare AWS, Azure, GCP, Alibaba Cloud, and Oracle Cloud"
# Thinking: 5 sub-tasks → need multiple batches (max {n} per batch)

# Turn 1: Launch first batch of {n}
task(description="AWS analysis", prompt="...", subagent_type="general-purpose")
task(description="Azure analysis", prompt="...", subagent_type="general-purpose")
task(description="GCP analysis", prompt="...", subagent_type="general-purpose")

# Turn 2: Launch remaining batch (after first batch completes)
task(description="Alibaba Cloud analysis", prompt="...", subagent_type="general-purpose")
task(description="Oracle Cloud analysis", prompt="...", subagent_type="general-purpose")

# Turn 3: Synthesize ALL results from both batches
```

**Counter-Example - Direct Execution (NO subagents):**

```python
# User asks: "Run the tests"
# Thinking: Cannot decompose into parallel sub-tasks
# → Execute directly

bash("npm test") # Direct execution, not task()
```

**CRITICAL**:
- **Max {n} `task` calls per turn** - the system enforces this, excess calls are discarded
- Only use `task` when you can launch 2+ subagents in parallel
- Single task = No value from subagents = Execute directly
- For >{n} sub-tasks, use sequential batches of {n} across multiple turns
</subagent_system>"""
|
||||
|
||||
|
||||
SYSTEM_PROMPT_TEMPLATE = """
|
||||
<role>
|
||||
You are {agent_name}, an open-source super agent.
|
||||
</role>
|
||||
|
||||
{soul}
|
||||
{memory_context}
|
||||
|
||||
<thinking_style>
|
||||
- Think concisely and strategically about the user's request BEFORE taking action
|
||||
- Break down the task: What is clear? What is ambiguous? What is missing?
|
||||
- **PRIORITY CHECK: If anything is unclear, missing, or has multiple interpretations, you MUST ask for clarification FIRST - do NOT proceed with work**
|
||||
{subagent_thinking}- Never write down your full final answer or report in thinking process, but only outline
|
||||
- CRITICAL: After thinking, you MUST provide your actual response to the user. Thinking is for planning, the response is for delivery.
|
||||
- Your response must contain the actual answer, not just a reference to what you thought about
|
||||
</thinking_style>
|
||||
|
||||
<clarification_system>
|
||||
**WORKFLOW PRIORITY: CLARIFY → PLAN → ACT**
|
||||
1. **FIRST**: Analyze the request in your thinking - identify what's unclear, missing, or ambiguous
|
||||
2. **SECOND**: If clarification is needed, call `ask_clarification` tool IMMEDIATELY - do NOT start working
|
||||
3. **THIRD**: Only after all clarifications are resolved, proceed with planning and execution
|
||||
|
||||
**CRITICAL RULE: Clarification ALWAYS comes BEFORE action. Never start working and clarify mid-execution.**
|
||||
|
||||
**MANDATORY Clarification Scenarios - You MUST call ask_clarification BEFORE starting work when:**
|
||||
|
||||
1. **Missing Information** (`missing_info`): Required details not provided
|
||||
- Example: User says "create a web scraper" but doesn't specify the target website
|
||||
- Example: "Deploy the app" without specifying environment
|
||||
- **REQUIRED ACTION**: Call ask_clarification to get the missing information
|
||||
|
||||
2. **Ambiguous Requirements** (`ambiguous_requirement`): Multiple valid interpretations exist
|
||||
- Example: "Optimize the code" could mean performance, readability, or memory usage
|
||||
- Example: "Make it better" is unclear what aspect to improve
|
||||
- **REQUIRED ACTION**: Call ask_clarification to clarify the exact requirement
|
||||
|
||||
3. **Approach Choices** (`approach_choice`): Several valid approaches exist
|
||||
- Example: "Add authentication" could use JWT, OAuth, session-based, or API keys
|
||||
- Example: "Store data" could use database, files, cache, etc.
|
||||
- **REQUIRED ACTION**: Call ask_clarification to let user choose the approach
|
||||
|
||||
4. **Risky Operations** (`risk_confirmation`): Destructive actions need confirmation
|
||||
- Example: Deleting files, modifying production configs, database operations
|
||||
- Example: Overwriting existing code or data
|
||||
- **REQUIRED ACTION**: Call ask_clarification to get explicit confirmation
|
||||
|
||||
5. **Suggestions** (`suggestion`): You have a recommendation but want approval
|
||||
- Example: "I recommend refactoring this code. Should I proceed?"
|
||||
- **REQUIRED ACTION**: Call ask_clarification to get approval
|
||||
|
||||
**STRICT ENFORCEMENT:**
|
||||
- ❌ DO NOT start working and then ask for clarification mid-execution - clarify FIRST
|
||||
- ❌ DO NOT skip clarification for "efficiency" - accuracy matters more than speed
|
||||
- ❌ DO NOT make assumptions when information is missing - ALWAYS ask
|
||||
- ❌ DO NOT proceed with guesses - STOP and call ask_clarification first
|
||||
- ✅ Analyze the request in thinking → Identify unclear aspects → Ask BEFORE any action
|
||||
- ✅ If you identify the need for clarification in your thinking, you MUST call the tool IMMEDIATELY
|
||||
- ✅ After calling ask_clarification, execution will be interrupted automatically
|
||||
- ✅ Wait for user response - do NOT continue with assumptions
|
||||
|
||||
**How to Use:**
|
||||
```python
|
||||
ask_clarification(
|
||||
question="Your specific question here?",
|
||||
clarification_type="missing_info", # or other type
|
||||
context="Why you need this information", # optional but recommended
|
||||
options=["option1", "option2"] # optional, for choices
|
||||
)
|
||||
```
|
||||
|
||||
**Example:**
|
||||
User: "Deploy the application"
|
||||
You (thinking): Missing environment info - I MUST ask for clarification
|
||||
You (action): ask_clarification(
|
||||
question="Which environment should I deploy to?",
|
||||
clarification_type="approach_choice",
|
||||
context="I need to know the target environment for proper configuration",
|
||||
options=["development", "staging", "production"]
|
||||
)
|
||||
[Execution stops - wait for user response]
|
||||
|
||||
User: "staging"
|
||||
You: "Deploying to staging..." [proceed]
|
||||
</clarification_system>
|
||||
|
||||
{skills_section}
|
||||
|
||||
{deferred_tools_section}
|
||||
|
||||
{subagent_section}
|
||||
|
||||
<working_directory existed="true">
|
||||
- User uploads: `/mnt/user-data/uploads` - Files uploaded by the user (automatically listed in context)
|
||||
- User workspace: `/mnt/user-data/workspace` - Working directory for temporary files
|
||||
- Output files: `/mnt/user-data/outputs` - Final deliverables must be saved here
|
||||
|
||||
**File Management:**
|
||||
- Uploaded files are automatically listed in the <uploaded_files> section before each request
|
||||
- Use `read_file` tool to read uploaded files using their paths from the list
|
||||
- For PDF, PPT, Excel, and Word files, converted Markdown versions (*.md) are available alongside originals
|
||||
- All temporary work happens in `/mnt/user-data/workspace`
|
||||
- Final deliverables must be copied to `/mnt/user-data/outputs` and presented using `present_file` tool
|
||||
{acp_section}
|
||||
</working_directory>
|
||||
|
||||
<response_style>
|
||||
- Clear and Concise: Avoid over-formatting unless requested
|
||||
- Natural Tone: Use paragraphs and prose, not bullet points by default
|
||||
- Action-Oriented: Focus on delivering results, not explaining processes
|
||||
</response_style>
|
||||
|
||||
<citations>
|
||||
**CRITICAL: Always include citations when using web search results**
|
||||
|
||||
- **When to Use**: MANDATORY after web_search, web_fetch, or any external information source
|
||||
- **Format**: Use Markdown link format `[citation:TITLE](URL)` immediately after the claim
|
||||
- **Placement**: Inline citations should appear right after the sentence or claim they support
|
||||
- **Sources Section**: Also collect all citations in a "Sources" section at the end of reports
|
||||
|
||||
**Example - Inline Citations:**
|
||||
```markdown
|
||||
The key AI trends for 2026 include enhanced reasoning capabilities and multimodal integration
|
||||
[citation:AI Trends 2026](https://techcrunch.com/ai-trends).
|
||||
Recent breakthroughs in language models have also accelerated progress
|
||||
[citation:OpenAI Research](https://openai.com/research).
|
||||
```
|
||||
|
||||
**Example - Deep Research Report with Citations:**
|
||||
```markdown
|
||||
## Executive Summary
|
||||
|
||||
DeerFlow is an open-source AI agent framework that gained significant traction in early 2026
|
||||
[citation:GitHub Repository](https://github.com/bytedance/deer-flow). The project focuses on
|
||||
providing a production-ready agent system with sandbox execution and memory management
|
||||
[citation:DeerFlow Documentation](https://deer-flow.dev/docs).
|
||||
|
||||
## Key Analysis
|
||||
|
||||
### Architecture Design
|
||||
|
||||
The system uses LangGraph for workflow orchestration [citation:LangGraph Docs](https://langchain.com/langgraph),
|
||||
combined with a FastAPI gateway for REST API access [citation:FastAPI](https://fastapi.tiangolo.com).
|
||||
|
||||
## Sources
|
||||
|
||||
### Primary Sources
|
||||
- [GitHub Repository](https://github.com/bytedance/deer-flow) - Official source code and documentation
|
||||
- [DeerFlow Documentation](https://deer-flow.dev/docs) - Technical specifications
|
||||
|
||||
### Media Coverage
|
||||
- [AI Trends 2026](https://techcrunch.com/ai-trends) - Industry analysis
|
||||
```
|
||||
|
||||
**CRITICAL: Sources section format:**
|
||||
- Every item in the Sources section MUST be a clickable markdown link with URL
|
||||
- Use standard markdown link `[Title](URL) - Description` format (NOT `[citation:...]` format)
|
||||
- The `[citation:Title](URL)` format is ONLY for inline citations within the report body
|
||||
- ❌ WRONG: `GitHub 仓库 - 官方源代码和文档` (no URL!)
|
||||
- ❌ WRONG in Sources: `[citation:GitHub Repository](url)` (citation prefix is for inline only!)
|
||||
- ✅ RIGHT in Sources: `[GitHub Repository](https://github.com/bytedance/deer-flow) - 官方源代码和文档`
|
||||
|
||||
**WORKFLOW for Research Tasks:**
|
||||
1. Use web_search to find sources → Extract {{title, url, snippet}} from results
|
||||
2. Write content with inline citations: `claim [citation:Title](url)`
|
||||
3. Collect all citations in a "Sources" section at the end
|
||||
4. NEVER write claims without citations when sources are available
|
||||
|
||||
**CRITICAL RULES:**
|
||||
- ❌ DO NOT write research content without citations
|
||||
- ❌ DO NOT forget to extract URLs from search results
|
||||
- ✅ ALWAYS add `[citation:Title](URL)` after claims from external sources
|
||||
- ✅ ALWAYS include a "Sources" section listing all references
|
||||
</citations>
|
||||
|
||||
<critical_reminders>
|
||||
- **Clarification First**: ALWAYS clarify unclear/missing/ambiguous requirements BEFORE starting work - never assume or guess
|
||||
{subagent_reminder}- Skill First: Always load the relevant skill before starting **complex** tasks.
|
||||
- Progressive Loading: Load resources incrementally as referenced in skills
|
||||
- Output Files: Final deliverables must be in `/mnt/user-data/outputs`
|
||||
- Clarity: Be direct and helpful, avoid unnecessary meta-commentary
|
||||
- Including Images and Mermaid: Images and Mermaid diagrams are always welcomed in the Markdown format, and you're encouraged to use `\n\n` or "```mermaid" to display images in response or Markdown files
|
||||
- Multi-task: Better utilize parallel tool calling to call multiple tools at one time for better performance
|
||||
- Language Consistency: Keep using the same language as user's
|
||||
- Always Respond: Your thinking is internal. You MUST always provide a visible response to the user after thinking.
|
||||
</critical_reminders>
|
||||
"""
|
||||
|
||||
|
||||
def _get_memory_context(agent_name: str | None = None) -> str:
|
||||
"""Get memory context for injection into system prompt.
|
||||
|
||||
Args:
|
||||
agent_name: If provided, loads per-agent memory. If None, loads global memory.
|
||||
|
||||
Returns:
|
||||
Formatted memory context string wrapped in XML tags, or empty string if disabled.
|
||||
"""
|
||||
try:
|
||||
from deerflow.agents.memory import format_memory_for_injection, get_memory_data
|
||||
from deerflow.config.memory_config import get_memory_config
|
||||
|
||||
config = get_memory_config()
|
||||
if not config.enabled or not config.injection_enabled:
|
||||
return ""
|
||||
|
||||
memory_data = get_memory_data(agent_name)
|
||||
memory_content = format_memory_for_injection(memory_data, max_tokens=config.max_injection_tokens)
|
||||
|
||||
if not memory_content.strip():
|
||||
return ""
|
||||
|
||||
return f"""<memory>
|
||||
{memory_content}
|
||||
</memory>
|
||||
"""
|
||||
except Exception as e:
|
||||
print(f"Failed to load memory context: {e}")
|
||||
return ""
|
||||
|
||||
|
||||
def get_skills_prompt_section(available_skills: set[str] | None = None) -> str:
    """Generate the skills prompt section with available skills list.

    Returns the <skill_system>...</skill_system> block listing all enabled skills,
    suitable for injection into any agent's system prompt.

    Args:
        available_skills: Optional whitelist of skill names. When provided,
            only skills whose name appears in the set are listed.

    Returns:
        The formatted <skill_system> block, or "" when no skills remain.
    """
    skills = load_skills(enabled_only=True)

    try:
        from deerflow.config import get_app_config

        config = get_app_config()
        container_base_path = config.skills.container_path
    except Exception:
        # Config may be unavailable; fall back to the conventional mount point.
        container_base_path = "/mnt/skills"

    if available_skills is not None:
        skills = [skill for skill in skills if skill.name in available_skills]

    # FIX: emptiness is now checked AFTER filtering as well. Previously a
    # fully filtered-out whitelist still produced a <skill_system> block with
    # an empty <available_skills> section.
    if not skills:
        return ""

    skill_items = "\n".join(
        f"  <skill>\n    <name>{skill.name}</name>\n    <description>{skill.description}</description>\n    <location>{skill.get_container_file_path(container_base_path)}</location>\n  </skill>"
        for skill in skills
    )
    skills_list = f"<available_skills>\n{skill_items}\n</available_skills>"

    return f"""<skill_system>
You have access to skills that provide optimized workflows for specific tasks. Each skill contains best practices, frameworks, and references to additional resources.

**Progressive Loading Pattern:**
1. When a user query matches a skill's use case, immediately call `read_file` on the skill's main file using the path attribute provided in the skill tag below
2. Read and understand the skill's workflow and instructions
3. The skill file contains references to external resources under the same folder
4. Load referenced resources only when needed during execution
5. Follow the skill's instructions precisely

**Skills are located at:** {container_base_path}

{skills_list}

</skill_system>"""
|
||||
|
||||
|
||||
def get_agent_soul(agent_name: str | None) -> str:
    """Return the agent's SOUL.md personality wrapped in <soul> tags.

    Args:
        agent_name: Agent whose soul to load (passed through to load_agent_soul).

    Returns:
        The soul text wrapped as "<soul>\\n...\\n</soul>\\n", or "" when no
        soul is configured for this agent.
    """
    # Append SOUL.md (agent personality) if present
    soul = load_agent_soul(agent_name)
    # FIX: the original guarded on `soul` twice (`if soul:` wrapping an inline
    # `... if soul else ""`); a single guard is equivalent and clearer.
    if not soul:
        return ""
    return f"<soul>\n{soul}\n</soul>\n"
|
||||
|
||||
|
||||
def get_deferred_tools_prompt_section() -> str:
    """Generate <available-deferred-tools> block for the system prompt.

    Lists only deferred tool names so the agent knows what exists
    and can use tool_search to load them.
    Returns empty string when tool_search is disabled or no tools are deferred.
    """
    from deerflow.tools.builtins.tool_search import get_deferred_registry

    try:
        from deerflow.config import get_app_config

        enabled = get_app_config().tool_search.enabled
    except FileNotFoundError:
        # No app config on disk → feature is effectively off.
        return ""
    if not enabled:
        return ""

    registry = get_deferred_registry()
    if not registry:
        return ""

    listed = "\n".join(entry.name for entry in registry.entries)
    return f"<available-deferred-tools>\n{listed}\n</available-deferred-tools>"
|
||||
|
||||
|
||||
def _build_acp_section() -> str:
|
||||
"""Build the ACP agent prompt section, only if ACP agents are configured."""
|
||||
try:
|
||||
from deerflow.config.acp_config import get_acp_agents
|
||||
|
||||
agents = get_acp_agents()
|
||||
if not agents:
|
||||
return ""
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
return (
|
||||
"\n**ACP Agent Tasks (invoke_acp_agent):**\n"
|
||||
"- ACP agents (e.g. codex, claude_code) run in their own independent workspace — NOT in `/mnt/user-data/`\n"
|
||||
"- When writing prompts for ACP agents, describe the task only — do NOT reference `/mnt/user-data` paths\n"
|
||||
"- ACP agent results are accessible at `/mnt/acp-workspace/` (read-only) — use `ls`, `read_file`, or `bash cp` to retrieve output files\n"
|
||||
"- To deliver ACP output to the user: copy from `/mnt/acp-workspace/<file>` to `/mnt/user-data/outputs/<file>`, then use `present_file`"
|
||||
)
|
||||
|
||||
|
||||
def apply_prompt_template(subagent_enabled: bool = False, max_concurrent_subagents: int = 3, *, agent_name: str | None = None, available_skills: set[str] | None = None) -> str:
    """Assemble the full system prompt from SYSTEM_PROMPT_TEMPLATE.

    Args:
        subagent_enabled: When True, inject the subagent orchestration section
            plus the matching thinking-style and reminder snippets.
        max_concurrent_subagents: Concurrency limit quoted throughout the
            subagent guidance text.
        agent_name: Used for per-agent memory/soul loading and the <role> tag;
            the rendered prompt falls back to "DeerFlow 2.0" when None/empty.
        available_skills: Optional whitelist forwarded to the skills section.

    Returns:
        The fully formatted prompt with a trailing <current_date> tag.
    """
    # Get memory context
    memory_context = _get_memory_context(agent_name)

    # Include subagent section only if enabled (from runtime parameter)
    n = max_concurrent_subagents
    subagent_section = _build_subagent_section(n) if subagent_enabled else ""

    # Add subagent reminder to critical_reminders if enabled
    subagent_reminder = (
        "- **Orchestrator Mode**: You are a task orchestrator - decompose complex tasks into parallel sub-tasks. "
        f"**HARD LIMIT: max {n} `task` calls per response.** "
        f"If >{n} sub-tasks, split into sequential batches of ≤{n}. Synthesize after ALL batches complete.\n"
        if subagent_enabled
        else ""
    )

    # Add subagent thinking guidance if enabled
    subagent_thinking = (
        "- **DECOMPOSITION CHECK: Can this task be broken into 2+ parallel sub-tasks? If YES, COUNT them. "
        f"If count > {n}, you MUST plan batches of ≤{n} and only launch the FIRST batch now. "
        f"NEVER launch more than {n} `task` calls in one response.**\n"
        if subagent_enabled
        else ""
    )

    # Get skills section
    skills_section = get_skills_prompt_section(available_skills)

    # Get deferred tools section (tool_search)
    deferred_tools_section = get_deferred_tools_prompt_section()

    # Build ACP agent section only if ACP agents are configured
    acp_section = _build_acp_section()

    # Format the prompt with dynamic skills and memory
    prompt = SYSTEM_PROMPT_TEMPLATE.format(
        agent_name=agent_name or "DeerFlow 2.0",
        soul=get_agent_soul(agent_name),
        skills_section=skills_section,
        deferred_tools_section=deferred_tools_section,
        memory_context=memory_context,
        subagent_section=subagent_section,
        subagent_reminder=subagent_reminder,
        subagent_thinking=subagent_thinking,
        acp_section=acp_section,
    )

    return prompt + f"\n<current_date>{datetime.now().strftime('%Y-%m-%d, %A')}</current_date>"
|
||||
@@ -55,7 +55,9 @@ class ClarificationMiddleware(AgentMiddleware[ClarificationMiddlewareState]):
|
||||
question = args.get("question", "")
|
||||
clarification_type = args.get("clarification_type", "missing_info")
|
||||
context = args.get("context")
|
||||
options = args.get("options", [])
|
||||
options = args.get("options") or []
|
||||
if not isinstance(options, list):
|
||||
options = []
|
||||
|
||||
# Type-specific icons
|
||||
type_icons = {
|
||||
|
||||
@@ -0,0 +1,173 @@
|
||||
"""Middleware for intercepting clarification requests and presenting them to the user."""
|
||||
|
||||
from collections.abc import Callable
|
||||
from typing import override
|
||||
|
||||
from langchain.agents import AgentState
|
||||
from langchain.agents.middleware import AgentMiddleware
|
||||
from langchain_core.messages import ToolMessage
|
||||
from langgraph.graph import END
|
||||
from langgraph.prebuilt.tool_node import ToolCallRequest
|
||||
from langgraph.types import Command
|
||||
|
||||
|
||||
class ClarificationMiddlewareState(AgentState):
    """State schema for :class:`ClarificationMiddleware`.

    Intentionally declares no additional fields; it exists only so the
    middleware's state stays structurally compatible with the
    `ThreadState` schema.
    """
|
||||
|
||||
|
||||
class ClarificationMiddleware(AgentMiddleware[ClarificationMiddlewareState]):
    """Intercepts clarification tool calls and interrupts execution to present questions to the user.

    When the model calls the `ask_clarification` tool, this middleware:
    1. Intercepts the tool call before execution
    2. Extracts the clarification question and metadata
    3. Formats a user-friendly message
    4. Returns a Command that interrupts execution and presents the question
    5. Waits for user response before continuing

    This replaces the tool-based approach where clarification continued the conversation flow.
    """

    state_schema = ClarificationMiddlewareState

    def _is_chinese(self, text: str) -> bool:
        """Check if text contains Chinese characters.

        Args:
            text: Text to check

        Returns:
            True if text contains at least one character in the CJK Unified
            Ideographs block (U+4E00..U+9FFF)
        """
        return any("\u4e00" <= char <= "\u9fff" for char in text)

    def _format_clarification_message(self, args: dict) -> str:
        """Format the clarification arguments into a user-friendly message.

        Args:
            args: The tool call arguments containing clarification details
                (``question``, ``clarification_type``, ``context``,
                ``options``)

        Returns:
            Formatted message string
        """
        question = args.get("question", "")
        clarification_type = args.get("clarification_type", "missing_info")
        context = args.get("context")
        # Guard against the model sending ``options: null`` or a non-list
        # value: ``args.get("options", [])`` alone would hand back None (or a
        # scalar) and crash the enumeration below. Mirrors the fix applied to
        # the other copy of this method in this commit.
        options = args.get("options") or []
        if not isinstance(options, list):
            options = []

        # Type-specific icons
        type_icons = {
            "missing_info": "❓",
            "ambiguous_requirement": "🤔",
            "approach_choice": "🔀",
            "risk_confirmation": "⚠️",
            "suggestion": "💡",
        }

        # Unknown types fall back to the generic question icon.
        icon = type_icons.get(clarification_type, "❓")

        # Build the message naturally
        message_parts = []

        # Add icon and question together for a more natural flow
        if context:
            # If there's context, present it first as background
            message_parts.append(f"{icon} {context}")
            message_parts.append(f"\n{question}")
        else:
            # Just the question with icon
            message_parts.append(f"{icon} {question}")

        # Add options as a numbered list in a cleaner format
        if options:
            message_parts.append("")  # blank line for spacing
            for i, option in enumerate(options, 1):
                message_parts.append(f"  {i}. {option}")

        return "\n".join(message_parts)

    def _handle_clarification(self, request: ToolCallRequest) -> Command:
        """Handle clarification request and return command to interrupt execution.

        Args:
            request: Tool call request

        Returns:
            Command that interrupts execution with the formatted clarification message
        """
        # Extract clarification arguments
        args = request.tool_call.get("args", {})
        question = args.get("question", "")

        print("[ClarificationMiddleware] Intercepted clarification request")
        print(f"[ClarificationMiddleware] Question: {question}")

        # Format the clarification message
        formatted_message = self._format_clarification_message(args)

        # Get the tool call ID
        tool_call_id = request.tool_call.get("id", "")

        # Create a ToolMessage with the formatted question
        # This will be added to the message history
        tool_message = ToolMessage(
            content=formatted_message,
            tool_call_id=tool_call_id,
            name="ask_clarification",
        )

        # Return a Command that:
        # 1. Adds the formatted tool message
        # 2. Interrupts execution by going to __end__
        # Note: We don't add an extra AIMessage here - the frontend will detect
        # and display ask_clarification tool messages directly
        return Command(
            update={"messages": [tool_message]},
            goto=END,
        )

    @override
    def wrap_tool_call(
        self,
        request: ToolCallRequest,
        handler: Callable[[ToolCallRequest], ToolMessage | Command],
    ) -> ToolMessage | Command:
        """Intercept ask_clarification tool calls and interrupt execution (sync version).

        Args:
            request: Tool call request
            handler: Original tool execution handler

        Returns:
            Command that interrupts execution with the formatted clarification message,
            or the original handler's result for any other tool
        """
        # Check if this is an ask_clarification tool call
        if request.tool_call.get("name") != "ask_clarification":
            # Not a clarification call, execute normally
            return handler(request)

        return self._handle_clarification(request)

    @override
    async def awrap_tool_call(
        self,
        request: ToolCallRequest,
        handler: Callable[[ToolCallRequest], ToolMessage | Command],
    ) -> ToolMessage | Command:
        """Intercept ask_clarification tool calls and interrupt execution (async version).

        Args:
            request: Tool call request
            handler: Original tool execution handler (async)

        Returns:
            Command that interrupts execution with the formatted clarification message,
            or the awaited handler's result for any other tool
        """
        # Check if this is an ask_clarification tool call
        if request.tool_call.get("name") != "ask_clarification":
            # Not a clarification call, execute normally
            return await handler(request)

        return self._handle_clarification(request)
|
||||
168
config.yaml.bak-30mars2
Normal file
168
config.yaml.bak-30mars2
Normal file
@@ -0,0 +1,168 @@
|
||||
checkpointer:
|
||||
connection_string: checkpoints.db
|
||||
type: sqlite
|
||||
config_version: 3
|
||||
log_level: info
|
||||
memory:
|
||||
debounce_seconds: 30
|
||||
enabled: true
|
||||
fact_confidence_threshold: 0.7
|
||||
injection_enabled: true
|
||||
max_facts: 100
|
||||
max_injection_tokens: 2000
|
||||
model_name: null
|
||||
storage_path: memory.json
|
||||
models:
|
||||
- api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
description: Modèle WEVAL fine-tuné souverain — zero cloud
|
||||
display_name: WEVAL Brain (Souverain)
|
||||
max_tokens: 4096
|
||||
model: weval-brain:latest
|
||||
name: weval-brain
|
||||
use: langchain_openai:ChatOpenAI
|
||||
- api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
description: Qwen3 8B local souverain — zero cloud
|
||||
display_name: Qwen3 8B (Souverain)
|
||||
max_tokens: 4096
|
||||
model: qwen3:8b
|
||||
name: qwen3-8b-sovereign
|
||||
use: langchain_openai:ChatOpenAI
|
||||
- api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
description: Qwen 2.5 7B local souverain — zero cloud
|
||||
display_name: Qwen 2.5 7B (Souverain)
|
||||
max_tokens: 4096
|
||||
model: qwen2.5:7b
|
||||
name: qwen2.5-7b-sovereign
|
||||
use: langchain_openai:ChatOpenAI
|
||||
- api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
description: Mistral 7B local souverain — zero cloud
|
||||
display_name: Mistral 7B (Souverain)
|
||||
max_tokens: 4096
|
||||
model: mistral:latest
|
||||
name: mistral-sovereign
|
||||
use: langchain_openai:ChatOpenAI
|
||||
- api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
description: IBM Granite 4 local souverain — zero cloud
|
||||
display_name: Granite 4 (Souverain)
|
||||
max_tokens: 4096
|
||||
model: granite4:latest
|
||||
name: granite4-sovereign
|
||||
use: langchain_openai:ChatOpenAI
|
||||
- api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
description: GLM4 9B local souverain — zero cloud
|
||||
display_name: GLM4 9B (Souverain)
|
||||
max_tokens: 4096
|
||||
model: glm4:9b
|
||||
name: glm4-9b-sovereign
|
||||
use: langchain_openai:ChatOpenAI
|
||||
- api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
description: Qwen3 4B local souverain léger — zero cloud
|
||||
display_name: Qwen3 4B (Souverain)
|
||||
max_tokens: 4096
|
||||
model: qwen3:4b
|
||||
name: qwen3-4b-sovereign
|
||||
use: langchain_openai:ChatOpenAI
|
||||
- api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
description: MedLlama2 médical local souverain — zero cloud
|
||||
display_name: MedLlama2 (Souverain Médical)
|
||||
max_tokens: 4096
|
||||
model: medllama2:latest
|
||||
name: medllama2-sovereign
|
||||
use: langchain_openai:ChatOpenAI
|
||||
- api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
description: Meditron 7B médical local souverain — zero cloud
|
||||
display_name: Meditron 7B (Souverain Médical)
|
||||
max_tokens: 4096
|
||||
model: meditron:7b
|
||||
name: meditron-sovereign
|
||||
use: langchain_openai:ChatOpenAI
|
||||
- api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
description: Qwen3.5 0.8B ultra-léger souverain — zero cloud
|
||||
display_name: Qwen3.5 0.8B (Ultra-Léger Souverain)
|
||||
max_tokens: 2048
|
||||
model: qwen3.5:0.8b
|
||||
name: qwen3.5-0.8b-sovereign
|
||||
use: langchain_openai:ChatOpenAI
|
||||
- api_key: $SAMBANOVA_API_KEY
|
||||
base_url: https://api.sambanova.ai/v1
|
||||
description: Fast inference via SambaNova
|
||||
display_name: SambaNova Llama 70B
|
||||
max_tokens: 4096
|
||||
model: Meta-Llama-3.3-70B-Instruct
|
||||
name: sambanova-llama70b
|
||||
use: langchain_openai:ChatOpenAI
|
||||
- api_key: $GROQ_API_KEY
|
||||
base_url: https://api.groq.com/openai/v1
|
||||
description: Fast inference via Groq
|
||||
display_name: Groq Llama 70B
|
||||
max_tokens: 4096
|
||||
model: llama-3.3-70b-versatile
|
||||
name: groq-llama70b
|
||||
use: langchain_openai:ChatOpenAI
|
||||
- api_key: $CEREBRAS_API_KEY
|
||||
base_url: https://api.cerebras.ai/v1
|
||||
description: High quality via Cerebras
|
||||
display_name: Cerebras Qwen 235B
|
||||
max_tokens: 4096
|
||||
model: qwen-3-235b-a22b-instruct-2507
|
||||
name: cerebras-qwen
|
||||
use: langchain_openai:ChatOpenAI
|
||||
sandbox:
|
||||
use: deerflow.sandbox.local:LocalSandboxProvider
|
||||
skills:
|
||||
container_path: /mnt/skills
|
||||
summarization:
|
||||
model: groq-llama70b
|
||||
title:
|
||||
enabled: true
|
||||
model: groq-llama70b
|
||||
token_usage:
|
||||
enabled: false
|
||||
tool_groups:
|
||||
- name: web
|
||||
- name: file:read
|
||||
- name: file:write
|
||||
- name: bash
|
||||
tool_search:
|
||||
enabled: false
|
||||
tools:
|
||||
- group: web
|
||||
max_results: 5
|
||||
name: web_search
|
||||
use: deerflow.community.searxng_search.tools:web_search_tool
|
||||
- group: web
|
||||
name: web_fetch
|
||||
timeout: 10
|
||||
use: deerflow.community.jina_ai.tools:web_fetch_tool
|
||||
- group: web
|
||||
max_results: 5
|
||||
name: image_search
|
||||
use: deerflow.community.image_search.tools:image_search_tool
|
||||
- group: file:read
|
||||
name: ls
|
||||
use: deerflow.sandbox.tools:ls_tool
|
||||
- group: file:read
|
||||
name: read_file
|
||||
use: deerflow.sandbox.tools:read_file_tool
|
||||
- group: file:write
|
||||
name: write_file
|
||||
use: deerflow.sandbox.tools:write_file_tool
|
||||
- group: file:write
|
||||
name: str_replace
|
||||
use: deerflow.sandbox.tools:str_replace_tool
|
||||
- group: bash
|
||||
name: bash
|
||||
use: deerflow.sandbox.tools:bash_tool
|
||||
- group: web
|
||||
name: notify_team
|
||||
use: deerflow.community.mattermost_notify.tools:notify_team_tool
|
||||
@@ -140,9 +140,10 @@ http {
|
||||
proxy_request_buffering off;
|
||||
}
|
||||
|
||||
# Custom API: Other endpoints under /api/threads
|
||||
# LangGraph: Thread management (create, search, state)
|
||||
location ~ ^/api/threads {
|
||||
proxy_pass http://gateway;
|
||||
rewrite ^/api/(.*) /$1 break;
|
||||
proxy_pass http://langgraph;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
|
||||
1
memory.json
Normal file
1
memory.json
Normal file
@@ -0,0 +1 @@
|
||||
{"version": "1.0", "lastUpdated": "2026-03-28T22:30:00Z", "user": {"workContext": {"summary": "Yanis Mahboub est le fondateur de WEVAL Consulting (Casablanca), cabinet de conseil en management couvrant ERP/SAP, Cloud, Supply Chain, Life Sciences/Pharma, Manufacturing, Finance et Cybersécurité. WEVAL opère un écosystème souverain de 42+ produits SaaS IA. Plateformes clés: WEVIA PUBLIC (widget chat + Centre Commande, 71 modules), WEVCODE (assistant code IA souverain), WEDROID (agent diagnostic backend v5.0), WEVADS IA (SPA email intelligence, 41 pages, 49 APIs), Ethica (plateforme HCP outreach pharma pour le Maghreb). Infra: S204 (204.168.152.13, nginx/PHP/PG13, Ollama 9 modèles), S95 (95.216.167.89, Apache/PHP/PG13, PMTA+KumoMTA, ~7.35M contacts), S151 (151.80.235.110, OVH tracking relay). AI chain: Groq llama-3.3-70b → Cerebras → Mistral → SambaNova → Ollama local. DeerFlow 2.0 actif sur S204 avec 42 skills, 3 modèles.", "updatedAt": "2026-03-28T22:30:00Z"}, "personalContext": {"summary": "Fondateur tech Casablanca. Langue: français (concis, majuscules=urgence). Exige fixes complets sans régressions. Principes: scan avant création, GOLD backup, NonReg obligatoire, approche systémique globale. Partenaires: Vistex, Confluent Digital, Groupe Ethica, Huawei Cloud.", "updatedAt": "2026-03-28T22:30:00Z"}, "topOfMind": {"summary": "WEVADS IA v3.3 GO LIVE (28 mars). DeerFlow restauré. Pending: rDNS Hetzner, S88 cancel, Ethica pilote Kaouther, GitHub PAT renewal, DMARC quarantine, OVH SMS + WhatsApp Meta + AWS S3, Graph API tenants, Huawei escalation, Vistex scheduling.", "updatedAt": "2026-03-28T22:30:00Z"}}, "history": {"recentMonths": {"summary": "Mars 2026: GO LIVE WEVADS IA v3.3. Academy E-Learning Premium. DeerFlow 2.0 restauré. Huawei Cloud page refaite (14 produits). Corrections accents FR. Button Fix v2. Ethica 50K+ médecins.", "updatedAt": "2026-03-28T22:30:00Z"}, "earlierContext": {"summary": "Février 2026: Migration KumoMTA. Chatbot WEVIA consolidé. 
Twenty CRM déployé. n8n + Authentik + Plausible.", "updatedAt": "2026-03-28T22:30:00Z"}, "longTermBackground": {"summary": "WEVAL Consulting fondé à Casablanca. Écosystème IA souverain 42+ SaaS. Spécialisations: ERP/SAP, Life Sciences, Cybersécurité IA. Territoire: Maghreb.", "updatedAt": "2026-03-28T22:30:00Z"}}, "facts": [{"id": "4de4df80-4608-41d0-892b-990efc5d76a3", "content": "WEVAL Consulting siège à Casablanca, Maroc", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "0736ab67-3012-4fc2-bfd6-58e86710c89e", "content": "Fondateur: Yanis Mahboub", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "3534db78-872f-42c0-9adb-4c071b56f427", "content": "42+ produits SaaS IA souverains", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "10f5705e-15f1-4a56-a586-a5939fde0d68", "content": "S204 = 204.168.152.13 (nginx, Ollama, DeerFlow)", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "005f6647-7f33-4e78-a5d8-e452a83375cb", "content": "S95 = 95.216.167.89 (PMTA+KumoMTA, bases email)", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "682f3c2a-cc77-4797-b0af-bca95114e13b", "content": "S151 = 151.80.235.110 (OVH tracking relay)", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "29eff71f-56ff-4333-b10c-b60803162d24", "content": "AI chain: Groq → Cerebras → Mistral → SambaNova → Ollama", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "9c2a4a4f-fc35-4da9-92ab-77ddb76b338d", "content": "WEVADS IA v3.3 GO LIVE 28 mars 2026", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", 
"source": "manual-injection"}, {"id": "483f7afb-dd0e-46bc-a619-625ec8abaeca", "content": "Ethica: plateforme pharma HCP, 50K+ médecins Maghreb", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "879c3ae6-ab76-4739-ab92-afe2a209ea56", "content": "DeerFlow ports: frontend=3002, LangGraph=2024, gateway=8001", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "c528f9cd-6d24-42f8-94d5-4dc9154569c0", "content": "Vistex Partner Agreement signé, Referral Fee 15% ACV", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}, {"id": "a0345693-bcec-437b-8859-a78987b37bd2", "content": "KumoMTA port 587, PMTA port 25 (coexistence)", "category": "context", "confidence": 1.0, "createdAt": "2026-03-28T22:30:00Z", "source": "manual-injection"}]}
|
||||
16
watchdog.sh
16
watchdog.sh
@@ -1,9 +1,9 @@
|
||||
#!/bin/bash
# DeerFlow watchdog - monitors the main deerflow service only.
# Sub-services (langgraph/gateway/frontend) are managed by deerflow.service
# directly, so restarting them individually here would fight systemd.
#
# Fix vs the old per-service loop: the curl payload was built with nested,
# unescaped double quotes (-d "{"text":...}"), so the shell split the string
# and the webhook received malformed JSON. Single-quoting the payload keeps
# it intact.

if ! systemctl is-active --quiet deerflow; then
    systemctl restart deerflow
    # Append a timestamped restart record for later auditing.
    echo "$(date) RESTART deerflow" >> /opt/deer-flow/logs/watchdog.log
    # Best-effort team notification; failures are deliberately ignored so the
    # watchdog never blocks on the chat server being down.
    curl -s -X POST http://localhost:8065/hooks/pt54hzthf3b6pe6rgp1ionipnh \
        -H "Content-Type: application/json" \
        -d '{"text":"WATCHDOG: restarted deerflow (main service)","username":"DeerFlow Bot"}' > /dev/null 2>&1
fi
|
||||
|
||||
Reference in New Issue
Block a user