179 lines
12 KiB
Python
Executable File
179 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
"""
|
||
WEVAL — Seed 9 empty Qdrant collections with real content
V96.10 Opus, 19 Apr · Doctrine #4: honesty (real seed, not fake)

Collections to seed:
  weval_intents_memory   (384d) — sample of 1579 wired intents
  weval_agents_registry  (384d) — sample of 950 agents
  kb_lean6sigma          (768d) — Lean 6σ principles
  kb_dmaic_playbooks     (768d) — DMAIC playbooks
  kb_bpmn_flows          (768d) — BPMN flows
  kb_bpmn_patterns       (768d) — BPMN patterns
  kb_consulting_strategy (768d) — WEVAL consulting strategies
  kb_vsm_best_practices  (768d) — Value Stream Mapping best practices
  kb_wevads_deliv        (768d) — WEVADS delivery knowledge
|
||
"""
|
||
import os, json, sys, glob, subprocess
|
||
from pathlib import Path
|
||
from qdrant_client import QdrantClient
|
||
from qdrant_client.models import PointStruct
|
||
from sentence_transformers import SentenceTransformer
|
||
import uuid
|
||
|
||
# Qdrant endpoint that every collection in this script is seeded into.
QDRANT = "http://localhost:6333"
client = QdrantClient(QDRANT)

# Two sentence-transformers encoders are loaded once, up front:
#   m384 -> used for the weval_* collections (documented above as 384d)
#   m768 -> used for the kb_* collections (documented above as 768d)
print("Loading models...")
m384 = SentenceTransformer("all-MiniLM-L6-v2")
m768 = SentenceTransformer("all-mpnet-base-v2")
print("Models loaded\n")
|
||
|
||
# ═══════════════════════════════════════════════════════════════════
# 1-2. File-backed collections (384d) — shared helpers
# ═══════════════════════════════════════════════════════════════════
def read_preview(path, limit=500):
    """Return up to *limit* leading characters of the file at *path*.

    Reading is best-effort: undecodable bytes are dropped and any OS-level
    failure (missing file, permission denied, path is a directory) yields an
    empty string instead of aborting the seed run.

    Args:
        path: Filesystem path of the file to sample.
        limit: Maximum number of characters to read.

    Returns:
        The leading text of the file, or '' when it cannot be read.
    """
    try:
        with open(path, 'r', encoding='utf-8', errors='ignore') as handle:
            # Read only what is needed rather than slurping the whole file.
            return handle.read(limit)
    except OSError:
        return ''


def upsert_file_docs(collection, docs, kind, source):
    """Embed *docs* with the 384-d model and upsert them into *collection*.

    Args:
        collection: Name of the target Qdrant collection.
        docs: List of dicts carrying the keys 'name', 'doc' and 'file'.
        kind: Value stored under the payload key "type".
        source: Value stored under the payload key "source".

    Returns:
        Number of points upserted (0 when *docs* is empty).
    """
    if not docs:
        # Nothing to embed; skip instead of sending an empty batch to Qdrant.
        print(" skipped: no source files found")
        return 0
    vectors = m384.encode([entry['doc'] for entry in docs], show_progress_bar=False)
    # NOTE: ids are random (uuid4), so running the seed twice appends a second
    # copy of every point instead of overwriting the first one.
    points = [
        PointStruct(id=str(uuid.uuid4()), vector=vector.tolist(), payload={
            "name": entry['name'], "type": kind,
            "file": entry['file'], "source": source,
        })
        for entry, vector in zip(docs, vectors)
    ]
    client.upsert(collection_name=collection, points=points)
    print(f" upserted: {len(points)}")
    return len(points)


# ═══════════════════════════════════════════════════════════════════
# 1. weval_intents_memory (384d) — from /wired-pending/ filenames
# ═══════════════════════════════════════════════════════════════════
print("=== 1. weval_intents_memory ===")
intent_files = glob.glob('/var/www/html/api/wired-pending/intent-*.php')[:50]
intents_docs = []
for path in intent_files:
    base = os.path.basename(path)
    name = base.replace('intent-', '').replace('.php', '')
    # Read the first 500 chars for context; only the first 200 are embedded.
    preview = read_preview(path)
    intents_docs.append({
        "name": name,
        "doc": f"Intent: {name} · File: {base} · Preview: {preview[:200]}",
        "file": base,
    })

print(f" docs: {len(intents_docs)}")
upsert_file_docs("weval_intents_memory", intents_docs, "intent", "wired-pending-glob")

# ═══════════════════════════════════════════════════════════════════
# 2. weval_agents_registry (384d) — from /agent-stubs/ or registry
# ═══════════════════════════════════════════════════════════════════
print("\n=== 2. weval_agents_registry ===")
agent_files = glob.glob('/var/www/html/api/agent-stubs/*.php')[:50]
if not agent_files:
    # Fall back to agent-flavoured intent files when no stubs are present.
    agent_files = glob.glob('/var/www/html/api/wired-pending/intent-opus4-*agent*')[:50]
agents_docs = []
for path in agent_files:
    base = os.path.basename(path)
    name = base.replace('.php', '')
    preview = read_preview(path)
    agents_docs.append({
        "name": name,
        "doc": f"Agent: {name} · Preview: {preview[:200]}",
        "file": base,
    })

print(f" docs: {len(agents_docs)}")
upsert_file_docs("weval_agents_registry", agents_docs, "agent", "agent-stubs-glob")
|
||
|
||
# ═══════════════════════════════════════════════════════════════════
# 3-9. KB collections (768d) — seed with domain-specific content
# ═══════════════════════════════════════════════════════════════════
# Maps each kb_* collection name to the list of text snippets seeded into it.
KB_CONTENT = {
    # Lean Six Sigma principles and tools.
    "kb_lean6sigma": [
        "Voice of Customer (VOC) — translate customer needs into CTQ (Critical To Quality) specifications. Start every Lean 6σ project with VOC.",
        "DMAIC methodology — Define → Measure → Analyze → Improve → Control. 5-phase structured problem-solving.",
        "Sigma level vs DPMO — 6σ = 3.4 defects per million opportunities. 5σ = 233. 4σ = 6210. Higher sigma = better quality.",
        "Value vs Non-Value Added — classify all process steps. Target: maximize VA, eliminate Non-VA, reduce Necessary Non-VA.",
        "Pareto Principle (80/20) — 80% of defects come from 20% of causes. Focus improvement on vital few.",
        "FMEA (Failure Mode Effects Analysis) — systematic approach. Severity × Occurrence × Detection = RPN. Priority: RPN > 100.",
        "5S methodology — Sort, Set in order, Shine, Standardize, Sustain. Foundation of Lean workplace.",
        "Control charts — X-bar R, X-bar S, p-chart, np-chart, c-chart, u-chart. SPC monitoring tools.",
        "Gemba walk — go see where work happens. Lean leadership practice for continuous improvement.",
        "Kaizen events — 3-5 day focused improvement workshops. Cross-functional team, rapid deployment.",
    ],
    # One entry per DMAIC phase, plus governance and DMAIC-vs-DMADV guidance.
    "kb_dmaic_playbooks": [
        "DEFINE phase — SIPOC diagram (Supplier Input Process Output Customer), project charter, problem statement, team formation, stakeholder analysis.",
        "MEASURE phase — data collection plan, measurement system analysis (MSA), baseline sigma level, process capability Cp/Cpk study.",
        "ANALYZE phase — fishbone diagram (Ishikawa), 5 Whys, hypothesis testing (t-test, ANOVA, regression), root cause validation.",
        "IMPROVE phase — design of experiments (DOE), pilot testing, risk analysis, implementation plan, change management.",
        "CONTROL phase — control plan, SPC charts, standard work, training, handoff to process owner, lessons learned.",
        "DMAIC tollgate reviews — formal phase transitions with deliverables review. Sponsor approval required.",
        "DMAIC vs DMADV — use DMAIC for existing process improvement, DMADV (Define-Measure-Analyze-Design-Verify) for new process design.",
    ],
    # BPMN 2.0 notation elements.
    "kb_bpmn_flows": [
        "BPMN 2.0 standard — Business Process Model and Notation. ISO 19510. Universal diagramming language for business processes.",
        "Pool vs Swimlane — pool represents a participant/organization, swimlanes within pool represent roles/departments.",
        "Tasks — atomic work units. Types: User Task, Service Task, Script Task, Manual Task, Business Rule Task, Send/Receive Task.",
        "Gateways — decision points. Types: Exclusive (XOR), Parallel (AND), Inclusive (OR), Event-Based, Complex.",
        "Events — triggers and outcomes. Start, Intermediate, End. Types: Message, Timer, Error, Signal, Compensation, Terminate.",
        "Sequence flows vs Message flows — sequence = within pool, message = between pools.",
        "Sub-processes and Call Activities — encapsulate complex flows for reuse and hierarchy.",
    ],
    # Workflow control-flow patterns expressed in BPMN terms.
    "kb_bpmn_patterns": [
        "Sequential pattern — tasks in strict order. Foundation building block.",
        "Parallel split (AND-split) — fork into concurrent flows. Merge with AND-join.",
        "Exclusive choice (XOR) — one path selected based on condition. Mutually exclusive branches.",
        "Inclusive choice (OR) — one or more paths based on conditions.",
        "Deferred choice — path selected by first event occurring (race).",
        "Loop / Multi-instance — repeat activity N times or for each item in collection.",
        "Compensation — undo completed work on failure. Transactional rollback pattern.",
    ],
    # WEVAL consulting methodology, pricing and sales notes.
    "kb_consulting_strategy": [
        "WEVAL consulting core methodology — sovereign AI platform + Lean 6σ + ERP gap-fill. 25 ERPs × 60 pain points × 950 agents = 17.36M€ savings/client.",
        "WEVAL pricing — Discovery 5k€ + POC 15-25k€ + Rollout 80-300k€ + Managed 30-80k€/an. Modular engagement.",
        "Client archetype — CFO buyer (pain: manual Close, SAP extensions), CTO enabler (pain: integration sprawl), COO sponsor (pain: pipeline bottlenecks).",
        "WEVAL differentiator — sovereignty (13-provider cascade, 0€ LLM), multi-ERP (not vendor-locked), live catalog (60 PPs with avg 180k€ savings).",
        "Sales playbook — Pain Points Atlas entry → ROI Simulator → POC proposal → Rollout contract. Dogfood proof: WEVAL closes 35 gaps on itself = 2.4M€ savings.",
        "Ethica partnership — Kaouther Najar group. 156714 HCPs (DZ 112k + MA 19k + TN 17k + INTL). Campaign 109920 draft @10k/day. 0.8DH/contact counter [1.5/1.2/1.0DH].",
    ],
    # Value Stream Mapping practices.
    "kb_vsm_best_practices": [
        "Value Stream Mapping — visualize entire material + information flow from supplier to customer. Start with current state map.",
        "Takt time vs Cycle time — Takt = demand rhythm (available time / demand), Cycle = actual time per unit. Balance: cycle ≤ takt.",
        "Flow efficiency = Value-Added Time / Total Lead Time. Typical: <10%. World-class: >25%.",
        "Pull vs Push — Kanban pull system replenishes based on consumption. Eliminates overproduction (#1 waste).",
        "7 wastes (TIMWOOD) — Transport, Inventory, Motion, Waiting, Overproduction, Over-processing, Defects. 8th added: Skills underutilization.",
        "Future state map — redesigned VSM with improvements. Aim for continuous flow + pull + leveled schedule.",
        "Spaghetti diagram — trace physical movement. Reveals excessive travel (Motion waste).",
    ],
    # WEVADS delivery platform knowledge.
    "kb_wevads_deliv": [
        "WEVADS architecture — PostgreSQL adx_system+adx_clients (6.65M contacts) · Apache dual vhosts 5821+5890 · PowerMTA+Kumo+Postfix 3 MTAs · PHP 8.5-FPM · N8N workflows · OVH tracking 151.80.235.110.",
        "WEVADS pipeline E2E — Send (PMTA) → Open (tracking pixel) → Click (link shortener) → Conversion (pull API from CAKE/Everflow affiliate networks). Conversion PULL not postback.",
        "WEVADS O365 — 604 accounts across 9+ tenants. Graph API creation. Users per tenant 500 cap. accoff04/06 primary. 97pct inbox rate via PMTA→O365 relay.",
        "WEVADS Ethica delivery — dns wevup.app · SPF+DKIM 2048bit+DMARC · Cloudflare zone 53e067fbc5c532a1 · PTR mail.weval-consulting.com · consent.wevup.app live 17 real optins.",
        "WEVADS seed network — 1275 seed accounts across 8 ISPs. Warmup 1783 accounts. Cap 77170 emails/day. Quality Guard enforces good_creatives threshold.",
        "WEVADS sovereign IA — 13 providers 0€ cascade (Cerebras+Groq+CF+Gemini+SambaNova+NVIDIA+Mistral+HF+OpenRouter+GitHub+Ollama). Fallback auto on 429/402/401.",
    ],
}
|
||
|
||
# Embed each KB collection's snippets with the m768 model and upsert them
# into the collection of the same name, one batch per collection.
for col_name, docs in KB_CONTENT.items():
    print(f"\n=== 3+. {col_name} ===")
    print(f" docs: {len(docs)}")

    # Payload "type" is the collection name without its "kb_" marker.
    kb_type = col_name.replace('kb_', '')
    embeddings = m768.encode(docs, show_progress_bar=False)

    points = []
    for text, emb in zip(docs, embeddings):
        payload = {
            "content": text, "type": kb_type,
            "source": "V96.10-seed", "seeded_at": "2026-04-19",
        }
        # Each point gets a fresh random id.
        points.append(PointStruct(id=str(uuid.uuid4()), vector=emb.tolist(), payload=payload))

    client.upsert(collection_name=col_name, points=points)
    print(f" upserted: {len(points)}")
|
||
|
||
# Closing banner.
banner = "=" * 60
print("\n" + banner)
print("🏆 SEED COMPLETE")
print(banner)

# Summary: ask Qdrant for the point count of every collection touched above.
seeded_collections = ['weval_intents_memory', 'weval_agents_registry', *KB_CONTENT]
for col_name in seeded_collections:
    info = client.get_collection(col_name)
    print(f" {col_name:30} points={info.points_count}")
|