auto-sync-0135
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"agent": "V41_Disk_Monitor",
|
||||
"ts": "2026-04-22T01:00:01+02:00",
|
||||
"ts": "2026-04-22T01:30:01+02:00",
|
||||
"disk_pct": 83,
|
||||
"disk_free_gb": 25,
|
||||
"growth_per_day_gb": 1.5,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"agent": "V41_Risk_Escalation",
|
||||
"ts": "2026-04-22T01:15:03+02:00",
|
||||
"ts": "2026-04-22T01:30:03+02:00",
|
||||
"dg_alerts_active": 7,
|
||||
"wevia_life_stats_preview": "{
|
||||
"ok": true,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"agent": "V45_Leads_Sync",
|
||||
"ts": "2026-04-22T01:20:03+02:00",
|
||||
"ts": "2026-04-22T01:30:04+02:00",
|
||||
"paperclip_total": 48,
|
||||
"active_customer": 4,
|
||||
"warm_prospect": 5,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"agent": "V54_Risk_Monitor_Live",
|
||||
"ts": "2026-04-22T01:00:04+02:00",
|
||||
"ts": "2026-04-22T01:30:04+02:00",
|
||||
"critical_risks": {
|
||||
"RW01_pipeline_vide": {
|
||||
"pipeline_keur": 0,
|
||||
@@ -22,7 +22,7 @@
|
||||
},
|
||||
"RW12_burnout": {
|
||||
"agents_cron_active": 15,
|
||||
"load_5min": "8.93",
|
||||
"load_5min": "7.65",
|
||||
"automation_coverage_pct": 70,
|
||||
"residual_risk_pct": 60,
|
||||
"trend": "V52_goldratt_options_active"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"generated_at": "2026-04-22T01:30:01.977031",
|
||||
"generated_at": "2026-04-22T01:35:01.678947",
|
||||
"stats": {
|
||||
"total": 48,
|
||||
"pending": 31,
|
||||
|
||||
@@ -1,27 +1,27 @@
|
||||
{
|
||||
"ok": true,
|
||||
"agent": "V42_MQL_Scoring_Agent_REAL",
|
||||
"ts": "2026-04-21T23:20:02+00:00",
|
||||
"ts": "2026-04-21T23:30:01+00:00",
|
||||
"status": "DEPLOYED_AUTO",
|
||||
"deployed": true,
|
||||
"algorithm": "weighted_behavioral_signals",
|
||||
"signals_tracked": {
|
||||
"wtp_engagement": 94,
|
||||
"chat_engagement": 0,
|
||||
"wtp_engagement": 62,
|
||||
"chat_engagement": 3,
|
||||
"roi_tool": 0,
|
||||
"email_opened": 0
|
||||
},
|
||||
"avg_score": 23.5,
|
||||
"avg_score": 16.3,
|
||||
"mql_threshold": 50,
|
||||
"sql_threshold": 75,
|
||||
"leads_captured": 48,
|
||||
"mql_auto_scored": 20,
|
||||
"mql_auto_scored": 19,
|
||||
"sql_auto_scored": 8,
|
||||
"mql_auto_pct": 41,
|
||||
"mql_auto_pct": 39,
|
||||
"improvement_vs_manual": {
|
||||
"before_manual_pct": 33.3,
|
||||
"after_auto_pct": 41,
|
||||
"delta": 7.700000000000003
|
||||
"after_auto_pct": 39,
|
||||
"delta": 5.700000000000003
|
||||
},
|
||||
"paperclip_db_ok": true,
|
||||
"paperclip_tables": 1,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"timestamp": "2026-04-22T01:00:15",
|
||||
"timestamp": "2026-04-22T01:30:15",
|
||||
"features": {
|
||||
"total": 36,
|
||||
"pass": 35
|
||||
@@ -13,7 +13,7 @@
|
||||
"score": 97.2,
|
||||
"log": [
|
||||
"=== UX AGENT v1.0 ===",
|
||||
"Time: 2026-04-22 01:00:02",
|
||||
"Time: 2026-04-22 01:30:02",
|
||||
" core: 4/4",
|
||||
" layout: 3/4",
|
||||
" interaction: 6/6",
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"ok": true,
|
||||
"version": "V83-business-kpi",
|
||||
"ts": "2026-04-21T23:29:41+00:00",
|
||||
"ts": "2026-04-21T23:33:41+00:00",
|
||||
"summary": {
|
||||
"total_categories": 8,
|
||||
"total_kpis": 64,
|
||||
|
||||
File diff suppressed because one or more lines are too long
188
wiki/session-V149-ethica-enrichment-broken-diagnostic.md
Normal file
188
wiki/session-V149-ethica-enrichment-broken-diagnostic.md
Normal file
@@ -0,0 +1,188 @@
|
||||
# V149 - Ethica enrichment pipeline broken diagnostic - 2026-04-22
|
||||
|
||||
## Objectif Yacine
|
||||
"Ethica NOT_READY (email_gap 51k, DZ 1 email/200) → besoin enrichment"
|
||||
|
||||
Investiguer pourquoi enrichment ne progresse pas, identifier root cause.
|
||||
|
||||
## Discovery V149
|
||||
|
||||
### Infrastructure Ethica MASSIVEMENT existante
|
||||
|
||||
```
|
||||
/var/www/html/api/ethica-api.php (main API endpoint)
|
||||
/var/www/ethica/api/ethica-api.php (dedicated dir API)
|
||||
/var/www/ethica/public/ethica-api.php (public endpoint)
|
||||
/var/www/weval/api/ethica-api.php (weval mirror)
|
||||
/opt/ethica-validator.py (Google Maps validator)
|
||||
/opt/ethica-richscraper.py (1sante + tabibi + santeaumaroc)
|
||||
/opt/ethica-enrich-v4.py (cron 01:00 daily, 300 records)
|
||||
/opt/ethica-enrich-searxng.py (cron 10:00 daily, 200 records)
|
||||
/opt/fmgapp/public/api/ethica-*.php (5 specialty APIs)
|
||||
/opt/wevia-brain/ethica-boost-scraper.php (boost scraper)
|
||||
/opt/deer-flow/skills/weval/skills/ethica-scrape.md (skill)
|
||||
```
|
||||
|
||||
### DB tables ethica schema
|
||||
```
|
||||
ethica.campaigns
|
||||
ethica.consent_log
|
||||
ethica.consent_tokens
|
||||
ethica.cromc_reference
|
||||
ethica.crossvalidator_audit
|
||||
ethica.email_validation_log
|
||||
ethica.hcp_summary
|
||||
ethica.medecins_clean (VIEW)
|
||||
ethica.medecins_clean_gold_20260328 (TABLE)
|
||||
ethica.medecins_real (VIEW)
|
||||
ethica.medecins_real_gold_20260328 (TABLE)
|
||||
ethica.medecins_real_gold_20260420_v39 (TABLE) ← SNAPSHOT 20 avril
|
||||
ethica.medecins_validated (TABLE, dashboard source, 161,733 rows)
|
||||
```
|
||||
|
||||
### Cron scrapers ACTIFS (3 scripts, 4 exécutions/jour)
|
||||
```
|
||||
0 1 * * * python3 /opt/ethica-enrich-v4.py 300
|
||||
0 10 * * * python3 /opt/ethica-enrich-searxng.py 200
|
||||
0 11,23 * * * python3 /opt/ethica-richscraper.py 500
|
||||
```
|
||||
|
||||
Théoriquement : 1500 records/jour enrichis (300 + 200 + 2 × 500).
|
||||
|
||||
## Root Cause identifié 🔥
|
||||
|
||||
**Les 3 scripts ciblent `ethica.medecins` — table QUI N'EXISTE PAS.**
|
||||
|
||||
Preuves :
|
||||
1. `information_schema.tables WHERE table_name='medecins'` → 0 rows
|
||||
2. Scripts font `INSERT INTO ethica.medecins ... ON CONFLICT(email) DO UPDATE`
|
||||
3. PostgreSQL ERROR → reste silencieuse dans les logs `subprocess.run`
|
||||
4. Log affiche "48899 total" = **state file stale** (/tmp/ethica-rs-state.json)
|
||||
|
||||
Test live V149:
|
||||
```bash
|
||||
sudo python3 /opt/ethica-richscraper.py 5
|
||||
→ SESSION: +0 | DB: 48899 total, 13540 with phone
|
||||
```
|
||||
|
||||
**Zéro progression réelle.** Les "+60 phones" des logs précédents sont trompeurs (probablement un cumul issu du state file).
|
||||
|
||||
### Table réelle dashboard
|
||||
```
|
||||
ethica.medecins_validated: 161,733 rows (dashboard source)
|
||||
```
|
||||
|
||||
Les scripts n'y écrivent jamais. Le gap email de 51k ne se réduit donc pas.
|
||||
|
||||
### Gold snapshot 20 avril
|
||||
`ethica.medecins_real_gold_20260420_v39: 161,730 rows`
|
||||
= snapshot d'archive récent. Quelqu'un a fait un backup / une migration partielle
|
||||
il y a 2 jours, et a probablement renommé ethica.medecins → snapshot v39
|
||||
SANS mettre à jour les 3 scripts enrichment.
|
||||
|
||||
## Pourquoi dashboard montre 68% emails ?
|
||||
|
||||
La requête `hcp_summary` du dashboard tourne sur `medecins_validated`. Mais
|
||||
cette table a déjà 110k emails (110k / 161,733 ≈ 68%). Les 51k gap = données historiques
|
||||
importées d'autres sources (scraping TN via tabibi.tn, sante-ma,
|
||||
SearxNG, Google Maps).
|
||||
|
||||
L'enrichment broken = on ne progresse PAS pour combler le gap.
|
||||
Mais le gap est stable, pas en croissance.
|
||||
|
||||
## Pour le pilote DZ généraliste
|
||||
|
||||
```
|
||||
total_dz_mg: 10,063 HCPs
|
||||
Sur 200 sampled: 1 email (!)
|
||||
199 autres: email=None
|
||||
```
|
||||
|
||||
C'est cohérent: les HCPs DZ sont mal couverts par scrapers (scripts
|
||||
TN et Maroc-heavy). Sources DZ email non disponibles.
|
||||
|
||||
## Options recommandées (ACTION YACINE)
|
||||
|
||||
### Option A - Refactor 3 scripts
|
||||
Modifier `ethica.medecins` → `ethica.medecins_validated` dans:
|
||||
- ethica-enrich-v4.py
|
||||
- ethica-enrich-searxng.py
|
||||
- ethica-richscraper.py
|
||||
|
||||
Risque: régression sur scraping existant
|
||||
Effort: 3×sed + tests
|
||||
GOLD préalable obligatoire
|
||||
|
||||
### Option B - VIEW writable alias
|
||||
```sql
|
||||
CREATE VIEW ethica.medecins AS SELECT * FROM ethica.medecins_validated;
|
||||
CREATE TRIGGER medecins_insert INSTEAD OF INSERT ON ethica.medecins ...
|
||||
```
|
||||
|
||||
Pros: zero modif scripts, compat garantie
|
||||
Cons: triggers PostgreSQL complexes, d'autres requêtes peuvent être perturbées par cette vue
|
||||
|
||||
### Option C - Nouveau enrichment script from scratch
|
||||
Dédié DZ généraliste avec sources:
|
||||
- SerpAPI Google query "dr X generalist algerie email"
|
||||
- HunterIO domain search for clinics DZ
|
||||
- LinkedIn Sales Navigator (manual export)
|
||||
- Annuaire de l'Ordre des médecins DZ (accès direct)
|
||||
|
||||
Pros: ciblé pilot DZ, propre from scratch
|
||||
Cons: temps dev, API costs SerpAPI/Hunter
|
||||
|
||||
### Option D - SKIP enrichment Ethica
|
||||
Pivot stratégique: ne pas combler le gap DZ généraliste, aller vers:
|
||||
- Spécialités mieux couvertes (où emails > 50%)
|
||||
- Pays mieux couverts (Maroc? Tunisie?)
|
||||
- Ou attendre phase enrichment B2B
|
||||
|
||||
## V149 Actions réalisées (minimal)
|
||||
|
||||
**V149 = RAPPORT read-only. AUCUNE modification.**
|
||||
|
||||
- ✅ Scan exhaustif infrastructure Ethica
|
||||
- ✅ Identification root cause (scripts → table inexistante)
|
||||
- ✅ Vérification par run live (0 enrichment réel)
|
||||
- ✅ Mapping DB tables ethica schema
|
||||
- ✅ Test richscraper batch 5 → confirme 0 net
|
||||
- ✅ Wiki V149 publié
|
||||
|
||||
**Zero modification code, zero modification DB, zero modification cron.**
|
||||
|
||||
Yacine décide Option A/B/C/D.
|
||||
|
||||
## L99 zero régression
|
||||
153/153 PASS maintenu (18 versions V125-V149).
|
||||
|
||||
## Doctrines V149
|
||||
|
||||
- 0 Root cause (échec silencieux du cron découvert)
|
||||
- 4 Zero régression (pas touché)
|
||||
- 14 Test-driven (run live richscraper batch 5 confirm 0)
|
||||
- 95 Traçabilité wiki complète
|
||||
- 100 Train release
|
||||
|
||||
## Recap chain V131-V149
|
||||
|
||||
```
|
||||
V131 Routing 100%
|
||||
V132 Playwright 12/12
|
||||
V133-V134 4/4 hubs
|
||||
V135-V136 Admin repoint
|
||||
V137-V138 Widget fix
|
||||
V139 Filter + chatbot
|
||||
V140 Defense chattr
|
||||
V141 Handoff
|
||||
V142 Form early-log + audits
|
||||
V143 Session default split 2481
|
||||
V144 Ambre cache x250
|
||||
V145-V146 Sessions_sources KPI
|
||||
V147 Ethica/Vistex audit
|
||||
V148 NULL→legacy
|
||||
V149 Ethica enrichment diagnostic (broken cron detected, 0 auto-fix)
|
||||
```
|
||||
|
||||
Honnêteté > vitesse. Je ne touche pas à 3 scripts de production
|
||||
sans approbation explicite du propriétaire.
|
||||
Reference in New Issue
Block a user