Files
html/api/ss.php
2026-04-12 22:57:03 +02:00

130 lines
5.1 KiB
PHP

<?php
// Admin endpoint: provisions the Scrapy project and spider files.
// Auth: shared-secret gate on the 'k' query parameter.
// NOTE(review): the key is hardcoded in source and travels in the URL
// (so it lands in access logs) — consider an env var + POST/header instead.
// hash_equals() is a constant-time comparison (no timing side channel);
// the is_string guard rejects array-style input (?k[]=) cleanly.
$key = $_GET['k'] ?? '';
if (!is_string($key) || !hash_equals('WEVADS2026', $key)) die('auth');
$r = [];
// Create project (idempotent: skipped when the directory already exists)
$dir = "/opt/weval-scrapy";
if (!is_dir($dir)) {
    // First attempt: let Scrapy generate the project skeleton itself.
    exec("cd /opt && python3 -m scrapy startproject weval_scrapy weval-scrapy 2>&1", $out);
    $r[] = "project: " . (is_dir($dir) ? "created" : "tried: " . implode("|", $out));
    if (!is_dir($dir)) {
        // Fallback: write the minimal project layout by hand.
        // Report failure instead of claiming success when mkdir fails
        // (the previous version always reported "manually created").
        if (!@mkdir("$dir/weval_scrapy/spiders", 0755, true) && !is_dir("$dir/weval_scrapy/spiders")) {
            $r[] = "project: mkdir failed";
        } else {
            file_put_contents("$dir/scrapy.cfg", "[settings]\ndefault = weval_scrapy.settings\n[deploy]\nproject = weval_scrapy\n");
            file_put_contents("$dir/weval_scrapy/settings.py", "BOT_NAME='weval_scrapy'\nSPIDER_MODULES=['weval_scrapy.spiders']\nROBOTSTXT_OBEY=True\nCONCURRENT_REQUESTS=4\nDOWNLOAD_DELAY=2\n");
            file_put_contents("$dir/weval_scrapy/__init__.py", "");
            file_put_contents("$dir/weval_scrapy/spiders/__init__.py", "");
            $r[] = "project: manually created";
        }
    }
} else {
    $r[] = "project: exists";
}
// --- HCP Spider ------------------------------------------------------------
// Crawls search results for healthcare-professional contacts across three
// Maghreb countries x five specialties.
// FIX: the previous version wrote the embedded Python flush-left (no
// indentation after class/def), so the generated file raised
// IndentationError and the spider could never load. The nowdoc below keeps
// the code free of PHP interpolation and properly indented; the statements
// themselves are unchanged.
// NOTE(review): google.com/robots.txt disallows /search, so with
// ROBOTSTXT_OBEY=True Scrapy will filter out every seed request — this
// spider likely yields nothing; confirm, or switch to a permitted API.
$sp = "$dir/weval_scrapy/spiders";
$hcpSpider = <<<'PY'
import scrapy, json
class HCPSpider(scrapy.Spider):
    name = 'hcp_enrichment'
    custom_settings = {'CONCURRENT_REQUESTS':4,'DOWNLOAD_DELAY':2,'ROBOTSTXT_OBEY':True,
        'USER_AGENT':'WEVAL-HCP-Bot/1.0 (+https://weval-consulting.com)'}

    def start_requests(self):
        # Seed: Google search for HCPs
        countries = ['maroc','tunisie','algerie']
        specialties = ['cardiologue','dermatologue','generaliste','pharmacien','pediatre']
        for c in countries:
            for s in specialties:
                yield scrapy.Request(f'https://www.google.com/search?q={s}+{c}+contact',
                                     callback=self.parse, meta={'country':c,'specialty':s})

    def parse(self, response):
        # One item per organic result block (selectors track Google's markup).
        for result in response.css('div.g'):
            yield {
                'name': result.css('h3::text').get(),
                'url': result.css('a::attr(href)').get(),
                'snippet': result.css('.VwiC3b::text').get(),
                'country': response.meta['country'],
                'specialty': response.meta['specialty'],
            }

PY;
file_put_contents("$sp/hcp_spider.py", $hcpSpider);
$r[] = "hcp_spider: created";
// --- B2B Spider --------------------------------------------------------------
// Crawls a Moroccan company directory (kerix.net) and follows 'next'
// pagination links.
// FIX: the previous version wrote the embedded Python flush-left (no
// indentation), producing an IndentationError on load. Nowdoc avoids PHP
// interpolation and carries correct indentation; statements are unchanged.
// NOTE(review): the broad '.company-item, .list-item, tr' selectors are
// guesses at the directory's markup — verify against the live page.
$b2bSpider = <<<'PY'
import scrapy
class B2BSpider(scrapy.Spider):
    name = 'b2b_leads'
    custom_settings = {'CONCURRENT_REQUESTS':2,'DOWNLOAD_DELAY':3,'ROBOTSTXT_OBEY':True}
    start_urls = ['https://www.kerix.net/fr/annuaire-entreprises']

    def parse(self, response):
        for company in response.css('.company-item, .list-item, tr'):
            yield {
                'name': company.css('a::text, td:first-child::text').get(),
                'url': company.css('a::attr(href)').get(),
                'sector': company.css('.sector::text, td:nth-child(2)::text').get(),
            }
        # Follow pagination until no 'next' link remains.
        next_page = response.css('a.next::attr(href)').get()
        if next_page:
            yield response.follow(next_page, self.parse)

PY;
file_put_contents("$dir/weval_scrapy/spiders/b2b_spider.py", $b2bSpider);
$r[] = "b2b_spider: created";
// --- Pharma Spider -----------------------------------------------------------
// Crawls doctoranytime.ma specialty listings (GP, cardiology, dermatology)
// and follows pagination.
// FIX: the previous version wrote the embedded Python flush-left (no
// indentation), producing an IndentationError on load. Nowdoc avoids PHP
// interpolation and carries correct indentation; statements are unchanged.
$pharmaSpider = <<<'PY'
import scrapy
class PharmaSpider(scrapy.Spider):
    name = 'pharma_directory'
    custom_settings = {'CONCURRENT_REQUESTS':2,'DOWNLOAD_DELAY':3,'ROBOTSTXT_OBEY':True}
    start_urls = [
        'https://www.doctoranytime.ma/specialite/medecin-generaliste',
        'https://www.doctoranytime.ma/specialite/cardiologue',
        'https://www.doctoranytime.ma/specialite/dermatologue',
    ]

    def parse(self, response):
        for doc in response.css('.doctor-card, .search-result'):
            yield {
                'name': doc.css('.doctor-name::text, h2::text').get(),
                'specialty': doc.css('.specialty::text').get(),
                'city': doc.css('.city::text, .location::text').get(),
                # urljoin handles relative hrefs; 'or' guards a missing link.
                'profile_url': response.urljoin(doc.css('a::attr(href)').get() or ''),
            }
        next_page = response.css('a.next::attr(href), .pagination a[rel=next]::attr(href)').get()
        if next_page:
            yield response.follow(next_page, self.parse)

PY;
file_put_contents("$dir/weval_scrapy/spiders/pharma_spider.py", $pharmaSpider);
$r[] = "pharma_spider: created";
// --- Site Monitor Spider -----------------------------------------------------
// Uptime/health probe: fetches a fixed list of company URLs and records
// status, body size, title, and download latency per page.
// FIX: the previous version wrote the embedded Python flush-left (no
// indentation), producing an IndentationError on load. Nowdoc avoids PHP
// interpolation and carries correct indentation; statements are unchanged.
$monitorSpider = <<<'PY'
import scrapy
class SiteMonitorSpider(scrapy.Spider):
    name = 'site_monitor'
    custom_settings = {'CONCURRENT_REQUESTS':8,'DOWNLOAD_DELAY':0.5}
    start_urls = [
        'https://weval-consulting.com/',
        'https://weval-consulting.com/wevia-ia/wevia.html',
        'https://weval-consulting.com/ai-benchmark.html',
        'https://weval-consulting.com/oss-discovery.html',
        'https://weval-consulting.com/realtime-monitor.html',
        'https://deerflow.weval-consulting.com/',
        'https://crm.weval-consulting.com/',
    ]

    def parse(self, response):
        yield {
            'url': response.url,
            'status': response.status,
            'size': len(response.body),
            'title': response.css('title::text').get(),
            # download_latency is set in meta by Scrapy's downloader.
            'load_ms': response.meta.get('download_latency',0)*1000,
        }

PY;
file_put_contents("$dir/weval_scrapy/spiders/site_monitor.py", $monitorSpider);
$r[] = "site_monitor: created";
// Inventory: list every spider module in the spiders package, excluding
// the package's __init__.py.
// FIX: glob() returns false on error; without the '?: []' fallback the
// subsequent array_map() would raise a fatal TypeError.
$files = glob("$sp/*.py") ?: [];
$names = array_map(fn($f) => basename($f, ".py"), $files);
$spiders = array_values(array_filter($names, fn($n) => $n !== "__init__"));
$r[] = "total_spiders: " . count($spiders) . " (" . implode(",", $spiders) . ")";
// Emit the accumulated run report as JSON.
echo json_encode(["ok" => true, "results" => $r]);