Files
html/api/fs.php
2026-04-12 22:57:03 +02:00

17 lines
2.1 KiB
PHP

<?php
/**
 * Bootstrap endpoint for the Scrapy project under /opt/weval-scrapy.
 *
 * When called with the correct `k` query parameter it (re)writes the project
 * skeleton (scrapy.cfg, package __init__ files, settings.py) and four spider
 * modules, then runs `python3 -m scrapy list` inside the project directory.
 *
 * Response: a JSON object with
 *   - ok          bool    true when the directory and every file were written
 *   - dir         bool    whether the project directory exists
 *   - spiders     string[] spider module names found on disk (without ".py")
 *   - scrapy_list string  trimmed combined stdout/stderr of `scrapy list`
 *   - errors      string[] one entry per failed mkdir/write (empty on success)
 *
 * On a missing or wrong key the script terminates with the body "auth".
 *
 * NOTE(review): the secret is hard-coded and travels in the query string, so
 * it can end up in access logs; consider moving it to configuration and a
 * request header.
 */

// Constant-time comparison; the is_string() guard rejects array-valued
// parameters such as ?k[]=x, which hash_equals() would not accept.
$key = $_GET['k'] ?? '';
if (!is_string($key) || !hash_equals('WEVADS2026', $key)) {
    die('auth');
}

$dir = "/opt/weval-scrapy";
$sp = "$dir/weval_scrapy/spiders";
$errors = [];

// Recursive create; the trailing is_dir() covers a concurrent request having
// created the directory between the first check and mkdir().
if (!is_dir($sp) && !mkdir($sp, 0755, true) && !is_dir($sp)) {
    $errors[] = "mkdir failed: $sp";
}

// Target path => file contents. Written in this order on every request.
$files = [
    "$dir/scrapy.cfg" => "[settings]\ndefault=weval_scrapy.settings\n[deploy]\nproject=weval_scrapy\n",
    "$dir/weval_scrapy/__init__.py" => "",
    "$dir/weval_scrapy/spiders/__init__.py" => "",
    "$dir/weval_scrapy/settings.py" => "BOT_NAME='weval_scrapy'\nSPIDER_MODULES=['weval_scrapy.spiders']\nROBOTSTXT_OBEY=True\nCONCURRENT_REQUESTS=4\nDOWNLOAD_DELAY=2\n",
    "$sp/site_monitor.py" => "import scrapy\nclass SiteMonitorSpider(scrapy.Spider):\n name='site_monitor'\n start_urls=['https://weval-consulting.com/','https://deerflow.weval-consulting.com/','https://crm.weval-consulting.com/']\n def parse(self,r):\n yield{'url':r.url,'status':r.status,'size':len(r.body),'title':r.css('title::text').get()}\n",
    "$sp/hcp_spider.py" => "import scrapy\nclass HCPSpider(scrapy.Spider):\n name='hcp_enrichment'\n custom_settings={'DOWNLOAD_DELAY':3}\n start_urls=['https://www.doctoranytime.ma/specialite/medecin-generaliste']\n def parse(self,r):\n for d in r.css('.doctor-card,article'):\n yield{'name':d.css('h2::text,.name::text').get(),'url':r.urljoin(d.css('a::attr(href)').get() or '')}\n",
    "$sp/b2b_spider.py" => "import scrapy\nclass B2BSpider(scrapy.Spider):\n name='b2b_leads'\n start_urls=['https://www.kerix.net/fr']\n def parse(self,r):\n yield{'title':r.css('title::text').get(),'url':r.url}\n",
    "$sp/pharma_spider.py" => "import scrapy\nclass PharmaSpider(scrapy.Spider):\n name='pharma_directory'\n start_urls=['https://www.sante.gov.ma/']\n def parse(self,r):\n yield{'title':r.css('title::text').get(),'url':r.url}\n",
];

// Keep going after a failed write so the response lists every problem.
foreach ($files as $path => $contents) {
    if (file_put_contents($path, $contents) === false) {
        $errors[] = "write failed: $path";
    }
}

// glob() returns false on error; fall back to an empty list so array_map()
// always receives an array.
$spiders = array_values(array_filter(
    array_map(fn($f) => basename($f, ".py"), glob("$sp/*.py") ?: []),
    fn($n) => $n !== "__init__"
));

// Test: ask Scrapy itself which spiders it can load from the project.
$test = shell_exec("cd " . escapeshellarg($dir) . " && python3 -m scrapy list 2>&1");

if (!headers_sent()) {
    header('Content-Type: application/json');
}

// shell_exec() yields null/false when the command fails or prints nothing;
// the string cast avoids passing a non-string to trim(). Invalid UTF-8 in the
// tool output is substituted instead of making json_encode() return false.
echo json_encode(
    [
        "ok" => $errors === [],
        "dir" => is_dir($dir),
        "spiders" => $spiders,
        "scrapy_list" => trim((string) $test),
        "errors" => $errors,
    ],
    JSON_INVALID_UTF8_SUBSTITUTE
);