"auth"])); // fragment: tail of the auth check that begins before this chunk
$action = $_GET["action"] ?? "status";
$project = "/opt/weval-scrapy";

switch ($action) {

    // GET ?action=status — spiders found on disk plus what scrapy itself reports.
    case "status":
        // Spider names derived from the package directory, minus the __init__ module.
        $sp = array_values(array_filter(
            array_map(fn($f) => basename($f, ".py"), glob("$project/weval_scrapy/spiders/*.py")),
            fn($n) => $n !== "__init__"
        ));
        $list = trim(shell_exec("cd $project && python3 -m scrapy list 2>&1"));
        echo json_encode([
            "ok"          => true,
            "spiders"     => $sp,
            "scrapy_list" => $list,
            "version"     => trim(shell_exec("python3 -m scrapy version 2>&1")),
        ]);
        break;

    // GET ?action=run&spider=NAME — crawl one spider, collecting items in-process.
    case "run":
        // Whitelist filter makes $s safe to interpolate into the shell command and
        // the embedded Python below.
        // NOTE(review): pattern is lowercase-only; an uppercase spider name would be
        // mangled rather than rejected — confirm all spider names are lowercase.
        $s = preg_replace("/[^a-z0-9_]/", "", $_GET["spider"] ?? "");
        if (!$s) { echo json_encode(["error" => "no spider"]); break; }
        $out = "/tmp/scrapy-$s.json";
        @unlink($out); // stale output from a previous run must not masquerade as this run's result
        // Use an in-memory FEEDS-style pipeline instead of -o which causes issues in 2.14.
        $cmd = "cd $project && SCRAPY_SETTINGS_MODULE=weval_scrapy.settings python3 -c \"
import scrapy.crawler as c
import json, sys
sys.path.insert(0,'.')
from weval_scrapy.spiders import *
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
items=[]
class Pipe:
    def process_item(self,item,spider):
        items.append(dict(item))
        return item
s=get_project_settings()
s['ITEM_PIPELINES']={'__main__.Pipe':300}
s['LOG_LEVEL']='WARNING'
p=CrawlerProcess(s)
p.crawl('$s')
p.start()
with open('$out','w') as f:
    json.dump(items,f)
print(json.dumps({'ok':True,'items':len(items)}))
\" 2>&1";
        $raw = shell_exec($cmd);
        // BUG FIX: previously $raw was ignored and the endpoint answered
        // ok=true / items=0 even when the crawl crashed before writing $out.
        // Surface the failure, with the tail of the combined stdout/stderr log.
        if (!file_exists($out)) {
            echo json_encode([
                "ok"     => false,
                "spider" => $s,
                "error"  => "crawl produced no output",
                "log"    => substr((string) $raw, -2000),
            ]);
            break;
        }
        $items = @json_decode(@file_get_contents($out), true) ?: [];
        echo json_encode([
            "ok"     => true,
            "spider" => $s,
            "items"  => count($items),
            "data"   => array_slice($items, 0, 10), // preview only; full dump via action=results
        ]);
        break;

    // GET ?action=results&spider=NAME — replay the full item dump from the last run.
    case "results":
        $s = preg_replace("/[^a-z0-9_]/", "", $_GET["spider"] ?? "");
        $f = "/tmp/scrapy-$s.json";
        echo file_exists($f) ? file_get_contents($f) : json_encode(["error" => "none"]);
        break;
}