50 lines
2.0 KiB
PHP
50 lines
2.0 KiB
PHP
<?php

declare(strict_types=1);

/**
 * JSON control endpoint for the Scrapy project in /opt/weval-scrapy.
 *
 * Query parameters:
 *   k       shared secret; every request must carry it
 *   action  "status" (default), "run" or "results"
 *   spider  spider name for "run" / "results"; characters outside [a-z0-9_] are dropped
 *
 * Responses:
 *   status   {"ok":true,"spiders":[...],"scrapy_list":"...","version":"..."}
 *   run      {"ok":true,"spider":"...","items":N,"data":[first 10 items]}
 *            or, when the crawl produced no usable result file,
 *            {"ok":false,"error":"crawl failed","spider":"...","items":0,"data":[],"output":"..."}
 *   results  the raw contents of the spider's result file, or {"error":"none"}
 *
 * NOTE(review): the shared secret is hard-coded below and travels in the query
 * string while CORS is open to every origin - consider moving it to
 * configuration and sending it in a request header.
 * NOTE(review): result files live at a predictable path in /tmp; confirm no
 * untrusted local user can pre-create or replace them.
 */

/**
 * Emit a payload as the JSON response body.
 *
 * Invalid UTF-8 (possible in captured shell output) is substituted instead of
 * letting json_encode() fail and the response body come out empty.
 */
function sendJson(array $payload): void
{
    echo json_encode($payload, JSON_INVALID_UTF8_SUBSTITUTE);
}

/**
 * Run a shell command and return its trimmed output.
 *
 * shell_exec() yields null or false when the command prints nothing or cannot
 * be started; both are mapped to an empty string.
 */
function captureShell(string $command): string
{
    return trim((string) shell_exec($command));
}

/**
 * Read the "spider" query parameter, keeping only the characters [a-z0-9_].
 *
 * A missing or non-string value (e.g. spider[]=x) yields an empty string.
 */
function requestedSpider(): string
{
    $raw = $_GET["spider"] ?? "";

    return is_string($raw) ? (string) preg_replace("/[^a-z0-9_]/", "", $raw) : "";
}

header("Content-Type: application/json");
header("Access-Control-Allow-Origin: *");

// Constant-time comparison so the response time does not leak how much of the key matched.
$key = $_GET["k"] ?? "";
if (!is_string($key) || !hash_equals("WEVADS2026", $key)) {
    die(json_encode(["error" => "auth"]));
}

$action = $_GET["action"] ?? "status";
if (!is_string($action)) {
    $action = "";
}

$project = "/opt/weval-scrapy";
$inProject = "cd " . escapeshellarg($project) . " && ";

// Python program for "run": collects every scraped item through an in-process
// item pipeline and dumps the list as JSON once the crawl has finished.
// argv[1] is the spider name, argv[2] the output file.
$crawlScript = <<<'PY'
import json, sys
sys.path.insert(0, '.')
from weval_scrapy.spiders import *
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

items = []

class Pipe:
    def process_item(self, item, spider):
        items.append(dict(item))
        return item

settings = get_project_settings()
settings['ITEM_PIPELINES'] = {'__main__.Pipe': 300}
settings['LOG_LEVEL'] = 'WARNING'
process = CrawlerProcess(settings)
process.crawl(sys.argv[1])
process.start()
with open(sys.argv[2], 'w') as f:
    json.dump(items, f)
print(json.dumps({'ok': True, 'items': len(items)}))
PY;

switch ($action) {
    case "status":
        // Spider module names come from the file system; "scrapy_list" is what Scrapy itself reports.
        $spiders = [];
        foreach (glob("$project/weval_scrapy/spiders/*.py") ?: [] as $file) {
            $module = basename($file, ".py");
            if ($module !== "__init__") {
                $spiders[] = $module;
            }
        }

        sendJson([
            "ok" => true,
            "spiders" => $spiders,
            "scrapy_list" => captureShell($inProject . "python3 -m scrapy list 2>&1"),
            "version" => captureShell("python3 -m scrapy version 2>&1"),
        ]);
        break;

    case "run":
        $spider = requestedSpider();
        if ($spider === "") {
            sendJson(["error" => "no spider"]);
            break;
        }

        $out = "/tmp/scrapy-$spider.json";
        // Best-effort removal of the previous result; the file usually does not exist yet.
        @unlink($out);

        // Every dynamic value is passed as a quoted shell argument rather than
        // being spliced into the Python source.
        $command = $inProject
            . "SCRAPY_SETTINGS_MODULE=weval_scrapy.settings python3 -c "
            . escapeshellarg($crawlScript) . " "
            . escapeshellarg($spider) . " "
            . escapeshellarg($out) . " 2>&1";
        $log = captureShell($command);

        // The script writes the file (even for zero items) only after the crawl
        // ran to completion, so a missing or undecodable file means it failed.
        $contents = is_file($out) && is_readable($out) ? file_get_contents($out) : false;
        $items = is_string($contents) ? json_decode($contents, true) : null;

        if (!is_array($items)) {
            sendJson([
                "ok" => false,
                "error" => "crawl failed",
                "spider" => $spider,
                "items" => 0,
                "data" => [],
                "output" => $log,
            ]);
            break;
        }

        sendJson([
            "ok" => true,
            "spider" => $spider,
            "items" => count($items),
            "data" => array_slice($items, 0, 10),
        ]);
        break;

    case "results":
        $path = "/tmp/scrapy-" . requestedSpider() . ".json";
        $body = is_file($path) && is_readable($path) ? file_get_contents($path) : false;

        // An unreadable or empty file would otherwise produce an empty, non-JSON body.
        if ($body === false || $body === "") {
            sendJson(["error" => "none"]);
        } else {
            echo $body;
        }
        break;

    default:
        sendJson(["error" => "unknown action"]);
        break;
}