Files
wevads-platform/scripts/scraping-factory.php
2026-02-26 04:53:11 +01:00

42 lines
1.1 KiB
PHP
Executable File

<?php

declare(strict_types=1);

/**
 * scraping-factory
 *
 * HTTP endpoint that fetches a remote page and derives from it:
 *   - the contents of the first <title> element,
 *   - a whitespace-collapsed plain-text excerpt (first 5000 bytes),
 *   - up to 30 links (href + anchor text).
 *
 * Request parameters (POST takes precedence over GET):
 *   - url      (required) page to fetch; only http:// and https:// are accepted
 *   - selector (optional) read below but not used by the code in this file
 *   - mode     (optional, default 'text') read below but not used by the code in this file
 *
 * Failures are reported as a JSON object of the form {"error": "..."}.
 */

header('Content-Type: application/json');

$url = $_POST['url'] ?? $_GET['url'] ?? '';
$selector = $_POST['selector'] ?? $_GET['selector'] ?? '';
$mode = $_POST['mode'] ?? $_GET['mode'] ?? 'text';

// Request parameters can arrive as arrays (e.g. "url[]=x"); only a non-empty string is usable.
if (!is_string($url) || trim($url) === '') {
    echo json_encode(['error' => 'URL required']);
    exit;
}

// The URL is untrusted input. Without this check file_get_contents() would also
// accept file://, php:// and other stream wrappers and could expose local files.
// NOTE(review): hosts are not restricted, so internal HTTP services remain
// reachable through this endpoint — consider an allow-list.
$scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
if ($scheme !== 'http' && $scheme !== 'https') {
    echo json_encode(['error' => 'Only http and https URLs are allowed']);
    exit;
}

$ctx = stream_context_create(['http' => [
    'header' => 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    'timeout' => 10,
]]);

// Warnings are suppressed deliberately: a failed fetch is reported through the JSON error below.
$html = @file_get_contents($url, false, $ctx);
if ($html === false || $html === '') {
    echo json_encode(['error' => 'Failed to fetch URL']);
    exit;
}

// Extract the title.
preg_match('/<title>([^<]+)<\/title>/i', $html, $titleMatch);
$title = $titleMatch[1] ?? '';

// Clean the HTML: drop <script>/<style> elements with their contents, strip the
// remaining tags, collapse whitespace runs, then keep the first 5000 bytes.
// preg_replace() returns null on a PCRE error; fall back to the unmodified input.
$withoutScripts = preg_replace('/<(script|style)[^>]*>.*?<\/\1>/si', '', $html) ?? $html;
$text = strip_tags($withoutScripts);
$text = preg_replace('/\s+/', ' ', $text) ?? $text;
// substr() counts bytes, so the cut may fall inside a multi-byte character.
$text = trim(substr($text, 0, 5000));

// Extract links: keep at most the first 30 anchors that have a quoted href and
// no nested markup in their text.
preg_match_all('/<a[^>]+href=["\']([^"\']+)["\'][^>]*>([^<]*)<\/a>/i', $html, $links, PREG_SET_ORDER);
$linksList = array_map(
    static fn (array $l): array => ['href' => $l[1], 'text' => trim($l[2])],
    array_slice($links, 0, 30),
);

// NOTE(review): nothing in this file emits $title, $text or $linksList on the
// success path, and $selector / $mode are never used here. The include below has
// an empty path, which cannot resolve to a file. Presumably the response is
// meant to be produced by an included file — confirm the intended path.
?>
<?php include_once(''); ?>