Files
wevads-platform/scripts/hamid-scrape.php
2026-02-26 04:53:11 +01:00

54 lines
1.6 KiB
PHP
Executable File

<?php
/**
 * Minimal page-scraper endpoint.
 *
 * Request parameters (POST takes precedence over GET):
 *   - url  : page to fetch; must be an absolute http:// or https:// URL.
 *   - mode : "text" (default), "links", "images" or "html". Any other value
 *            is handled like "text".
 *
 * Output: a JSON object {success, title, url} plus exactly one of
 * "text", "links", "images" or "html", or {error} when the request is
 * rejected or the fetch fails. A "selector" request parameter is not used.
 */
header('Content-Type: application/json');

$url = $_POST['url'] ?? $_GET['url'] ?? '';
$mode = $_POST['mode'] ?? $_GET['mode'] ?? 'text';

// Emits a JSON error object and stops the script.
$fail = static function (string $message): void {
    echo json_encode(['error' => $message]);
    exit;
};

// Request parameters may arrive as arrays (url[]=...); only strings are usable.
$url = is_string($url) ? trim($url) : '';
if (empty($url)) {
    $fail('URL required');
}

// Security: the URL is untrusted input. Without this check, stream wrappers
// such as file:// or php:// would let a caller read local files through
// file_get_contents().
// NOTE(review): http(s) URLs pointing at internal/private hosts are still
// reachable from here; restrict hosts as well if this endpoint is exposed.
$scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
if (!in_array($scheme, ['http', 'https'], true)) {
    $fail('Only http and https URLs are allowed');
}

$ctx = stream_context_create(['http' => [
    'header' => 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    'timeout' => 10,
]]);

// Warnings are suppressed on purpose so they cannot leak into the JSON body;
// failure is detected through the return value instead.
$html = @file_get_contents($url, false, $ctx);
if ($html === false || $html === '') {
    // Explicit comparison: a page whose whole body is "0" is a valid response.
    $fail('Failed to fetch URL');
}

// Extract the title (the tag may carry attributes, e.g. <title lang="en">).
$title = '';
if (preg_match('/<title\b[^>]*>([^<]*)<\/title>/i', $html, $titleMatch) === 1) {
    $title = trim($titleMatch[1]);
}

$result = ['success' => true, 'title' => $title, 'url' => $url];

switch ($mode) {
    case 'links':
        // Extract links: href plus the anchor's plain-text content, first 30 only.
        preg_match_all(
            '/<a[^>]+href=["\']([^"\']+)["\'][^>]*>([^<]*)<\/a>/i',
            $html,
            $links,
            PREG_SET_ORDER
        );
        $result['links'] = array_slice(
            array_map(
                static fn($l) => ['href' => $l[1], 'text' => trim($l[2])],
                $links ?: []
            ),
            0,
            30
        );
        break;

    case 'images':
        // Extract image sources, first 20 only.
        preg_match_all('/<img[^>]+src=["\']([^"\']+)["\']/i', $html, $images);
        $result['images'] = array_slice($images[1] ?? [], 0, 20);
        break;

    case 'html':
        $result['html'] = substr($html, 0, 10000);
        break;

    case 'text':
    default:
        // Clean the HTML: drop <script>/<style> elements, strip the remaining
        // tags and collapse whitespace. preg_replace() returns null on a PCRE
        // error, so fall back to the previous string in that case.
        $withoutCode = preg_replace('/<(script|style)[^>]*>.*?<\/\1>/si', '', $html);
        $text = strip_tags($withoutCode ?? $html);
        $text = preg_replace('/\s+/', ' ', $text) ?? $text;
        $result['text'] = trim(substr($text, 0, 5000));
        break;
}

// substr() cuts on bytes and fetched pages are not guaranteed to be UTF-8;
// without this flag json_encode() would return false and the response body
// would be empty. Invalid sequences are replaced with U+FFFD instead.
echo json_encode($result, JSON_INVALID_UTF8_SUBSTITUTE);