42 lines
1.1 KiB
PHP
Executable File
42 lines
1.1 KiB
PHP
Executable File
<?php
// scraping-factory
//
// JSON endpoint that downloads a remote page and extracts its <title>, a
// plain-text excerpt (at most 5000 bytes) and its first 30 hyperlinks.
//
// Request parameters (POST takes precedence over GET):
//   url      - required; absolute http:// or https:// address of the page.
//   selector - may be sent by callers; not applied by this script.
//   mode     - may be sent by callers; not applied by this script.
//
// Response body:
//   on failure: {"error": "<message>"}
//   on success: {"url": ..., "title": ..., "text": ...,
//                "links": [{"href": ..., "text": ...}, ...]}

header('Content-Type: application/json');

$url = $_POST['url'] ?? $_GET['url'] ?? '';

// Input such as `url[]=...` arrives as an array; treat anything that is not a
// non-empty string as a missing URL instead of handing it to the stream layer.
if (!is_string($url) || trim($url) === '') {
    echo json_encode(['error' => 'URL required']);
    exit;
}
$url = trim($url);

// Only fetch over HTTP(S). file_get_contents() otherwise honours every
// registered stream wrapper (file://, php://, phar://, ...) as well as plain
// local paths, which would let a caller read files from the server.
// NOTE(review): this does not stop requests to internal/private hosts; add a
// host allowlist or network-level restriction if the endpoint is public.
$scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
$host = (string) parse_url($url, PHP_URL_HOST);
if (!in_array($scheme, ['http', 'https'], true) || $host === '') {
    echo json_encode(['error' => 'Invalid URL: only http and https are supported']);
    exit;
}

$ctx = stream_context_create(['http' => [
    'header' => 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    'timeout' => 10,
]]);

// Warnings are suppressed on purpose: a failed download is reported through
// the JSON error payload below rather than as PHP warnings in the response.
$html = @file_get_contents($url, false, $ctx);

// Compare explicitly so that a body consisting of "0" is not mistaken for a
// failed download; an empty document is still reported as a failure.
if ($html === false || $html === '') {
    echo json_encode(['error' => 'Failed to fetch URL']);
    exit;
}

// Extract the page title (attributes on the <title> tag are tolerated).
$title = '';
if (preg_match('/<title\b[^>]*>([^<]*)<\/title>/i', $html, $titleMatch) === 1) {
    $title = trim($titleMatch[1]);
}

// Clean the HTML: drop <script>/<style> blocks, strip the remaining tags and
// collapse whitespace. preg_replace() returns null on a PCRE error (e.g. the
// backtrack limit on a huge page); fall back to the unmodified input then.
$withoutCode = preg_replace('/<(script|style)[^>]*>.*?<\/\1>/si', '', $html);
$text = strip_tags($withoutCode ?? $html);
$text = preg_replace('/\s+/', ' ', $text) ?? $text;
$text = trim(substr($text, 0, 5000));

// Extract links: keep the first 30 anchors that have a quoted href and
// tag-free link text.
$links = [];
if (preg_match_all('/<a[^>]+href=["\']([^"\']+)["\'][^>]*>([^<]*)<\/a>/i', $html, $links, PREG_SET_ORDER) === false) {
    $links = [];
}
$linksList = array_map(
    static fn(array $link): array => ['href' => $link[1], 'text' => trim($link[2])],
    array_slice($links, 0, 30)
);

// The byte-wise cut above can split a multibyte character and the page may not
// be UTF-8 at all; substitute invalid sequences so that encoding cannot fail.
echo json_encode(
    [
        'url' => $url,
        'title' => $title,
        'text' => $text,
        'links' => $linksList,
    ],
    JSON_INVALID_UTF8_SUBSTITUTE
);