Files
html/api/handlers/youtube-scrape-stub.sh
2026-04-20 01:30:02 +02:00

51 lines
1.6 KiB
Bash
Executable File

#!/bin/bash
# youtube-scrape-stub.sh - readiness probe for the YouTube scrape pipeline.
#
# Usage:  youtube-scrape-stub.sh [URL]
#   URL   video to probe; a fixed test video is used when omitted.
#
# Output: one JSON object on stdout containing
#   - the metadata yt-dlp reports for URL (title, duration, channel),
#   - which tools were found (yt-dlp, ffmpeg, a whisper binary),
#   - the HTTP status returned by http://localhost:6333/ (Qdrant).
# Nothing is downloaded or transcribed: yt-dlp is run with --simulate.
#
# Every probe is best-effort. Strict mode (set -e) is deliberately NOT enabled
# so that a missing tool or an unreachable service is reported inside the JSON
# instead of aborting before any output is produced.

# Escape a string so it can be placed between double quotes in JSON.
# Arguments: $1 - raw text (may come from the caller or from yt-dlp)
# Outputs:   escaped text on stdout, without surrounding quotes
json_escape() {
  local s=$1
  s=${s//\\/\\\\}      # backslash first, so later escapes are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  # Any control character still present is illegal inside a JSON string.
  printf '%s' "$s" | LC_ALL=C tr -d '\000-\037'
}

URL="${1:-https://www.youtube.com/watch?v=dQw4w9WgXcQ}"

# Private scratch directory with an unpredictable name, removed on every exit
# path. The probe itself writes nothing into it.
WORK=$(mktemp -d /tmp/yt-test-XXXXXX 2>/dev/null) || WORK=""
cleanup() {
  if [[ -n "$WORK" ]]; then
    rm -rf -- "$WORK"
  fi
}
trap cleanup EXIT

# Check binaries ("none" / "not_found" are the sentinel values reported).
YT=$(command -v yt-dlp || echo none)
FFMPEG=$(command -v ffmpeg || echo none)

WHISPER_BIN=""
for candidate in /usr/local/bin/whisper* /opt/whisper.cpp/main; do
  # An unmatched glob stays literal, hence the existence test.
  if [[ -e "$candidate" ]]; then
    WHISPER_BIN=$candidate
    break
  fi
done
[[ -n "$WHISPER_BIN" ]] || WHISPER_BIN="not_found"

# Fetch metadata only (fast, no download).
TITLE="not_tested"
DURATION="not_tested"
CHANNEL=""
if [[ "$YT" != "none" ]]; then
  # One --print per field puts each value on its own line, so a '|' (or any
  # other separator character) inside a title cannot shift the fields.
  # '--' stops option parsing in case URL starts with '-'.
  INFO=$(timeout 10 "$YT" --no-warnings --simulate \
    --print "%(title)s" --print "%(duration)s" --print "%(channel)s" \
    -- "$URL" 2>/dev/null)
  {
    IFS= read -r TITLE
    IFS= read -r DURATION
    IFS= read -r CHANNEL
  } <<<"$INFO"
fi

# yt-dlp version; "na" when the tool is missing or prints nothing.
YT_VERSION="na"
if [[ "$YT" != "none" ]]; then
  YT_VERSION=$("$YT" --version 2>/dev/null | head -n 1)
  [[ -n "$YT_VERSION" ]] || YT_VERSION="na"
fi

# Qdrant check. curl prints "000" when it cannot connect; anything that is not
# a real three-digit HTTP status is normalised to "0".
QDRANT_UP=$(curl -s -o /dev/null --max-time 5 -w '%{http_code}' \
  http://localhost:6333/ 2>/dev/null)
[[ "$QDRANT_UP" =~ ^[1-5][0-9][0-9]$ ]] || QDRANT_UP="0"

# The pipeline is reported ready when both yt-dlp and ffmpeg were found.
PIPELINE_READY=false
if [[ "$YT" != "none" && "$FFMPEG" != "none" ]]; then
  PIPELINE_READY=true
fi

cat <<EOF
{
"ok": true,
"v": "V5.6-youtube-REAL-opus-19avr",
"ts": "$(date -Iseconds)",
"url_tested": "$(json_escape "$URL")",
"extracted_info": {
"title": "$(json_escape "$TITLE")",
"duration_seconds": "$(json_escape "$DURATION")",
"channel": "$(json_escape "${CHANNEL:-N/A}")"
},
"stack": {
"yt_dlp": "$(json_escape "$YT")",
"yt_dlp_version": "$(json_escape "$YT_VERSION")",
"ffmpeg": "$(json_escape "$FFMPEG")",
"whisper_binary": "$(json_escape "$WHISPER_BIN")",
"qdrant_up": "$QDRANT_UP"
},
"pipeline_ready": $PIPELINE_READY,
"usage": "call with URL: 'yt scrape https://youtube.com/watch?v=...'",
"next_step": "Full pipeline: download -> transcribe -> Qdrant ingest (call /api/youtube-ingest.php)"
}
EOF