<?php
// Listing 1637
// Submitted by Greg, 17 October 2008
// Script metadata: release number and the address bug reports are sent to
define('version', '0.40');
define('contact', 'bugs@nlindblad.org');
// Base URL that every request URL in this script is built on
define('youtube', 'http://www.youtube.com/');
// Fetch a URL and hand its content back as an array of lines.
// Terminates the script with a message when nothing could be read.
function get_html($url) {
    // Pause for one second before every request
    sleep(1);
    $lines = file($url);
    if ( ! $lines ) {
        die("Could not fetch the URL: $url\n");
    }
    return $lines;
}
// Make a filename based on a video ID.
//
// Every '-', '_', '\', '/', '?' and '=' is removed from $video_id and the
// extension ".flv" is appended, so the result never contains a path separator.
// Returns the resulting file name as a string.
function get_filename($video_id) {
    // preg_replace() stands in for ereg_replace(), which was removed in PHP 7.0.
    // The character class lists the same characters the POSIX bracket
    // expression did (a backslash is a literal inside a POSIX bracket).
    return preg_replace('~[-_\\\\/?=]~', '', $video_id) . '.flv';
}
// Tell whether the file a video ID maps to already exists
function already_downloaded($video_id) {
    $target = get_filename($video_id);
    return file_exists($target);
}
// Return a fetchable URL for a video ID.
//
// Loads the watch page for $video_id and looks for the embedded player
// reference "/player2.swf?video_id=...". Everything after "video_id=" up to
// the next double quote is appended to youtube."get_video?video_id=".
// When a page holds several references the last one wins; when it holds
// none the returned URL carries an empty video_id value.
function get_fetchable($video_id) {
    $html = get_html(youtube."watch?v=".$video_id);
    // Initialised up front so a page without a player reference does not
    // leave the variable undefined.
    $unique_id = '';
    foreach ( $html as $line ) {
        // preg_match() stands in for the ereg()/split() pair removed in
        // PHP 7.0; '.' and '?' are escaped so only the literal file name matches.
        if ( preg_match('~/player2\.swf\?video_id=([^"]*)~', $line, $found) ) {
            $unique_id = $found[1];
        }
    }
    return youtube."get_video?video_id=".$unique_id;
}
// List all pages containing videos uploaded by a user.
//
// Reads the user's profile_videos page, finds the highest page number
// referenced by a "&p=r&page=N" link and returns the URLs of pages 1..N.
// When no such link is found, a single URL for page 1 is returned.
function pages($user) {
    $url = youtube.'profile_videos?user='.$user;
    $html = get_html($url);
    // Track the largest page number directly instead of indexing the result
    // of array_unique(), whose preserved keys are not guaranteed contiguous.
    $max = 0;
    foreach ( $html as $line ) {
        if ( preg_match_all('/&p=r&page=([0-9]+)/', $line, $found) ) {
            foreach ( $found[1] as $number ) {
                $max = max($max, (int)$number);
            }
        }
    }
    $pages = array();
    for ( $i = 1; $i <= $max; $i++ ) {
        $pages[] = $url."&p=r&page=".$i;
    }
    if ( count($pages) === 0 ) {
        // Same query string as the loop above (the old fallback had "&pr").
        $pages[] = $url."&p=r&page=1";
    }
    return $pages;
}
// List all videos on a particular video profile page.
//
// Scans the page line by line and takes the 11 characters following the
// first "/watch?v=" on each line as a video ID. Returns the distinct IDs as
// a list, in order of first appearance.
function videos($profile_page) {
    $videos = array();
    foreach ( get_html($profile_page) as $line ) {
        // Anchored on "/watch?v=" so an unrelated "v=" earlier on the line
        // is not mistaken for the ID; preg_match() stands in for the
        // ereg()/split() pair removed in PHP 7.0.
        if ( preg_match('/\/watch\?v=(.{11})/', $line, $found) ) {
            $videos[] = $found[1];
        }
    }
    return array_values(array_unique($videos));
}
// Figure out the unique download ID and download a video.
//
// The fetchable URL is looked up anew before each attempt to open it. After
// $max_attempts failed attempts the script terminates with a message rather
// than retrying forever. The video is written to get_filename($video_id);
// a file left behind by a failed copy is removed so that a later run does
// not take it for a finished download.
function download($video_id) {
    $filename = get_filename($video_id);
    $max_attempts = 5;
    $src = false;
    for ( $attempt = 1; $attempt <= $max_attempts && ! $src; $attempt++ ) {
        $src = @fopen(get_fetchable($video_id),'rb');
    }
    if ( ! $src ) {
        die("Could not download video: $video_id\n");
    }
    $dst = @fopen($filename,'wb');
    if ( ! $dst ) {
        fclose($src);
        die("Could not download video: $video_id as $filename\n");
    }
    // Binary-safe copy; the handle opened above is reused instead of
    // opening the source a second time.
    $copied = stream_copy_to_stream($src,$dst);
    fclose($src);
    fclose($dst);
    if ( $copied === false ) {
        @unlink($filename);
        die("Could not download video: $video_id as $filename\n");
    }
}
// List all videos on a profile page based on a pattern in the title.
//
// Keeps only the page lines that contain a "/watch?v=" link and also match
// the regular expression $pattern (given without delimiters), then returns
// the distinct 11-character video IDs found on those lines as a list.
// Returns an empty array when $pattern is not a valid expression.
function filtered_videos($profile_page,$pattern) {
    $html = get_html($profile_page);
    $matches = preg_grep("/\/watch\?v=.{11}/",$html);
    // A control character serves as delimiter so that a '/' (or any other
    // printable character) inside the user's pattern cannot end it early.
    $matches = preg_grep("\x01".$pattern."\x01",$matches);
    if ( ! is_array($matches) ) {
        return array();
    }
    $videos = array();
    foreach ( $matches as $match ) {
        // preg_match() stands in for ereg()/ereg_replace(), removed in PHP 7.0.
        if ( preg_match('/\/watch\?v=(.{11})/',$match,$found) ) {
            $videos[] = $found[1];
        }
    }
    return array_values(array_unique($videos));
}
// Parse command line arguments into a more suitable format.
// See <http://dev.nlindblad.org/cl-arguments.php> for details.
//
// Returns an associative array built from $argv:
//   "--key=value"  => array('key' => 'value')
//   "-flag" / "--flag" (no '=') => array('flag' => 'true'), with every '-'
//                     removed from the name
// Arguments that do not start with '-' are ignored.
//
// NOTE: every '=' inside a value is dropped ("--x=a=b" yields "ab"). This
// is long-standing behaviour that the --url handling in main() relies on,
// so it is kept as is.
function arguments($argv) {
    $_ARG = array();
    foreach ($argv as $arg) {
        // Both patterns are anchored at the start so that a '-' in the
        // middle of an argument (e.g. a script path such as "my-script.php")
        // is no longer taken for an option. preg_match() stands in for
        // ereg()/split(), which were removed in PHP 7.0.
        if (preg_match('/^--([a-zA-Z0-9]*)=(.*)/s',$arg,$parts)) {
            $_ARG[$parts[1]] = str_replace('=','',$parts[2]);
        } elseif (preg_match('/^-+[a-zA-Z0-9]/',$arg)) {
            $_ARG[str_replace('-','',$arg)] = 'true';
        }
    }
    return $_ARG;
}
// The main function for the script.
//
// $arguments is the raw argument vector; element 0 is used as the program
// name in messages. The options are handled in this order, and every
// handled option ends the script via exit():
//   --help / -h, --version / -v, --debug   print information
//   --id=ID                                download a single video
//   --url=URL                              download the video a URL points to
//   --user=USER with -list or -download    list or download a user's videos,
//                                          optionally narrowed by --filter=REGEXP
// Without --id/--url a user name is required. When a user is given but
// neither -list nor -download (or an empty --filter), nothing is done.
function main($arguments) {
    $self = basename($arguments[0]);
    $_ARG = arguments($arguments);
    // Messages outputted by the program are placed here
    // --help
    $help = array("Usage: $self [OPTION...]\n",
        "This script downloads videos from the popular video sharing site Youtube.\n",
        "The output format is FLV (Flash Video), for a list of compatible player see:\n",
        "<http://en.wikipedia.org/wiki/FLV#Flash_Video_Players>\n\n",
        "Examples:\n",
        " $self --id=ID\n",
        " # Download the video with the matching ID\n",
        " $self --url=URL\n",
        " # Download the video with the matching URL\n",
        " $self --user=somebody -list\n",
        " # List all videos uploaded by 'somebody'\n",
        // The filter takes a value, so it needs the "--name=value" form;
        // the single-dash spelling is never stored under the key 'filter'.
        " $self --user=somebody -list --filter=\"<REGEXP>\"\n",
        " # List all videos uploaded by 'somebody' matching the REGEXP pattern <REGEXP>\n",
        " $self --user=somebody -download\n",
        " # Download all videos uploaded by 'somebody'\n",
        " $self --user=somebody -download --filter=\"<REGEXP>\"\n",
        " # Download all videos uploaded by 'somebody' matching the REGEXP pattern <REGEXP>\n\n",
        "Other options:\n",
        " --help display this help list\n",
        " --version print script version\n",
        " --debug outputs debugging information\n\n",
        "Report bugs to <".contact.">\n");
    // --version
    $version = array("$self ".version."\n",
        "Copyright (C) 2007 Niklas Lindblad\n",
        "This is free software. You may redistribute copies of it under the terms of\n",
        "the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.\n",
        "There is NO WARRANTY, to the extent permitted by law.\n\n",
        "Written by Niklas Lindblad.\n");
    // If no user provided
    $warning = array("$self: You must specify a Youtube user with `--user=<USER>'\n",
        "See `$self --help' for more information\n");

    // Output --help information
    if ( ! empty($_ARG['help']) || isset($_ARG['h']) ) {
        foreach ( $help as $line ) { print($line); }
        exit(0);
    }
    // Output --version information
    if ( ! empty($_ARG['version']) || isset($_ARG['v']) ) {
        foreach ( $version as $line ) { print($line); }
        exit(0);
    }
    // Output --debug information
    if ( ! empty($_ARG['debug']) ) {
        print("$self (".version.") debugging:\n");
        print("Runtime environment:\n");
        print(" OS: ".PHP_OS."\n");
        print(" PHP: ".phpversion()."\n");
        print(" Zend: ".zend_version()."\n");
        if ( strtoupper(php_sapi_name()) == 'CLI' ) {
            print(" CLI: true\n");
        } else {
            print(" CLI: false\n");
        }
        print("\nRuntime configuration:\n");
        print(" allow_url_fopen ".ini_get('allow_url_fopen')."\n");
        print(" display_errors ".ini_get('display_errors')."\n");
        exit(0);
    }
    // Download based on video ID
    if ( ! empty($_ARG['id']) ) {
        // Same check as before (at least 11 characters), expressed with
        // preg_match() because ereg() was removed in PHP 7.0.
        if ( preg_match('/.{11}/s',$_ARG['id']) ) {
            if ( ! already_downloaded($_ARG['id']) ) {
                print("Downloading video ".$_ARG['id']."\n");
                download($_ARG['id']);
            }
            exit(0);
        } else {
            print("$self: No valid video ID ".$_ARG['id']."\n");
            exit(1);
        }
    }
    // Download based on video ID extracted from URL
    if ( ! empty($_ARG['url']) ) {
        $query = parse_url($_ARG['url'],PHP_URL_QUERY);
        // The argument parser may have dropped the '=' from the value, so
        // both "v=ID" and "vID" are accepted; anything from the first '&'
        // on (further query parameters) is not part of the ID.
        if ( ! is_string($query) || ! preg_match('/^v=?([^&]+)/',$query,$found) ) {
            print("$self: No valid video ID in URL ".$_ARG['url']."\n");
            exit(1);
        }
        $video_id = $found[1];
        if ( ! already_downloaded($video_id) ) {
            print("Downloading video $video_id\n");
            download($video_id);
        }
        exit(0);
    }
    // There must be a Youtube username among the arguments
    if ( empty($_ARG['user']) ) {
        foreach ( $warning as $line ) { print($line); }
        exit(1);
    }

    // Listing / downloading of the videos a user has uploaded, with or
    // without a filter. The flags are read through empty() so that a
    // missing -list or -download no longer touches an undefined index.
    $want_list = ! empty($_ARG['list']);
    $want_download = ! empty($_ARG['download']);
    $has_filter = isset($_ARG['filter']);
    if ( ! $want_list && ! $want_download ) {
        return;
    }
    if ( $has_filter && empty($_ARG['filter']) ) {
        // An empty filter selects nothing
        return;
    }
    foreach ( pages($_ARG['user']) as $page ) {
        if ( $has_filter ) {
            $video_ids = filtered_videos($page,$_ARG['filter']);
        } else {
            $video_ids = videos($page);
        }
        foreach ( $video_ids as $video_id ) {
            if ( $want_list ) {
                // Listing takes precedence when both actions are requested
                print($video_id." ".get_fetchable($video_id)."\n");
            } elseif ( ! already_downloaded($video_id) ) {
                print("Downloading video $video_id \n");
                download($video_id);
            }
        }
    }
    exit(0);
}
// Script entry point: pass the argument vector in $argv to main(), which
// parses it and carries out the requested action.
main($argv);
?>


