PhpRiot
Follow phpriot on Twitter
Sponsored Link

Listing 1637

Submitted by Greg, 17 October 2008
// Script metadata: release number and the address bug reports are sent to
define('version', '0.40');
define('contact', 'bugs@nlindblad.org');
 
// Base address that every request URL in this script is built from
define('youtube', 'http://www.youtube.com/');
 
// Fetch a page and return it as an array of lines (the result of file()).
// Waits one second before every request. If file() yields a falsy result
// the script is terminated with an error message naming the URL.
function get_html($url) {
    sleep(1);
    $lines = file($url);
    if ( ! $lines ) {
        die("Could not fetch the URL: $url\n");
    }
    return $lines;
}
 
// Make a filename based on a video ID.
// The characters - _ \ / ? = are removed from the ID and ".flv" is appended.
// Note that two IDs differing only in those characters map to the same file.
function get_filename($video_id) {
    // preg_replace() is used because ereg_replace() was removed in PHP 7.0.
    // The doubled backslash keeps "\" in the set, as the POSIX bracket
    // expression of the previous implementation did.
    return preg_replace('~[-_\\\\/?=]~', '', $video_id) . '.flv';
}
 
// Tell whether a video has already been downloaded, i.e. whether the file
// name derived from its ID by get_filename() already exists.
function already_downloaded($video_id) {
    $target = get_filename($video_id);
    return file_exists($target);
}
 
// Return a fetchable URL for a video ID.
// Loads the watch page for $video_id, looks for the player reference
// "/player2.swf?video_id=..." and copies everything after "video_id=" up to
// the next double quote into a get_video URL. When several lines carry such
// a reference the last one wins. When none does, the returned URL has an
// empty video_id, so callers that retry on a failed open keep working.
function get_fetchable($video_id) {
    $html = get_html(youtube."watch?v=".$video_id);
    // Start from a defined value so a page without a player reference does
    // not leave the variable unset.
    $unique_id = '';
    foreach ( $html as $line ) {
        // preg_match() replaces the ereg()/split() pair removed in PHP 7.0;
        // the capture group stops at the first double quote.
        if ( preg_match('~/player2\.swf\?video_id=([^"]*)~', $line, $found) ) {
            $unique_id = $found[1];
        }
    }
    return youtube."get_video?video_id=".$unique_id;
}
 
// List all pages containing videos uploaded by a user.
// Loads the user's profile_videos page, finds the highest page number
// referenced by a "&p=r&page=N" link and returns the URLs of pages 1..N.
// When no such link is found, a single URL for page 1 is returned.
function pages($user) {
    $url = youtube.'profile_videos?user='.$user;
    $html = get_html($url);
    $max = 0;
    foreach ( $html as $line ) {
        // Collect every page link on the line, not just the first one, and
        // keep the largest number seen so far.
        if ( preg_match_all('/&p=r&page=([0-9]+)/', $line, $found) ) {
            $max = max($max, max(array_map('intval', $found[1])));
        }
    }
    $pages = array();
    for ( $i = 1; $i <= $max; $i++ ) { $pages[] = $url."&p=r&page=".$i; }
    // Fallback uses the same query format as the loop above.
    if ( count($pages) === 0 ) { $pages[] = $url."&p=r&page=1"; }
    return $pages;
}    
 
// List all videos on a particular video profile page.
// Returns the distinct 11-character IDs that follow "/watch?v=" anywhere on
// the page, in order of first appearance, as a sequentially indexed array.
function videos($profile_page) {
    $videos = array();
    foreach ( get_html($profile_page) as $line ) {
        // preg_match_all() picks up every watch link on the line; it also
        // replaces ereg()/split(), which were removed in PHP 7.0.
        if ( preg_match_all('~/watch\?v=(.{11})~', $line, $found) ) {
            foreach ( $found[1] as $id ) { $videos[] = $id; }
        }
    }
    // array_unique() keeps the original keys, so reindex for a clean list.
    return array_values(array_unique($videos));
}
 
// Figure out the unique download ID and download a video.
// The fetchable URL is resolved and opened; while the open fails the URL is
// resolved again and the open retried. $max_attempts limits the number of
// tries; the default 0 means "retry without limit", which is the behaviour
// callers passing only $video_id have always had. The stream is then copied
// in 4096-byte chunks into the file named by get_filename(). The script is
// terminated with a message if the source or the destination cannot be opened.
function download($video_id, $max_attempts = 0) {
    $filename = get_filename($video_id);
    $attempts = 0;
    do {
        $attempts++;
        $video_url = get_fetchable($video_id);
        $src = @fopen($video_url,'rb');
    } while ( ! $src && ( $max_attempts <= 0 || $attempts < $max_attempts ) );
    // The handle opened in the loop is reused; opening the URL a second time
    // would leak the first handle and issue a duplicate request.
    if ( ! $src ) {
        die("Could not download video: $video_id\n");
    }
    $dst = @fopen($filename,'wb');
    if ( ! $dst ) {
        fclose($src);
        die("Could not download video: $video_id as $filename\n");
    }
    while ( ! feof($src) ) {
        // fread() copies raw bytes; the data is binary, not line-oriented.
        $chunk = fread($src,4096);
        if ( $chunk === false ) { break; }
        fwrite($dst,$chunk);
    }
    fclose($src);
    fclose($dst);
}    
 
// List all videos on a profile page based on a pattern in the title.
// Only lines that contain a "/watch?v=" link AND match $pattern are kept;
// the 11-character ID of the first watch link on each such line is returned.
// Duplicates are removed; the keys of the result are not reindexed.
// $pattern is inserted between "/" delimiters as-is, so a "/" inside it
// must be escaped by the caller.
function filtered_videos($profile_page,$pattern) {
    $html = get_html($profile_page);
    $matches = preg_grep("/\/watch\?v=.{11}/",$html);
    $matches = preg_grep("/".$pattern."/",$matches);
    // preg_grep() does not return an array for an invalid pattern.
    if ( ! is_array($matches) ) {
        return array();
    }
    $hashes = array();
    foreach ( $matches as $match ) {
        // preg_match() replaces ereg()/ereg_replace(), removed in PHP 7.0.
        if ( preg_match('~/watch\?v=(.{11})~', $match, $found) ) {
            $hashes[] = $found[1];
        }
    }
    return array_unique($hashes);
}
 
// Parse command line arguments into a more suitable format.
// See <http://dev.nlindblad.org/cl-arguments.php> for details.
//
// Returns an associative array:
//   "--key=value"  becomes  key => value
//   "-flag" or "--flag" becomes  flag => 'true' (the string)
// Arguments containing neither form are ignored.
//
// NOTE: any further "=" inside a value is dropped, because the pieces are
// joined back together without a separator ("--url=a?v=b" yields "a?vb").
// main() strips a leading "v" from the URL query and so depends on this;
// do not change it here without changing main() as well.
function arguments($argv) {
    $parsed = array();
    foreach ( $argv as $arg ) {
        // preg_match()/explode()/str_replace() replace ereg(), split() and
        // ereg_replace(), which were removed in PHP 7.0.
        if ( preg_match('/--[a-zA-Z0-9]*=/', $arg) ) {
            $pieces = explode('=', $arg);
            $key = str_replace('--', '', array_shift($pieces));
            $parsed[$key] = implode('', $pieces);
        } elseif ( preg_match('/-[a-zA-Z0-9]/', $arg) ) {
            // Every dash is removed, so "-list" and "--list" are the same key.
            $parsed[str_replace('-', '', $arg)] = 'true';
        }
    }
    return $parsed;
}
 
// The main function for the script.
// $arguments is the raw argument vector; element 0 is the script name.
// The options are handled in this order, and the first one that applies
// ends the script via exit():
//   --help / -h, --version / -v, --debug,
//   --id=ID, --url=URL,
//   --user=USER with -list or -download, optionally narrowed by --filter=REGEXP.
// With -list and -download both given, listing takes precedence. When a user
// is given without -list/-download, or with an empty --filter, nothing is
// done and the function returns.
function main($arguments) {
    $self = basename($arguments[0]);
    $_ARG = arguments($arguments);
    // Messages outputted by the program are placed here
    // --help (the filter examples use "--filter=", the only form arguments() parses as key/value)
    $help = array("Usage: $self [OPTION...]\n", 
    "This script downloads videos from the popular video sharing site Youtube.\n",
    "The output format is FLV (Flash Video), for a list of compatible player see:\n",
    "<http://en.wikipedia.org/wiki/FLV#Flash_Video_Players>\n\n",
    "Examples:\n",
    "  $self --id=ID\n",
    "  # Download the video with the matching ID\n",
    "  $self --url=URL\n",
    "  # Download the video with the matching URL\n",
    "  $self --user=somebody -list\n",
    "  # List all videos uploaded by 'somebody'\n",
    "  $self --user=somebody -list --filter=\"<REGEXP>\"\n",
    "  # List all videos uploaded by 'somebody' matching the REGEXP pattern <REGEXP>\n",
    "  $self --user=somebody -download\n",
    "  # Download all videos uploaded by 'somebody'\n",
    "  $self --user=somebody -download --filter=\"<REGEXP>\"\n",
    "  # Download all videos uploaded by 'somebody' matching the REGEXP pattern <REGEXP>\n\n",
    "Other options:\n",
    "  --help       display this help list\n",
    "  --version    print script version\n",
    "  --debug      outputs debugging information\n\n",
    "Report bugs to <".contact.">\n");
    // --version
    $version = array("$self ".version."\n", 
    "Copyright (C) 2007 Niklas Lindblad\n",
    "This is free software.  You may redistribute copies of it under the terms of\n",
    "the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.\n",
    "There is NO WARRANTY, to the extent permitted by law.\n\n",
    "Written by Niklas Lindblad.\n");
    // If no user provided
    $warning = array("$self: You must specify a Youtube user with `--user=<USER>'\n",
    "See `$self --help' for more information\n");
    // Output --help information
    if ( ! empty($_ARG['help']) || isset($_ARG['h']) ) {
        foreach ( $help as $line ) { print($line); }
        exit(0);
    }
    // Output --version information
    if ( ! empty($_ARG['version']) || isset($_ARG['v']) ) {
        foreach ( $version as $line ) { print($line); }
        exit(0);
    }
    // Output --debug information
    if ( ! empty($_ARG['debug']) ) {
        print("$self (".version.") debugging:\n");
        print("Runtime environment:\n");
        print(" OS:     ".PHP_OS."\n");
        print(" PHP:    ".phpversion()."\n");
        print(" Zend:   ".zend_version()."\n");
        print(" CLI:    ".( strtoupper(php_sapi_name()) == 'CLI' ? 'true' : 'false' )."\n");
        print("\nRuntime configuration:\n");
        print(" allow_url_fopen ".ini_get('allow_url_fopen')."\n");
        print(" display_errors ".ini_get('display_errors')."\n");
        exit(0);
    }
    // Download based on video ID
    if ( ! empty($_ARG['id']) ) {
        // An ID is accepted when it is at least 11 characters long.
        if ( strlen($_ARG['id']) < 11 ) {
            print("$self: No valid video ID ".$_ARG['id']."\n");
            exit(1);
        }
        if ( ! already_downloaded($_ARG['id']) ) {
            print("Downloading video ".$_ARG['id']."\n");
            download($_ARG['id']);
        }
        exit(0);
    }
    // Download based on video ID extracted from URL
    if ( ! empty($_ARG['url']) ) {
        $parts = parse_url($_ARG['url']);
        $query = ( is_array($parts) && isset($parts['query']) ) ? $parts['query'] : '';
        // arguments() drops the "=" of "v=ID", so the query normally starts
        // with a bare "v"; the optional "=" also accepts an intact query.
        $video_id = preg_replace('/^v=?/', '', $query);
        if ( $video_id === '' ) {
            print("$self: No valid video ID in URL ".$_ARG['url']."\n");
            exit(1);
        }
        if ( ! already_downloaded($video_id) ) {
            print("Downloading video $video_id\n");
            download($video_id);
        }
        exit(0);
    }
    // There must be a Youtube username among the arguments
    if ( empty($_ARG['user']) ) {
        foreach ( $warning as $line ) { print($line); }
        exit(1);
    }
    // Work out what to do with the user's videos. empty() is used so that a
    // missing -list / -download flag is not read as an undefined index.
    $want_list = ! empty($_ARG['list']);
    $want_download = ! empty($_ARG['download']);
    $has_filter = isset($_ARG['filter']);
    if ( ( $has_filter && empty($_ARG['filter']) ) || ( ! $want_list && ! $want_download ) ) {
        return;
    }
    // Listing or downloading of the videos a user has uploaded, filtered or not
    foreach ( pages($_ARG['user']) as $page ) {
        $video_ids = $has_filter ? filtered_videos($page,$_ARG['filter']) : videos($page);
        foreach ( $video_ids as $video_id ) {
            if ( $want_list ) {
                print($video_id." ".get_fetchable($video_id)."\n");
            } elseif ( ! already_downloaded($video_id) ) {
                print("Downloading video $video_id \n");
                download($video_id);
            }
        }
    }
    exit(0);
}
 
// Script entry point: pass the raw command-line argument vector to main().
main($argv);
 
?>
Submit a Follow Up