#!/usr/bin/perl -s
#
# URL List Fetcher
# Fetches all items on a list of URLs and saves them.
#
# Reads one URL per line from "urls.txt" in the current directory.  Each
# line that starts with "http://" is fetched with &http'fetch and the
# result is written to a file in the current directory named after the
# last path component of the URL ("index.html" when that component is
# empty).  Lines that do not start with "http://" are skipped.
#

require "http.pl";      # Communications code
require "flush.pl";     # &flush(FILEHANDLE) to flush a file.

$timeout = 150;         # not read in this file; presumably for http.pl -- TODO confirm
$crlf = "\r\n";         # line terminator used when writing saved files

open(URLLIST, "urls.txt") || die "Cannot open urls.txt: $!\n";

$starttime = time;

while (<URLLIST>) {
    # Strip line terminators (handles both LF and CRLF input files).
    s/\r//g;
    s/\n//g;

    # Normalise the URL: drop an explicit default port, and make sure a
    # URL whose last component has no dot (or that is a bare host) ends
    # in a slash.
    s|:80/|/|;
    s|:80$|/|;
    s|([^/])/([^./]+)$|$1/$2/|;
    s|//([^/]+)$|//$1/|;

    $url = $_;

    # Only http:// URLs are handled; everything else is skipped.
    next unless ($url =~ m|^http://(.*)|);

    $host = $1;
    $port = 80;         # default
    $request = "/";     # default

    # Split "host[:port]/path" into its parts.
    ($host =~ s|^([^/]+)(.*)$|$1|) && ($request = ($2 || "/"));
    ($host =~ s/:(\d+)$//) && ($port = $1);
    $host =~ y/A-Z/a-z/;

    # The local file name is the last path component of the request.
    # $1 is only consulted when the match succeeded, so a stale capture
    # from an earlier pattern can never leak into the file name.
    $filename = "index.html";
    if ($request =~ m|^.*/([^/]*)$|) {
        $filename = ($1 || "index.html");
    }

    print "url: $url filename: '$filename'\n";

    local(@htext) = &http'fetch($host, $port, $request, "GET");
    &print_htext(@htext);

    # Discard leading elements up to and including the first false one
    # (an empty string).  Presumably this removes the response headers
    # and the blank line that ends them -- TODO confirm against http.pl.
    while ($junk = shift(@htext)) {}

    if (&http'invalid(@htext)) {
        print "$url is invalid.\n";
    } else {
        print "Writing to file... ";
        # The name comes from the URL list, so prefix "./" to stop a
        # leading "&" or whitespace from being interpreted by two-arg
        # open as part of the mode.  $filename never contains a "/".
        if (open(NEWFILE, ">./" . $filename)) {
            # Every element but the last is written with a CRLF; the
            # last is written without a terminator.
            for ($i = 0; $i < $#htext; $i++) {
                print NEWFILE "$htext[$i]$crlf";
            }
            print NEWFILE "$htext[$#htext]" if (@htext);
            close(NEWFILE);
            print "... Done\n";
        } else {
            print "... failed to open '$filename': $!\n";
        }
    }

    sleep(1);           # pause between requests
}

$endtime = time;
close(URLLIST);

# Elapsed time in days (86400 seconds per day).
$checktime = ($endtime - $starttime) / 86400;
print "start time: $starttime\n end time: $endtime\nTotal: $checktime days.\n";

# print_htext(LIST)
# Prints every element of LIST to STDOUT, one per line, as "index: text".
sub print_htext {
    local(@htext) = @_;
    local($i);          # keep the caller's $i intact

    for ($i = 0; $i <= $#htext; $i++) {
        print "$i: $htext[$i]\n";
    }
}

# file_length(FILENAME)
# Returns the number of lines in FILENAME, or 0 if it cannot be opened.
sub file_length {
    local($filename) = @_;
    local($filelen) = 0;

    unless (open(FILE, $filename)) {
        return 0;
    }
    while (<FILE>) {
        $filelen++;
    }
    close(FILE);

    $filelen;
}