#!/usr/bin/perl -s
#
# URL List Fixer
#
# Reads URLs from "urls.txt" (one per line) and prints each one to STDOUT
# in a standard form:
#   * the redundant default port ":80" is removed from http URLs
#   * the host name is converted to lower case
#   * a "/" is appended to directory names (a URL with no path at all,
#     or whose last path segment contains no ".")
#
# Lines that do not look like "scheme://..." are printed unchanged (minus
# their line ending), so the output has exactly one line per input line.
#
# The old Perl 4 library "flush.pl" is no longer required here: nothing in
# this script called it, and it does not ship with modern Perl.
#
use strict;
use warnings;

# normalize_url($line)
#
# Returns the normalized form of one URL.  Any CR/LF characters in $line
# are removed first.  The URL is split into scheme, authority, path and an
# optional "?query"/"#fragment" tail so that each rule only ever touches
# the component it is meant for.
sub normalize_url {
    my ($line) = @_;

    my $url = defined $line ? $line : '';
    $url =~ tr/\r\n//d;

    # Split at the FIRST "://" only; a URL embedded in a query string
    # (e.g. "...?u=http://other/") must not move the scheme/host boundary.
    return $url unless $url =~ m{\A([^:/?\#]+)://([^/?\#]*)(.*)\z}s;
    my ($protocol, $authority, $rest) = ($1, $2, $3);

    # Keep any "user:password@" prefix as-is: credentials are case-sensitive.
    my ($userinfo, $hostport) = $authority =~ m{\A(.*\@)?(.*)\z}s;
    $userinfo = '' unless defined $userinfo;

    # Port 80 is only the default (and therefore redundant) for http.
    $hostport =~ s/:80\z// if lc($protocol) eq 'http';
    $hostport =~ tr/A-Z/a-z/;

    # Separate the path from a trailing query string or fragment.
    my ($path, $suffix) = $rest =~ m{\A([^?\#]*)(.*)\z}s;

    if ($path eq '') {
        # "http://host" and "http://host?x=1" both refer to the root.
        $path = '/';
    }
    elsif ($suffix eq '') {
        # A final segment without a "." is taken to be a directory name.
        # Paths followed by a query or fragment are left alone, because a
        # dot-less segment there is more likely a script than a directory.
        $path =~ s{/([^./]+)\z}{/$1/};
    }

    return "$protocol://$userinfo$hostport$path$suffix";
}

# main()
#
# Normalizes every line of "urls.txt" in the current directory and prints
# the results to STDOUT.  Dies if the file cannot be opened.
sub main {
    my $list_file = 'urls.txt';

    open(my $in, '<', $list_file)
        or die "Cannot open $list_file: $!\n";

    while (my $line = <$in>) {
        print normalize_url($line), "\n";
    }

    close($in);
    return;
}

# Run only when executed as a script, so normalize_url() can also be
# loaded (and tested) from another file without touching urls.txt.
main() unless caller;

1;