I've made a perl based bot that searches the net 
      for emails (a.k.a. spam bot). 
      I couldn't help but grin when I saw your email was 
      "fravia(at)linuxmail(point)org".
      
					Kevin Jobson.
#!/usr/bin/perl
use LWP::UserAgent;
require "subparseform.lib";

# Decode the CGI request. The fields are read from %formdata below.
# NOTE(review): Parse_Form lives in subparseform.lib, which is not visible
# here; presumably it fills %formdata from the query string -- confirm.
Parse_Form();

$varurl  = $formdata{'varurl'};    # permanent base URL (e.g. www.infinit.com)
$baseurl = $formdata{'base_url'};  # variable URL (e.g. www.infinit.com/blabla/encorebla/)
$newurl  = $formdata{'newurl'};    # flag: 1 when the request carries a new URL

# Emit the CGI header and open the HTML document.
print "Content-type: text/html\n\n";
print "<HTML><BODY>";
# A newly submitted URL also becomes the permanent base URL.
if ($newurl == 1) {
    $varurl = $baseurl;
}
# No new URL was submitted: take the first entry of data/url.txt that is not
# yet listed in data/verifurl.txt. Sets $continue to 1 and $baseurl to that
# entry, or reports that the search is finished and exits.
if ($newurl != 1) {
    # Load the already-checked URLs once, instead of re-reading the file for
    # every candidate.
    open(my $verified_fh, '<', 'data/verifurl.txt') or ErrorMsg();
    my %verified;
    while (my $checked = <$verified_fh>) {
        chomp $checked;
        $verified{$checked} = 1;
    }
    close($verified_fh);

    open(my $urls_fh, '<', 'data/url.txt') or ErrorMsg();
    $continue = 0;
    while (my $candidate = <$urls_fh>) {
        chomp(my $key = $candidate);
        next if $key eq '';          # ignore blank lines
        next if $verified{$key};     # already crawled, try the next one

        # $baseurl keeps the line exactly as read (newline included), as
        # before, so the record later appended to verifurl.txt stays
        # newline-terminated.
        $baseurl  = $candidate;
        $continue = 1;
        last;
    }
    close($urls_fh);

    # Every line, including the last one, has now been considered. With
    # nothing left to crawl, stop here instead of reloading forever.
    unless ($continue) {
        print "<center><h1>RECHERCHE COMPLETÉ</h1></center>";
        exit;
    }
}
### MAIN LOOP ###
# Crawl $baseurl when a URL was either submitted or picked from the list:
# collect links that start with $varurl and e-mail addresses found in the
# response, append them to the data files, then record the URL as checked.
if (($continue == 1) || ($newurl == 1)) {
    ############### SEARCH URLS ######################
    $ua   = LWP::UserAgent->new;
    $req  = HTTP::Request->new('GET', $baseurl);
    $resp = $ua->request($req);

    # Keep the response (as_string output) in a single variable.
    $response_http = $resp->as_string();
    #print "$response_http";

    # Replace root-relative links (<a href="/fr">) with absolute links under
    # $varurl. Trailing slashes on the base are dropped first so that exactly
    # one "/" separates base and path whether or not $varurl ends with one.
    (my $site_root = defined $varurl ? $varurl : '') =~ s{/+\z}{};
    $response_http =~ s{<a href=.?/}{<a href="$site_root/}gi;

    print "<h1>BaseURL: $baseurl <BR>file length: ";
    print length $response_http;
    print "<br></h1>";

    # Search for links that start with $varurl. \Q...\E makes the base match
    # literally (it comes from the request and contains regex metacharacters)
    # and keeps "$varurl[" from being read as an array subscript.
    while ($response_http =~ m/(\Q$varurl\E[a-zA-Z0-9\:\.\/\%\?\=\&]+)/gi) {
        my $link = $1;

        # Keep only links that carry one of these extensions or a query string.
        if ($link =~ m/(\.php|\.htm|\.shtm|\.asp|\?|\.cgi)/i) {
            print "FOUND: $link <br>";
            push(@http, $link);
        }
    }

    # Append the collected links to the URL list in the data folder.
    open(my $urls_out, '>>', 'data/url.txt') or ErrorMsg();
    foreach my $url (@http) {
        print "URL \nAJOUTÉ: $url<br>";
        print {$urls_out} "$url\n";
    }
    close($urls_out);

    ################### SEARCH EMAILS #########################
    # Search for e-mail addresses of the form something@something.something.
    $response_email = $resp->as_string();
    while ($response_email =~ m/(\w+@\w+\.\w+)/g) {
        push(@email, $1);
    }
    print "<br><h3>@email</h3>";    # show the e-mail addresses that were found

    # Append the addresses to the e-mail list in the data folder.
    open(my $emails_out, '>>', 'data/email.txt') or ErrorMsg();
    foreach my $single_email (@email) {
        print {$emails_out} "$single_email\n";
    }
    close($emails_out);

    ############### WRITE CHECKED URL TO FILE ###############
    # $baseurl ends with a newline when it was read from url.txt and has none
    # when it came from the form. Always write exactly one, so records in
    # verifurl.txt never run together.
    chomp(my $checked_url = $baseurl);
    open(my $verified_out, '>>', 'data/verifurl.txt') or ErrorMsg();
    print {$verified_out} "$checked_url\n";
    close($verified_out);
} #### END IF ####
# Reload the script immediately (newurl=0) so the next run picks a URL from
# the list, then close the HTML document.
# $varurl originates from the request, so it is percent-encoded before being
# placed in the refresh URL: a raw '"', '<', '&' or '#' would otherwise break
# out of the attribute or truncate the parameter.
# NOTE(review): assumes Parse_Form percent-decodes query values, as it must
# for browser-submitted forms to work -- confirm against subparseform.lib.
(my $varurl_param = defined $varurl ? $varurl : '')
    =~ s/([^A-Za-z0-9\-_.~])/sprintf('%%%02X', ord($1))/ge;
print "<meta http-equiv=\"refresh\" \ncontent=\"0; URL=spider.cgi?newurl=0&varurl=$varurl_param\">";
print "</BODY></HTML>";
####################### SUB #####################
# Report the current system error ($!) on the page and end the script.
sub ErrorMsg {
    my $reason = $!;
    print "Server can't open file : " . $reason;
    exit;
}