This script downloads the email addresses of all recruiters.
=============================================
Using simple regular expressions.
=============================================
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use LWP::Simple;
# Landing page that lists every recruiter; the individual profile pages are
# addressed by appending the captured path to this same prefix.
my $url = 'http://www.cybercoders.com/recruiter/';

# LWP::Simple::get returns undef when the request fails, so stop here rather
# than trying to scan an undefined value.
my $html = get($url);
die "Could not fetch $url\n" unless defined $html;

# Scan the whole listing with a global match instead of one match per line,
# so several profile links sharing a line are all found.  The negated
# character classes keep the capture inside a single href="..." attribute;
# a greedy ".*" could run on to a later link on the same line.
while ( $html =~ m{<a href="/recruiter/([^"\s][^"\n]*/)">}g ) {
    # Copy the capture immediately: the inner match below overwrites $1.
    my $profile_url = $url . $1;
    print "$profile_url\n";

    my $profile_html = get($profile_url);
    unless ( defined $profile_html ) {
        # One unreachable profile should not abort the whole run.
        warn "Could not fetch $profile_url\n";
        next;
    }

    # Print every CyberCoders mailto link found on the profile page.
    while ( $profile_html
        =~ m{<a class="recruiter-email-link" href="mailto:([^"\s][^"\n]*\@CyberCoders\.com)">}g )
    {
        print "$1\n";
    }
}
=========================================================
Using HTML::TreeBuilder::XPath
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use LWP::Simple;
use HTML::TreeBuilder;
use HTML::TreeBuilder::XPath;
# Landing page that lists every recruiter; get_email() appends a recruiter's
# relative path to this prefix to reach the profile page.
my $url = 'http://www.cybercoders.com/recruiter/';

# LWP::Simple::get returns undef when the request fails.
my $html = get($url);
die "Could not fetch $url\n" unless defined $html;

# Pass the markup to new_from_content() so the document is parsed and then
# finished with eof in one step, instead of building an empty tree and
# calling parse() on it afterwards without ever signalling end of input.
my $tree = HTML::TreeBuilder::XPath->new_from_content($html);

# Each recruiter is rendered inside <div class="recruiter-item">.  The
# predicate needs its closing "]" to be a well-formed XPath expression.
my @customers = $tree->findnodes('//div[@class="recruiter-item"]');

for my $custm (@customers) {
    # The first anchor inside the item carries the link to the profile page.
    my ($first_link) = $custm->look_down( '_tag' => 'a' );
    next unless defined $first_link;

    my $recruiter_pth = $first_link->attr('href');
    next unless defined $recruiter_pth;

    # Reduce the href to the part after /recruiter/ because get_email()
    # prepends $url, which already ends in that prefix.  No newline is
    # appended: the value becomes part of a URL.
    $recruiter_pth =~ s{/recruiter/}{}g;
    get_email($recruiter_pth);
}

# Release the parse tree explicitly once the listing has been processed.
$tree->delete;
# get_email($r_pth)
#
# Fetches the recruiter profile page at "$url$r_pth" (where $url is the
# file-level listing URL) and prints the href of the first
# <a class="recruiter-email-link"> element, followed by a "get <url>" trace
# line.
#
# Parameters:
#   $r_pth - recruiter path relative to $url; a trailing newline is tolerated
#            and removed before the URL is built.
#
# Returns nothing useful.  Emits a warning, rather than dying, when the page
# cannot be fetched or contains no email link.
sub get_email {
    my ($r_pth) = @_;
    return unless defined $r_pth;

    # Callers may hand over the path with a trailing newline; strip it so it
    # never ends up inside the request URL.
    chomp $r_pth;
    my $profile_url = "$url$r_pth";

    # LWP::Simple::get returns undef when the request fails.
    my $html = get($profile_url);
    unless ( defined $html ) {
        warn "Could not fetch $profile_url\n";
        return;
    }

    my $tree2 = HTML::TreeBuilder->new_from_content($html);

    # In list context look_down returns every match; only the first is used.
    my ($mailto) = $tree2->look_down( _tag => 'a', class => 'recruiter-email-link' );
    my $href = defined $mailto ? $mailto->attr('href') : undef;

    if ( defined $href ) {
        print "$href\n";
    }
    else {
        warn "No recruiter email link found on $profile_url\n";
    }

    # Release the parse tree; one is built for every recruiter page.
    $tree2->delete;

    # Terminate the trace line explicitly instead of relying on a newline
    # smuggled in through the path argument.
    print "get $profile_url\n";
    return;
}
No comments:
Post a Comment