This script downloads the email addresses of all recruiters.
=============================================
Using a simple regex.
=============================================
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use LWP::Simple;
# Base URL of the recruiter directory; each recruiter page lives beneath it.
my $url = 'http://www.cybercoders.com/recruiter/';

# LWP::Simple::get returns undef on any failure, so bail out early instead of
# splitting an undefined value.
my $html = get($url);
die "Could not fetch $url\n" unless defined $html;

# Scan the directory page line by line for links to individual recruiters.
for my $line (split /\n/, $html) {
    # Capture the recruiter path relative to $url. The negated character
    # class keeps the match inside a single href attribute, where a greedy
    # ".*" could run on to a later '/">' on the same line.
    next unless $line =~ m{<a href="/recruiter/([^"\s][^"]*/)">};
    my $recruiter_url = $url . $1;
    print "$recruiter_url\n";

    # A single unreachable recruiter page should not abort the whole run.
    my $recruiter_html = get($recruiter_url);
    if (!defined $recruiter_html) {
        warn "Could not fetch $recruiter_url\n";
        next;
    }

    # Print every recruiter e-mail address found on the recruiter's page.
    for my $recruiter_line (split /\n/, $recruiter_html) {
        next unless $recruiter_line
            =~ m{<a class="recruiter-email-link" href="mailto:([^"\s]+\@CyberCoders\.com)">};
        print "$1\n";
    }
}
=========================================================
Using HTML::TreeBuilder::XPath
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use LWP::Simple;
use HTML::TreeBuilder;
use HTML::TreeBuilder::XPath;
# Base URL of the recruiter directory; get_email() appends a recruiter path to it.
my $url = 'http://www.cybercoders.com/recruiter/';

# LWP::Simple::get returns undef on any failure, so stop before parsing.
my $html = get($url);
die "Could not fetch $url\n" unless defined $html;

# new_from_content() parses its arguments and finalizes the tree, so the
# page content is handed to it directly rather than parsed afterwards.
my $tree = HTML::TreeBuilder::XPath->new_from_content($html);

# One <div class="recruiter-item"> per recruiter on the directory page.
my @customers = $tree->findnodes('//div[@class="recruiter-item"]');
for my $custm (@customers) {
    # The first link inside the item is used as the recruiter's page link.
    my ($first_link) = $custm->look_down('_tag' => 'a');
    next unless defined $first_link;

    my $recruiter_pth = $first_link->attr('href');
    next unless defined $recruiter_pth;

    # Reduce the href to the part after /recruiter/, since get_email()
    # prepends $url (which already ends in /recruiter/).
    $recruiter_pth =~ s{/recruiter/}{}g;
    get_email($recruiter_pth);
}

# Release the parse tree explicitly once all recruiters have been handled.
$tree->delete;
# get_email($r_pth)
#
# Fetches the recruiter page at "$url$r_pth" (with $url being the file-level
# base URL), prints the address from the first <a class="recruiter-email-link">
# on that page, then prints a "get <url>" trace line.
#
# Parameters:
#   $r_pth - recruiter path relative to $url; surrounding whitespace is
#            ignored so a stray newline cannot end up inside the request URL.
#
# Returns nothing useful. Emits a warning (and prints no address) when the
# page cannot be fetched or contains no e-mail link.
sub get_email {
    my ($r_pth) = @_;
    return unless defined $r_pth;
    $r_pth =~ s/\A\s+|\s+\z//g;

    my $recruiter_url = "$url$r_pth";

    # LWP::Simple::get returns undef on any failure.
    my $html = get($recruiter_url);
    if (!defined $html) {
        warn "Could not fetch $recruiter_url\n";
        return;
    }

    my $tree2  = HTML::TreeBuilder->new_from_content($html);
    my @mailto = $tree2->look_down(_tag => 'a', class => 'recruiter-email-link');

    # Copy the attribute out before the tree is released.
    my $href = @mailto ? $mailto[0]->attr('href') : undef;
    $tree2->delete;

    if (defined $href) {
        # Print the bare address, matching the output of the regex-based
        # version of this script.
        $href =~ s/\Amailto://i;
        print "$href\n";
    }
    else {
        warn "No recruiter e-mail link found at $recruiter_url\n";
    }

    print "get $recruiter_url\n";
    return;
}
 
 
No comments:
Post a Comment