In this tutorial you will learn:
1) How to extract data from an HTML table
2) Regular expressions (regex)
3) Appending a hash to a hash
4) for loops
----------------------------------------------------------------------------------------------
HTML file
----------------------------------------------------------------------------------------------
<table class="table1" border="1" cellspacing="1" cellpadding="1" style="width: 1144px; height: 883px;">
<tr>
<td> <b>Dom0<br /></b>
</td><td> <b>DomU <br /></b>
</td><td> <b>ipaddress<br /></b>
</td><td> <b>server name <br /></b>
</td><td> <b>application <br /></b>
</td><td> <b>website <br /></b>
</td><td> <b>assigned to </b><br />
</td></tr>
<tr>
<td> S1 (1.51)<br />
</td><td> yourwebsitenews-db 10GB ram 100 GB HDD
</td><td> 1.0.0.101<br />
</td><td> yourwebsitenews-db
</td><td> win2k8 ,sql
</td><td> MSSQL Server
</td><td> yourwebsitenews
</td></tr>
<tr>
<td> <br />
</td><td> yourwebsitetvweb3 4GB ram 48 GB HDD<br />
</td><td> 9.9.02.132<br />
</td><td> yourwebsitetvweb3<br />
</td><td> win2k8,IIS<br />
</td><td> yourwebsite TV <br />
</td><td> yourwebsitetv<br />
</td></tr>
<tr>
<td> <br />
</td><td> yourwebsitetvweb4 4GB ram 30 GB HDD<br />
</td><td> 9.9.202.146<br />
</td><td> yourwebsitetvweb4<br />
</td><td> win2k8,IIS<br />
</td><td> yourwebsite TV <br />
</td><td> yourwebsitetv<br />
</td></tr>
<tr>
<td> <br />
</td><td> <br />
</td><td> <br />
</td></tr>
<tr>
<td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td></tr>
<tr>
<td> S2 (1.52) <br />
</td><td> web1 5.5GB ram 50GB hdd<br />
</td><td> 1.0.0.106<br />
</td><td> web1 <br />
</td><td> apache,php,memcache<br />
</td><td> someweb,cndas,lms.someweb.com<br />
</td><td> IWPL
</td></tr>
<tr>
<td> <br />
</td><td> web4 5.5GB ram 50GB hdd<br />
</td><td> 1.0.0.109<br />
</td><td> web4<br />
</td><td> apache,php,memcache<br />
</td><td> IWPL
</td></tr>
<tr>
<td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td></tr>
<tr>
<td> S3 (1.53) <br />
</td><td> DB1 11GB ram 100GB hdd <br />
</td><td> 1.0.0.111<br />
</td><td> db1<br />
</td><td> mysql <br />
</td><td> all commons (auth ),someweb<br />
</td><td> IL
</td></tr>
<tr>
<td> <br />
<tr>
<td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td></tr>
<tr>
<td> S4 (1.54) <br />
</td><td> DB3 10GB ram 200GB hdd<br />
</td><td> 1.0.0.116<br />
</td><td> db3<br />
</td><td> mysql <br />
</td><td> ors_db,ls_ting
</td><td> IWPL
</td></tr>
<tr>
<td> <br />
</td><td> yourwebsitetvmemcach-1 10 Gb ram 24 Gb hdd<br />
</td><td> 9.9.202.134<br />
</td><td> yourwebsitetvmem-1<br />
</td><td> yourwebsite Tv Memcached Server-1<br />
</td><td> yourwebsite Tv Memcached Server-1<br />
</td><td> yourwebsitetv
</td></tr>
<tr>
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td></tr>
<tr>
<td> S5 (1.55)<br />
</td><td> 1 systems ops 8GB 100GB <br />
</td><td> 1.0.0.121<br />
</td><td> ops<br />
</td><td> mysql,php,apache<br /><br />
</td><td> odw <br /> opsview <br /> reports <br /> runtime <br />
</td><td> IWPL
</td></tr>
<tr>
<td> <br />
<td> <br />
<td> <br />
</td><td> cache 4GB ram 129GB hdd<br />
</td><td> 9.9.202.209<br />
</td><td> linux-yourwebsite<br />
</td><td> php,apache<br />
</td><td> yourwebsite <br />
</td><td> yourwebsitetv
</td></tr>
<tr>
<td> <br />
</td><td> cache 6GB ram 20GB hdd<br />
</td><td> 1.0.0.125<br />
</td><td> cache-9<br />
</td><td> backup server
</td><td> backup server
</td><td> IL
</td></tr>
<tr>
<td> <br />
</td><td> web16- 5 GB RAM 46GB HDD
</td><td> 1.0.0.103
</td><td> web16
</td><td> php,apache
</td><td> voiofa.com,solr instance
</td><td> Il
</td></tr>
<tr>
<td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td><td> <br />
</td></tr>
<tr>
<td> S6 (1.56) <br />
</td><td> web9 5GBram <br />
</td><td> 1.0.0.126<br />
</td><td> web9 <br />
</td><td> apache,php,memcache<br />
</td><td> Il
</td></tr>
<tr>
<td> <br />
</td><td> web10 <br />
</td><td> 1.0.0.127<br />
</td><td> web10<br />
</td><td> apache,php,memcache<br />
</td><td> hea.ia.com,esults.ia.com
</td><td> Il
</td></tr>
<tr>
<td> <br />
</td><td> r1s6v1web13 5-GB ram - 50-GB hdd<br />
</td><td> 1.0.0.129<br />
</td><td> web13<br />
</td><td> apache,php,memcache
</td><td> critry,m.try,admin.ccry.com
</td><td> IL
</td></tr></table>
==================================================================
Perl script
==================================================================
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use LWP::Simple;
use HTML::TableExtract;
# Host the HTML table shown above on a site this script can reach.
my $url = 'http://site2.int.com/table_tmp1.html';

# Each element of @data is an array ref holding the cleaned-up cells of one
# table row, in column order:
#   0 Dom0, 1 DomU, 2 ipaddress, 3 server name, 4 application,
#   5 website, 6 assigned to
my @data = get_data($url);
shift @data;    # the first row is the column-heading row, not server data

# Result structure:
#   $serverdata{<Dom0 address>}{<DomU ip>} = [ { serverdetails => ...,
#       servername => ..., application => ..., websites => ... }, ... ]
my %serverdata;

# Matches a Dom0 label such as "S1 (1.51)": an alphanumeric name, white
# space, then round brackets. The text inside the brackets is captured so it
# can be appended to the "172.16." prefix.
my $dom0_label_re = qr/[a-z]+[0-9]+[a-z0-9]*\s+\((.*)\)/is;

# Dom0 address of the current group of rows. Starts as the empty string so
# that rows seen before any Dom0 label are still stored (under the key '')
# without "uninitialized value" warnings.
my $dom0 = '';

foreach my $row (@data) {

    # Spacer rows and rows without an IP address carry no server data.
    next unless $row->[2];

    # Only the first DomU row of a group names its Dom0; the rows below it
    # leave that cell empty and inherit the value from the row above.
    if ( $row->[0] ) {
        ( $dom0 = $row->[0] ) =~ s/$dom0_label_re/172.16.$1/;    # S1 (1.51) -> 172.16.1.51
    }

    # Debugging aid:
    # print "$dom0 -----> $row->[2] ==========> $row->[1], $row->[3], $row->[4], $row->[5], $row->[6]\n";

    # Column 6 ("assigned to") is read from the table but not stored.
    push @{ $serverdata{$dom0}{ $row->[2] } },
        {
        'serverdetails' => $row->[1],
        'servername'    => $row->[3],
        'application'   => $row->[4],
        'websites'      => $row->[5],
        };
}

# Sorted so that the report comes out in the same order on every run.
foreach my $dom0_key ( sort keys %serverdata ) {
    print "================== $dom0_key ===============\n";
    print Dumper( $serverdata{$dom0_key} );
    print "================== ===============\n";
}
# Download an HTML page and return the rows of its class="table1" table.
#
# Parameters:
#   $_[0] - URL of the page to fetch.
#
# Returns:
#   A list of array refs, one per table row. Each array holds that row's
#   cells as strings with leading/trailing white space and doubled newlines
#   removed. Every cell keeps its position: a cell that is empty or missing
#   is returned as '' rather than being dropped, so column indexes stay
#   stable for the caller.
#
# Dies if the page cannot be fetched.
sub get_data {
    my ($sub_url) = @_;

    # LWP::Simple::get returns undef when the request fails.
    my $html = get($sub_url);
    die "Could not fetch $sub_url" unless defined $html;

    my $te = HTML::TableExtract->new(
        depth   => 0,
        count   => 0,
        attribs => { 'class' => "table1" },
    );
    $te->parse($html);

    my @arry;
    foreach my $ts ( $te->tables ) {
        foreach my $row ( $ts->rows ) {
            my @cells;
            foreach my $cell ( @{$row} ) {

                # Work on a copy, and never use the substitution count as a
                # filter: a cell that needs no trimming must still be kept.
                my $text = defined $cell ? $cell : '';
                $text =~ s/\n\n|(^\s+|\s+$)//g;
                push @cells, $text;
            }
            push @arry, \@cells;
        }
    }
    return @arry;
}
No comments:
Post a Comment