Perl 模块学习:HTML::TableExtract(从 HTML 中提取出 table 里的内容)

#!/usr/bin/perl
use strict;
use warnings;
use HTML::TableExtract;
use Data::Dumper;


# Optional first command-line argument: path to an HTML file to parse.
# When it is omitted, the embedded sample table below is parsed instead.
my $inputhtml = shift;

# Sample HTML used when no input file is given. The heredoc tag is
# single-quoted so the markup is taken literally (no interpolation), and the
# statement is terminated with ';' -- without it the script does not compile.
my $content = <<'_CMD_';
<table class="gy">
<tr><th>Sample</th><th>Library</th><th>Raw Reads</th><th>Clean Reads</th><th>Raw Base(G)</th><th>Clean Base(G)</th><th>Effective Rate(%)</th><th>Error Rate(%)</th><th>Q20(%)</th><th>Q30(%)</th><th>GC Content(%)</th></tr>
            <tr>
            <td>Sample1</td><td>TKD1805000</td><td>15,014,440</td><td>14,731,101</td><td>4.5</td><td>4.42</td><td rowspan=1>98.11</td><td rowspan=1>0.01</td><td>96.94</td><td>92.81</td><td>57.50</td>
            </tr>
            <tr>
            <td>Sample2</td><td>TKD1805000</td><td>7,494,788</td><td>7,424,284</td><td>2.25</td><td>2.23</td><td rowspan=1>99.06</td><td rowspan=1>0.03</td><td>92.60</td><td>84.22</td><td>55.30</td>
            </tr>
 </table>
_CMD_

# Build the extractor with no constraints passed to the constructor.
# Direct method-call syntax is used instead of the indirect "new Class" form.
my $te = HTML::TableExtract->new();

if (defined $inputhtml) {
    # parse_file() is inherited from HTML::Parser; it returns undef when the
    # file cannot be opened, leaving the reason in $!.
    defined $te->parse_file($inputhtml)
        or die "Cannot parse '$inputhtml': $!\n";
}
else {
    $te->parse($content);
}

# Print every extracted table as tab-separated text, one table row per line.
for my $ts ($te->table_states) {
    for my $row ($ts->rows) {
        # Cells may be undef; print them as empty strings so that join()
        # does not emit "uninitialized value" warnings.
        print join("\t", map { defined $_ ? $_ : '' } @$row), "\n";
    }
}
输出列表(对应上面脚本中内嵌的表格数据,各列以制表符分隔):
Sample	Library	Raw Reads	Clean Reads	Raw Base(G)	Clean Base(G)	Effective Rate(%)	Error Rate(%)	Q20(%)	Q30(%)	GC Content(%)
Sample1	TKD1805000	15,014,440	14,731,101	4.5	4.42	98.11	0.01	96.94	92.81	57.50
Sample2	TKD1805000	7,494,788	7,424,284	2.25	2.23	99.06	0.03	92.60	84.22	55.30

参考:https://blog.csdn.net/cnki_ok/article/details/6534346
           https://metacpan.org/pod/HTML::TableExtract

猜你喜欢

转载自blog.csdn.net/rojyang/article/details/81216701