#!/usr/bin/perl
use strict;
use warnings;
use HTML::TableExtract;
use Data::Dumper;
# Extract every row of every table found in an HTML document and print it
# to STDOUT as one tab-separated line per row.
#
# Usage: script.pl [input.html]
#   input.html  optional path to an HTML file; when it is omitted, the
#               embedded sample table below is parsed instead.
my $inputhtml = shift;

# Sample table used when no input file is supplied. The quoted terminator
# disables interpolation, so the markup is taken literally.
my $content = <<'_CMD_';
<table class="gy">
<tr><th>Sample</th><th>Library</th><th>Raw Reads</th><th>Clean Reads</th><th>Raw Base(G)</th><th>Clean Base(G)</th><th>Effective Rate(%)</th><th>Error Rate(%)</th><th>Q20(%)</th><th>Q30(%)</th><th>GC Content(%)</th></tr>
<tr>
<td>Sample1</td><td>TKD1805000</td><td>15,014,440</td><td>14,731,101</td><td>4.5</td><td>4.42</td><td rowspan=1>98.11</td><td rowspan=1>0.01</td><td>96.94</td><td>92.81</td><td>57.50</td>
</tr>
<tr>
<td>Sample2</td><td>TKD1805000</td><td>7,494,788</td><td>7,424,284</td><td>2.25</td><td>2.23</td><td rowspan=1>99.06</td><td rowspan=1>0.03</td><td>92.60</td><td>84.22</td><td>55.30</td>
</tr>
</table>
_CMD_

# No headers/depth/count constraints are passed to the constructor.
my $te = HTML::TableExtract->new();

if (defined $inputhtml) {
    # parse_file is inherited from HTML::Parser; it returns undef and sets $!
    # when the named file cannot be opened.
    defined $te->parse_file($inputhtml)
        or die "Cannot parse '$inputhtml': $!\n";
}
else {
    $te->parse($content);
    $te->eof;    # signal end of document so buffered text is flushed
}

for my $table ($te->tables) {
    for my $row ($table->rows) {
        # A cell may be undef (e.g. a grid position covered by a spanning
        # cell); print it as an empty field rather than raising an
        # "uninitialized value" warning inside join.
        print join("\t", map { defined $_ ? $_ : '' } @$row), "\n";
    }
}
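# Output for the embedded sample table (fields are tab-separated in the real output):
# Sample Library Raw Reads Clean Reads Raw Base(G) Clean Base(G) Effective Rate(%) Error Rate(%) Q20(%) Q30(%) GC Content(%)
# Sample1 TKD1805000 15,014,440 14,731,101 4.5 4.42 98.11 0.01 96.94 92.81 57.50
# Sample2 TKD1805000 7,494,788 7,424,284 2.25 2.23 99.06 0.03 92.60 84.22 55.30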
# References:
#   https://blog.csdn.net/cnki_ok/article/details/6534346
#   https://metacpan.org/pod/HTML::TableExtract