gff文件提取cds并对照

 1 #!/usr/bin/perl
 2 
 3 use strict;
 4 use warnings;
 5 
 6 my $gff = $ARGV[0];my %cut = &cut($gff);
 7 
 8 my $gene_number = $ARGV[1];my $gene_id = $ARGV[2];my $gene_name = $ARGV[3];my $start = $ARGV[4];my $end = $ARGV[5];
 9 
10 foreach my $key_1(keys %cut)
11 {
12     my $hash_2 = $cut{$key_1};
13 
14     foreach my $key_2(keys %$hash_2)
15     {
16         my @arr = $hash_2 -> {$key_2};
17         
18 #        print "$key_1,$key_2,$arr[0][0],$arr[0][1]\n";
19         
20         if($key_1 eq $gene_name)
21         {
22 
23             
24             if($arr[0][0] <= $start && $arr[0][1] >= $start )
25             {
26                 if($start+30>=$arr[0][1])
27                 {
28                     print "$gene_number\t$gene_id\t$gene_name\t$start".'-'."$end\tstart:exon$key_2\n";
29                 }
30             }
31             if($arr[0][0] <= $end && $arr[0][1] >= $end )
32             {
33                 if($end-30>=$arr[0][0])
34                 {
35                     print "$gene_number\t$gene_id\t$gene_name\t$start".'-'."$end\tend:exon$key_2\n";
36                 }
37             }
38         }
39     }
40 }
41 
42 
43 #############sub################
44 
45 sub cut
46 {
47     my %gene;
48 
49     my $exon = 1;my $start = 0;my $length = 0;
50 
51     my $gff = shift;open GFF,"$gff"; 
52     
53     while(my $line = <GFF>)
54     {
55         chomp $line;
56 
57         my @q = split /\s/,$line;
58 
59         if($q[2] =~ /mRNA/)
60         {
61             $exon = 1;$start = $q[3];$length = 0;
62         }
63         elsif($q[2] =~ /CDS/)
64         {
65             $q[8] =~ /Parent=(.*);S/m;my $key = $1;  
66 
67             my $new_length = $q[4]-$q[3]; 
68 
69             $exon =~/(\w*)/; $gene{$key}{$1} = [$length+1,$length+$new_length];
70 
71             $exon++; $length += $new_length;
72         }    
73     }   
74     return %gene;
75 }

猜你喜欢

转载自www.cnblogs.com/yuanjingnan/p/11087860.html
今日推荐