use strict;
use warnings;

# Convert a presence/absence allele table (data.txt) into a per-locus
# genotype table (out.txt).
#
# Input:  one sample per line, fields separated by single spaces.
#         Field 0 is the sample id; the following fields are one flag per
#         allele, grouped locus by locus according to @allele_counts.
#         A flag counts as "present" when it is true in Perl's sense, so
#         "0" and the empty string mean absent.
# Output: the sample id, then one column per locus, every column followed
#         by a tab (so each row ends with a tab before the newline):
#           exactly one allele present  -> "X X"  (same label twice)
#           exactly two alleles present -> "X Y"
#           anything else               -> "error"

my $in_path  = 'data.txt';
my $out_path = 'out.txt';

# Number of alleles scored at each of the six loci (M1-M6), in column order.
my @allele_counts = (5, 6, 6, 5, 5, 5);

# Label for the n-th allele within a locus. Only the first six are reachable
# with the counts above; the seventh is kept from the original table.
my @allele_labels = qw(A B C D E F G);

# Three-argument open with lexical handles: the mode cannot be injected via
# the file name, and the special DATA handle is no longer clobbered.
open my $in_fh, '<', $in_path or die "cannot open file:$!";
open my $out_fh, '>', $out_path
    or die "cannot open $out_path for writing: $!";

# Read line by line instead of slurping the whole file into a list.
while (my $line = <$in_fh>) {
    chomp $line;

    # Tolerate CRLF input: a trailing "\r" would otherwise turn a final
    # "0" flag into the true string "0\r".
    $line =~ s/\r\z//;

    # Blank lines carry no sample; skip them rather than emit a row of
    # "error" columns with an undefined sample id.
    next if $line !~ /\S/;

    my @fields = split / /, $line;
    print {$out_fh} $fields[0], "\t";

    # Index of the field just before the current locus' first flag
    # (field 0 is the sample id, so the first locus starts at field 1).
    my $offset = 0;

    for my $count (@allele_counts) {
        # Labels of the alleles flagged present at this locus. Fields
        # missing from a short line are undef and therefore absent.
        my @present = map  { $allele_labels[ $_ - 1 ] }
                      grep { $fields[ $offset + $_ ] } 1 .. $count;

        my $call = @present == 1 ? "$present[0] $present[0]"
                 : @present == 2 ? "$present[0] $present[1]"
                 :                 'error';

        print {$out_fh} $call, "\t";
        $offset += $count;
    }

    print {$out_fh} "\n";
}

close $in_fh;

# Buffered write errors only surface at close, so check it.
close $out_fh or die "cannot close $out_path: $!";
In the original data, the first column is the sample ID, followed by presence/absence flags for each allele of each locus. Loci M1-M6 have 5, 6, 6, 5, 5, and 5 alleles, respectively.