Carreguei um arquivo multifasta de genes, contei as trincas de códons, e joguei num hash com os códons, aminoácidos e frequência de cada códon.
#!/usr/bin/perl
use Bio::SeqIO;
use strict;
use warnings;
my $ref1 = conta_codons("genes_yeast.ffn");
my %codons = %{$ref1};
my $ref2 = table_aa(\%codons);
my %usage = %{$ref2};
my $last="a";
foreach my $codon (sort {$usage{$a}{name} cmp $usage{$b}{name}} keys %usage) {
if ($last eq $usage{$codon}{name}){ print "$codon\t$usage{$codon}{name}\t$usage{$codon}{contagem}\n";}
else { print "\n$codon\t$usage{$codon}{name}\t$usage{$codon}{contagem}\n"; }
$last = $usage{$codon}{name};
}
exit;
sub conta_codons {
my $file_name = shift;
my $file_obj = Bio::SeqIO -> new ( -file => $file_name,
-format => 'fasta' );
#gera hash com a quantidade de vezes que cada codon aparece.
my %codons;
while (my $inseq = $file_obj->next_seq) {
# grava cada seq na variavel $sequence
my $sequence = $inseq->seq();
#anda de 3 em 3 (tem q ver se da certo)
for(my $i=0; $i < (length($sequence) - 2) ; $i += 3){
my $codon = substr($sequence,$i,3);
#se o codon for novo na tabela, ele recebe o valor 1
if (!defined $codons{$codon}) { $codons{$codon} = 1 }
# se ele for recorrente, apenas soma 1.
else { $codons{$codon}++; }
}
}
return \%codons;
}
sub table_aa {
my $ref = shift;
my %codons = %{$ref};
my %usage;
foreach my $codon (keys %codons) {
if ( $codon =~ /GC./i) { $usage{$codon}{name}= "ala"; $usage{$codon}{contagem}= $codons{$codon}; } # Alanine
elsif ( $codon =~ /TG[TC]/i) { $usage{$codon}{name}= "cys"; $usage{$codon}{contagem}= $codons{$codon}; } # Cysteine
elsif ( $codon =~ /GA[TC]/i) { $usage{$codon}{name}= "aca"; $usage{$codon}{contagem}= $codons{$codon}; } # Aspartic Acid
elsif ( $codon =~ /GA[AG]/i) { $usage{$codon}{name}= "glu"; $usage{$codon}{contagem}= $codons{$codon}; } # Glutamic Acid
elsif ( $codon =~ /TT[TC]/i) { $usage{$codon}{name}= "phe"; $usage{$codon}{contagem}= $codons{$codon}; } # Phenylalanine
elsif ( $codon =~ /GG./i) { $usage{$codon}{name}= "gly"; $usage{$codon}{contagem}= $codons{$codon}; } # Glycine
elsif ( $codon =~ /CA[TC]/i) { $usage{$codon}{name}= "his"; $usage{$codon}{contagem}= $codons{$codon}; } # Histidine
elsif ( $codon =~ /AT[TCA]/i) { $usage{$codon}{name}= "iso"; $usage{$codon}{contagem}= $codons{$codon}; } # Isoleucine
elsif ( $codon =~ /AA[AG]/i) { $usage{$codon}{name}= "lys"; $usage{$codon}{contagem}= $codons{$codon}; } # Lysine
elsif ( $codon =~ /TT[AG]|CT./i) { $usage{$codon}{name}= "leu"; $usage{$codon}{contagem}= $codons{$codon}; } # Leucine
elsif ( $codon =~ /ATG/i) { $usage{$codon}{name}= "met"; $usage{$codon}{contagem}= $codons{$codon}; } # Methionine
elsif ( $codon =~ /AA[TC]/i) { $usage{$codon}{name}= "asp"; $usage{$codon}{contagem}= $codons{$codon}; } # Asparagine
elsif ( $codon =~ /CC./i) { $usage{$codon}{name}= "pro"; $usage{$codon}{contagem}= $codons{$codon}; } # Proline
elsif ( $codon =~ /CA[AG]/i) { $usage{$codon}{name}= "glu"; $usage{$codon}{contagem}= $codons{$codon}; } # Glutamine
elsif ( $codon =~ /CG.|AG[AG]/i) { $usage{$codon}{name}= "arg"; $usage{$codon}{contagem}= $codons{$codon}; } # Arginine
elsif ( $codon =~ /TC.|AG[TC]/i) { $usage{$codon}{name}= "ser"; $usage{$codon}{contagem}= $codons{$codon}; } # Serine
elsif ( $codon =~ /AC./i) { $usage{$codon}{name}= "thr"; $usage{$codon}{contagem}= $codons{$codon}; } # Threonine
elsif ( $codon =~ /GT./i) { $usage{$codon}{name}= "val"; $usage{$codon}{contagem}= $codons{$codon}; } # Valine
elsif ( $codon =~ /TGG/i) { $usage{$codon}{name}= "try"; $usage{$codon}{contagem}= $codons{$codon}; } # Tryptophan
elsif ( $codon =~ /TA[TC]/i) { $usage{$codon}{name}= "tyr"; $usage{$codon}{contagem}= $codons{$codon}; } # Tyrosine
elsif ( $codon =~ /TA[AG]|TGA/i) { $usage{$codon}{name}= "stp"; $usage{$codon}{contagem}= $codons{$codon}; } # Stop
else { print STDERR "Bad codon \"$codon\"!!\n" }
}
return \%usage;
}
Nenhum comentário:
Postar um comentário