#!usr/bin/perl -w # Code to align DNA sequences from an existing alignment of the # corresponding amino acid sequences. # $protFile = "prot.nex"; $dnaFile = "dna.phy"; $outputFile = "alignDna.nex"; $NprotChar = 0; $iTax=0; open FH1, "<$protFile"; while (){ if (m/dimensions/i){ @line = split(/ /) ; foreach $line (@line){ if ($line =~ /ntax/i){ @dummy= split /[=;]/, $line; $nTax = $dummy[1]; } if ($line =~ /nchar/i){ @dummy= split /[=;]/, $line; $NprotChar = $dummy[1]; } } } if ($readMatrix){ @dummy = split; if ($#dummy > 0){ $name[$iTax] = $dummy[0]; for ($j=1; $j<=$#dummy; $j++){ $protSequence[$iTax] .= $dummy[$j]; } $iTax++; } elsif ($#dummy == -1){ $iTax=0; } else { $readMatrix = 0; } } if (m/matrix/){ $readMatrix=1; } } close FH1; for ($iTax=0; $iTax<$nTax; $iTax++){ print "$name[$iTax] $protSequence[$iTax]\n\n\n"; } # --------------- read the protein sequence and translate ------ open FH2, "<$dnaFile"; open FH3, ">$outputFile"; print FH3 "#NEXUS\nbegin data;\n"; print FH3 "dimensions ntax=$nTax nchar=" . 3*$NprotChar . ";\n"; print FH3 "format datatype=dna gap=-;\nmatrix\n"; while() { $line = $_; $found = 0; for ($iTax=0; $iTax<$nTax; $iTax++){ if (m/($name[$iTax])/){ $i = $iTax; @dummy = split ; $dnaSequence = $dummy[1]; $found = 1; } } if (! $found){ print "not found\n";} else{ print FH3 "$name[$i] "; $protsequence = $protSequence[$i]; $protsequence = reverse $protsequence; $dnaSequence = reverse $dnaSequence; for ($iseq=0; $iseq<$NprotChar; $iseq++){ $aa = chop $protsequence; #print "$aa."; if ($aa eq "-"){ print FH3 "---"; } else { for $i (1..3){ $na=chop $dnaSequence; print FH3 $na; } } } print FH3 "\n"; } } close FH3; close FH2;