#!usr/bin/perl -w

# Code to align DNA sequences from an existing alignment of the
# corresponding amino acid sequences.
# 


$protFile = "prot.nex";
$dnaFile = "dna.phy";
$outputFile = "alignDna.nex";

$NprotChar = 0;
$iTax=0;

open FH1, "<$protFile";
while (<FH1>){
  if (m/dimensions/i){
    @line = split(/ /) ; 
    foreach $line (@line){
      if ($line =~ /ntax/i){ 
	@dummy= split /[=;]/, $line;
	$nTax = $dummy[1];
      }
      if ($line =~ /nchar/i){ 
	@dummy= split /[=;]/, $line;
	$NprotChar = $dummy[1];
      }
    }
  }
  if ($readMatrix){
    @dummy = split;
    if ($#dummy > 0){
      $name[$iTax] = $dummy[0];
      for ($j=1; $j<=$#dummy; $j++){
	$protSequence[$iTax] .= $dummy[$j];
      }
      $iTax++;
    }
    elsif ($#dummy == -1){
      $iTax=0;
    }
    else {
      $readMatrix = 0;
    }
  }
  if (m/matrix/){
    $readMatrix=1;
  }
}
close FH1;
for ($iTax=0; $iTax<$nTax; $iTax++){
print "$name[$iTax] $protSequence[$iTax]\n\n\n";
}

# ---------------  read the protein sequence and translate ------

open FH2, "<$dnaFile";
open FH3, ">$outputFile";
print FH3 "#NEXUS\nbegin data;\n";
print FH3 "dimensions ntax=$nTax nchar=" . 3*$NprotChar . ";\n";
print FH3 "format datatype=dna gap=-;\nmatrix\n";

while(<FH2>) {
  $line = $_;
  $found = 0;
  for ($iTax=0; $iTax<$nTax; $iTax++){
    if (m/($name[$iTax])/){
      $i = $iTax;
      @dummy = split ;
      $dnaSequence = $dummy[1];
      $found = 1;
    }
  }
  if (! $found){ print "not found\n";}
  else{
    print FH3 "$name[$i] ";
    $protsequence = $protSequence[$i];
    $protsequence = reverse $protsequence;
    $dnaSequence = reverse $dnaSequence;
    for ($iseq=0; $iseq<$NprotChar; $iseq++){
      $aa = chop $protsequence; #print "$aa.";
      if ($aa eq "-"){
	print FH3 "---";
      }
      else {
	for $i (1..3){
	  $na=chop $dnaSequence;
	  print FH3 $na;
	}
      }
    }
    print FH3 "\n";
  }
}
close FH3;
close FH2;


