Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

gnTranslator.cpp

Go to the documentation of this file.
00001 /////////////////////////////////////////////////////////////////////////////
00002 // File:            gnTranslator.cpp
00003 // Purpose:         Filter for all Sequences
00004 // Description:     translates, converts sequence
00005 // Changes:        
00006 // Version:         libGenome 0.5.1 
00007 // Author:          Aaron Darling 
00008 // Modified by:     
00009 // Copyright:       (c) Aaron Darling 
00010 // Licenses:        See COPYING file for details
00011 /////////////////////////////////////////////////////////////////////////////
00012 #include "gn/gnTranslator.h"
00013 #include "gn/gnCompare.h"
00014 
00015 //      static data access, avoids static initialization order fiasco
00016 const gnTranslator *gnTranslator::ProteinDNATranslator(){
00017         const static gnTranslator* t_trans = new gnTranslator(ProteinDNATranslatorType);
00018         return t_trans;
00019 }
00020 const gnTranslator *gnTranslator::ProteinRNATranslator(){
00021         const static gnTranslator* t_trans = new gnTranslator(ProteinRNATranslatorType);
00022         return t_trans;
00023 }
00024 const gnTranslator *gnTranslator::DNAProteinTranslator(){
00025         const static gnTranslator* t_trans = new gnTranslator(DNAProteinTranslatorType);
00026         return t_trans;
00027 }
00028 const gnTranslator *gnTranslator::RNAProteinTranslator(){
00029         const static gnTranslator* t_trans = new gnTranslator(RNAProteinTranslatorType);
00030         return t_trans;
00031 }
00032 
00033 //      public:
00034 gnTranslator::gnTranslator()
00035 {
00036         use_default = false;
00037         m_defaultChar = 0;
00038         m_defaultInputWidth = 1;
00039 }
00040 
00041 gnTranslator::gnTranslator( const gnTranslator &sf )
00042 {
00043         m_name = sf.m_name;
00044         use_default = sf.use_default;
00045         m_defaultChar = sf.m_defaultChar;
00046         compare = sf.compare;
00047         m_inputTable = sf.m_inputTable;
00048         m_outputTable = sf.m_outputTable;
00049         m_defaultInputWidth = sf.m_defaultInputWidth;
00050 }
00051 gnTranslator::gnTranslator( gnTranslatorType t_type )
00052 {
00053         use_default = false;
00054         m_defaultChar = 0;
00055         switch(t_type){
00056                 case ProteinDNATranslatorType:
00057                         CreateProteinDNATranslator();
00058                         break;
00059                 case ProteinRNATranslatorType:
00060                         CreateProteinRNATranslator();
00061                         break;
00062                 case DNAProteinTranslatorType:
00063                         CreateDNAProteinTranslator();
00064                         break;
00065                 case RNAProteinTranslatorType:
00066                         CreateRNAProteinTranslator();
00067                         break;
00068         }
00069 }
00070 
00071         // gnSeqC 
00072 gnSeqC gnTranslator::Filter( const gnSeqC ch ) const{
00073         for(uint32 i=0; i < m_inputTable.size(); i++){
00074                 if(m_inputTable[i].length() == 1)
00075                         if(compare->Contains(m_inputTable[i][0], ch))
00076                                 return m_outputTable[i][0];
00077         }
00078         return m_defaultChar;
00079 }
00080 
00081 void gnTranslator::Filter( gnSeqC** seq, uint32& len ) const{
00082         uint32 curpos = 0;
00083         string output;
00084         while(curpos < len){
00085                 uint32 i=0;
00086                 for(; i < m_inputTable.size(); i++){
00087                         //don't compare if there aren't enough chars
00088                         uint32 curlen = m_inputTable[i].length();
00089                         if(len - curpos < curlen)
00090                                 continue;
00091                         if(compare->Contains(m_inputTable[i].data(), *seq + curpos, curlen)){
00092                                 output += m_outputTable[i];
00093                                 curpos += curlen;
00094                                 break;
00095                         }
00096                 }
00097                 if(i == m_inputTable.size()){
00098                         //no match was found.  
00099                         if(use_default)  //fill with the default char?
00100                                 output += m_defaultChar;
00101                         curpos += m_defaultInputWidth;
00102                 }
00103         }
00104         if(output.length() > len){
00105                 delete[] *seq;
00106                 *seq = new gnSeqC[output.length()];
00107         }
00108         len = output.length();
00109         memcpy(*seq, output.data(), len);
00110 }
00111         // string
00112 void gnTranslator::Filter( string &seq ) const{
00113         uint32 curpos = 0;
00114         uint32 len = seq.length();
00115         string output;
00116         while(curpos < len){
00117                 uint32 i=0;
00118                 for(; i < m_inputTable.size(); i++){
00119                         //don't compare if there aren't enough chars
00120                         uint32 curlen = m_inputTable[i].length();
00121                         if(len - curpos < curlen)
00122                                 continue;
00123                         if(compare->Contains(m_inputTable[i], seq.substr(curpos, curlen))){
00124                                 output += m_outputTable[i];
00125                                 curpos += curlen;
00126                                 break;
00127                         }
00128                 }
00129                 if(i == m_inputTable.size()){
00130                         //no match was found.  
00131                         if(use_default)  //fill with the default char?
00132                                 output += m_defaultChar;
00133                         curpos += m_defaultInputWidth;
00134                 }
00135         }
00136         seq = output;
00137 }
00138 
00139 // fill map
00140 void  gnTranslator::SetPair( const string& ch1, const string& ch2 )
00141 {
00142         if(ch1.length() == 0)
00143                 return; //cant have an empty input, empty output is ok
00144 
00145         m_inputTable.push_back(ch1);
00146         m_outputTable.push_back(ch2);
00147 }
00148 
00149 void gnTranslator::RemovePair( const string& ch )
00150 {
00151         for(uint32 i=0; i < m_inputTable.size(); i++){
00152                 if(m_inputTable[i] == ch){
00153                         m_inputTable.erase(m_inputTable.begin()+i);
00154                         m_outputTable.erase(m_outputTable.begin()+i);
00155                 }
00156         }
00157 }
00158 
00159 // standard translators
00160 void gnTranslator::CreateProteinDNATranslator(){
00161         SetName( "Protein to DNA Translator" );
00162         
00163         SetDefaultChar('X');
00164         SetCompare(gnCompare::ProteinSeqCompare());
00165         m_defaultInputWidth = 1;
00166         SetPair( "F", "TTY" );
00167         SetPair( "L", "YTX" );  //fix this somehow.  how?
00168         SetPair( "I", "ATH" );
00169         SetPair( "M", "ATG" );
00170         SetPair( "V", "GTX" );
00171         SetPair( "P", "CCX" );
00172         SetPair( "T", "ACX" );
00173         SetPair( "A", "GCX" );
00174         SetPair( "Y", "TAY" );
00175         SetPair( ".", "TRR" );//fix this somehow.  how?
00176         SetPair( "H", "CAY" );
00177         SetPair( "Q", "CAR" );
00178         SetPair( "N", "AAY" );
00179         SetPair( "K", "AAR" );
00180         SetPair( "D", "GAY" );
00181         SetPair( "E", "GAR" );
00182         SetPair( "C", "TGY" );
00183         SetPair( "W", "TGG" );
00184         SetPair( "G", "GGX" );
00185 
00186         SetPair( "S", "TCX" );
00187         SetPair( "S", "AGY");
00188         SetPair( "R", "CGX");
00189         SetPair( "R", "AGR");
00190 }
00191 
00192 void gnTranslator::CreateProteinRNATranslator(){
00193         SetName( "Protein to RNA Translator" );
00194         SetDefaultChar('X');
00195         SetCompare(gnCompare::ProteinSeqCompare());
00196         m_defaultInputWidth = 1;
00197 
00198         SetPair( "F", "UUY" );
00199         SetPair( "L", "YUX" );  //fix this somehow.  how?
00200         SetPair( "I", "AUH" );
00201         SetPair( "M", "AUG" );
00202         SetPair( "V", "GUX" );
00203         SetPair( "P", "CCX" );
00204         SetPair( "U", "ACX" );
00205         SetPair( "A", "GCX" );
00206         SetPair( "Y", "UAY" );
00207         SetPair( ".", "URR" );//fix this somehow.  how?
00208         SetPair( "H", "CAY" );
00209         SetPair( "Q", "CAR" );
00210         SetPair( "N", "AAY" );
00211         SetPair( "K", "AAR" );
00212         SetPair( "D", "GAY" );
00213         SetPair( "E", "GAR" );
00214         SetPair( "C", "UGY" );
00215         SetPair( "W", "UGG" );
00216         SetPair( "G", "GGX" );
00217 
00218         SetPair( "S", "UCX" );
00219         SetPair( "S", "AGY");
00220         SetPair( "R", "CGX");
00221         SetPair( "R", "AGR");
00222 }
00223 
00224 void gnTranslator::CreateDNAProteinTranslator(){
00225         SetName( "DNA to Protein Translator" );
00226         SetDefaultChar('X');
00227         SetCompare(gnCompare::DNASeqCompare());
00228         m_defaultInputWidth = 3;
00229         use_default = true;
00230         
00231         SetPair( "TTY", "F" );
00232         SetPair( "CTX", "L" );
00233         SetPair( "TTR", "L" );
00234         SetPair( "ATH", "I" );
00235         SetPair( "ATG", "M" );
00236         SetPair( "GTX", "V" );
00237         SetPair( "CCX", "P" );
00238         SetPair( "ACX", "T" );
00239         SetPair( "GCX", "A" );
00240         SetPair( "TAY", "Y" );
00241         SetPair( "TGG", "W" );
00242         SetPair( "TGA", "." );
00243         SetPair( "TAR", "." );
00244         SetPair( "CAY", "H" );
00245         SetPair( "CAR", "Q" );
00246         SetPair( "AAY", "N" );
00247         SetPair( "AAR", "K" );
00248         SetPair( "GAY", "D" );
00249         SetPair( "GAR", "E" );
00250         SetPair( "TGY", "C" );
00251         SetPair( "GGX", "G" );
00252 
00253         SetPair( "TCX", "S" );
00254         SetPair( "AGY", "S" );
00255         SetPair( "CGX", "R" );
00256         SetPair( "AGR", "R" );
00257         
00258         SetPair( "tty", "F" );
00259         SetPair( "ctx", "L" );
00260         SetPair( "ttr", "L" );
00261         SetPair( "ath", "I" );
00262         SetPair( "atg", "M" );
00263         SetPair( "gtx", "V" );
00264         SetPair( "ccx", "P" );
00265         SetPair( "acx", "T" );
00266         SetPair( "gcx", "A" );
00267         SetPair( "tay", "Y" );
00268         SetPair( "tgg", "W" );
00269         SetPair( "tga", "." );
00270         SetPair( "tar", "." );
00271         SetPair( "cay", "H" );
00272         SetPair( "car", "Q" );
00273         SetPair( "aay", "N" );
00274         SetPair( "aar", "K" );
00275         SetPair( "gay", "D" );
00276         SetPair( "gar", "E" );
00277         SetPair( "tgy", "C" );
00278         SetPair( "ggx", "G" );
00279 
00280         SetPair( "tcx", "S" );
00281         SetPair( "agy", "S" );
00282         SetPair( "cgx", "R" );
00283         SetPair( "agr", "R" );
00284 
00285 }
00286 
00287 void gnTranslator::CreateRNAProteinTranslator(){
00288         SetName( "RNA to Protein Translator" );
00289         SetDefaultChar('X');
00290         SetCompare(gnCompare::RNASeqCompare());
00291         m_defaultInputWidth = 3;
00292         use_default = true;
00293         
00294         SetPair( "UUY", "F" );
00295         SetPair( "CUX", "L" );
00296         SetPair( "UUR", "L" );
00297         SetPair( "AUH", "I" );
00298         SetPair( "AUG", "M" );
00299         SetPair( "GUX", "V" );
00300         SetPair( "CCX", "P" );
00301         SetPair( "ACX", "T" );
00302         SetPair( "GCX", "A" );
00303         SetPair( "UAY", "Y" );
00304         SetPair( "UGG", "W" );
00305         SetPair( "UGA", "." );
00306         SetPair( "UAR", "." );
00307         SetPair( "CAY", "H" );
00308         SetPair( "CAR", "Q" );
00309         SetPair( "AAY", "N" );
00310         SetPair( "AAR", "K" );
00311         SetPair( "GAY", "D" );
00312         SetPair( "GAR", "E" );
00313         SetPair( "UGY", "C" );
00314         SetPair( "GGX", "G" );
00315 
00316         SetPair( "UCX", "S" );
00317         SetPair( "AGY", "S" );
00318         SetPair( "CGX", "R" );
00319         SetPair( "AGR", "R" );
00320 
00321 
00322         SetPair( "uuy", "F" );
00323         SetPair( "cux", "L" );
00324         SetPair( "uur", "L" );
00325         SetPair( "auh", "I" );
00326         SetPair( "aug", "M" );
00327         SetPair( "gux", "V" );
00328         SetPair( "ccx", "P" );
00329         SetPair( "acx", "T" );
00330         SetPair( "gcx", "A" );
00331         SetPair( "uay", "Y" );
00332         SetPair( "ugg", "W" );
00333         SetPair( "uga", "." );
00334         SetPair( "uar", "." );
00335         SetPair( "cay", "H" );
00336         SetPair( "car", "Q" );
00337         SetPair( "aay", "N" );
00338         SetPair( "aar", "K" );
00339         SetPair( "gay", "D" );
00340         SetPair( "gar", "E" );
00341         SetPair( "ugy", "C" );
00342         SetPair( "ggx", "G" );
00343 
00344         SetPair( "ucx", "S" );
00345         SetPair( "agy", "S" );
00346         SetPair( "cgx", "R" );
00347         SetPair( "agr", "R" );
00348 }

Generated on Mon Feb 3 02:34:42 2003 for libGenome by doxygen1.3-rc3