00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00014
00015 #include "gn/gnDebug.h"
00016
00017
00018 const gnFilter *gnFilter::alphabetCharacterFilter(){
00019 const static gnFilter* t_filt = new gnFilter(alphabetCharacterFilterType);
00020 return t_filt;
00021 }
00022
00023 const gnFilter *gnFilter::numberCharacterFilter(){
00024 const static gnFilter* t_filt = new gnFilter(numberCharacterFilterType);
00025 return t_filt;
00026 }
00027
00028
00029 const gnFilter *gnFilter::proteinSeqFilter(){
00030 const static gnFilter* t_filt = new gnFilter(proteinSeqFilterType);
00031 return t_filt;
00032 }
00033
00034 const gnFilter *gnFilter::basicDNASeqFilter(){
00035 const static gnFilter* t_filt = new gnFilter(basicDNASeqFilterType);
00036 return t_filt;
00037 }
00038
00039 const gnFilter *gnFilter::fullDNASeqFilter(){
00040 const static gnFilter* t_filt = new gnFilter(fullDNASeqFilterType);
00041 return t_filt;
00042 }
00043
00044 const gnFilter *gnFilter::basicRNASeqFilter(){
00045 const static gnFilter* t_filt = new gnFilter(basicRNASeqFilterType);
00046 return t_filt;
00047 }
00048
00049 const gnFilter *gnFilter::fullRNASeqFilter(){
00050 const static gnFilter* t_filt = new gnFilter(fullRNASeqFilterType);
00051 return t_filt;
00052 }
00053
00054 const gnFilter *gnFilter::DNAtoRNAFilter(){
00055 const static gnFilter* t_filt = new gnFilter(DNAtoRNAFilterType);
00056 return t_filt;
00057 }
00058
00059 const gnFilter *gnFilter::RNAtoDNAFilter(){
00060 const static gnFilter* t_filt = new gnFilter(RNAtoDNAFilterType);
00061 return t_filt;
00062 }
00063
00064 const gnFilter *gnFilter::DNAComplementFilter(){
00065 const static gnFilter* t_filt = new gnFilter(DNAComplementFilterType);
00066 return t_filt;
00067 }
00068
00069 const gnFilter *gnFilter::RNAComplementFilter(){
00070 const static gnFilter* t_filt = new gnFilter(RNAComplementFilterType);
00071 return t_filt;
00072 }
00073
00074
00075
00076 gnFilter::gnFilter()
00077 {
00078 m_defaultChar = 'n';
00079 m_rDefaultChar = 'n';
00080 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00081 m_pairArray[i] = NO_REVCOMP_CHAR;
00082 }
00083 gnFilter::gnFilter( const gnSeqC defaultChar, const gnSeqC rdefaultChar )
00084 {
00085 m_defaultChar = defaultChar;
00086 m_rDefaultChar = rdefaultChar;
00087 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00088 m_pairArray[i] = NO_REVCOMP_CHAR;
00089 }
00090
00091 gnFilter::gnFilter( const gnFilter &sf )
00092 {
00093 m_name = sf.m_name;
00094 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00095 m_pairArray[i] = sf.m_pairArray[i];
00096 m_defaultChar = sf.m_defaultChar;
00097 m_rDefaultChar = sf.m_rDefaultChar;
00098 }
00099
00100 gnFilter::gnFilter( const gnFilterType f_type ){
00101 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00102 m_pairArray[i] = NO_REVCOMP_CHAR;
00103 switch(f_type){
00104 case alphabetCharacterFilterType:
00105 CreateAlphabetCharacterFilter();
00106 break;
00107 case numberCharacterFilterType:
00108 CreateNumberCharacterFilter();
00109 break;
00110 case proteinSeqFilterType:
00111 CreateProteinFilter();
00112 break;
00113 case basicDNASeqFilterType:
00114 CreateBasicDNAFilter();
00115 break;
00116 case fullDNASeqFilterType:
00117 CreateFullDNAFilter();
00118 break;
00119 case basicRNASeqFilterType:
00120 CreateBasicRNAFilter();
00121 break;
00122 case fullRNASeqFilterType:
00123 CreateFullRNAFilter();
00124 break;
00125 case DNAtoRNAFilterType:
00126 CreateDNAtoRNAFilter();
00127 break;
00128 case RNAtoDNAFilterType:
00129 CreateRNAtoDNAFilter();
00130 break;
00131 case DNAComplementFilterType:
00132 CreateDNAComplementFilter();
00133 break;
00134 case RNAComplementFilterType:
00135 CreateRNAComplementFilter();
00136 break;
00137 }
00138 }
00139
00140
00141 gnFilter::~gnFilter()
00142 {
00143 }
00144
00145 inline
00146 void gnFilter::Filter( gnSeqC** seq, uint32& len ) const
00147 {
00148 gnSeqC* tmp = new gnSeqC[len];
00149 gnSeqI c=0;
00150 for(uint32 i=0; i < len; i++)
00151 if(IsValid((*seq)[i]))
00152 tmp[c++] = m_pairArray[(*seq)[i]];
00153 len = c;
00154 memcpy(*seq, tmp, len);
00155 delete[] tmp;
00156 }
00157
00158 void gnFilter::ReverseFilter( gnSeqC** seq, uint32& len ) const
00159 {
00160 gnSeqC tmp, dum;
00161 uint32 halfLen = len/2;
00162 uint32 end = len - 1;
00163 uint32 curB = 0;
00164 uint32 curE = end;
00165 for( uint32 i=0; i < halfLen ; ++i )
00166 {
00167 tmp = m_pairArray[(*seq)[i]];
00168 dum = m_pairArray[(*seq)[ end - i ]];
00169 if(dum != NO_REVCOMP_CHAR)
00170 (*seq)[ curB++ ] = dum;
00171 if(tmp != NO_REVCOMP_CHAR)
00172 (*seq)[ curE-- ] = tmp;
00173 }
00174 if(len&0x1){
00175 tmp = m_pairArray[(*seq)[halfLen]];
00176 if(tmp != NO_REVCOMP_CHAR)
00177 (*seq)[curB++] = tmp;
00178 }
00179
00180 if(curE >= curB){
00181 memmove(*seq+curB, *seq+curE+1, end - curE);
00182 len = end - curE + curB;
00183 }
00184
00185 }
00186
00187 void gnFilter::Filter( string &seq ) const
00188 {
00189 gnSeqI c=0;
00190 for(uint32 i=0; i < seq.length(); i++)
00191 if(IsValid(seq[i]))
00192 seq[c++] = m_pairArray[seq[i]];
00193 }
00194
00195 void gnFilter::ReverseFilter( string &seq ) const
00196 {
00197 gnSeqC tmp, dum;
00198 uint32 halfLen = seq.length()/2;
00199 uint32 end = seq.length() - 1;
00200 uint32 curB = 0;
00201 uint32 curE = end;
00202 for( uint32 i=0; i < halfLen ; ++i )
00203 {
00204 tmp = m_pairArray[seq[i]];
00205 dum = m_pairArray[seq[ end - i ]];
00206 if(dum != NO_REVCOMP_CHAR)
00207 seq[ curB++ ] = dum;
00208 if(tmp != NO_REVCOMP_CHAR)
00209 seq[ curE-- ] = tmp;
00210 }
00211 if(seq.length()&0x1){
00212 tmp = m_pairArray[seq[halfLen]];
00213 if(tmp != NO_REVCOMP_CHAR)
00214 seq[curB++] = tmp;
00215 }
00216
00217 if(curE >= curB){
00218 seq.erase(curB, curE-curB);
00219 }
00220 }
00221
00222
00223 void gnFilter::CreateAlphabetCharacterFilter()
00224 {
00225 SetDefaultChar( 0, 0 );
00226 SetName( "Alphabet Character Filter" );
00227 SetPair( 'A', 'a' );
00228 SetPair( 'B', 'b' );
00229 SetPair( 'C', 'c' );
00230 SetPair( 'D', 'd' );
00231 SetPair( 'E', 'e' );
00232 SetPair( 'F', 'f' );
00233 SetPair( 'G', 'g' );
00234 SetPair( 'H', 'h' );
00235 SetPair( 'I', 'i' );
00236 SetPair( 'J', 'j' );
00237 SetPair( 'K', 'k' );
00238 SetPair( 'L', 'l' );
00239 SetPair( 'M', 'm' );
00240 SetPair( 'N', 'n' );
00241 SetPair( 'O', 'o' );
00242 SetPair( 'P', 'p' );
00243 SetPair( 'Q', 'q' );
00244 SetPair( 'R', 'r' );
00245 SetPair( 'S', 's' );
00246 SetPair( 'T', 't' );
00247 SetPair( 'U', 'u' );
00248 SetPair( 'V', 'v' );
00249 SetPair( 'W', 'w' );
00250 SetPair( 'X', 'x' );
00251 SetPair( 'Y', 'y' );
00252 SetPair( 'Z', 'z' );
00253 }
00254
00255 void gnFilter::CreateNumberCharacterFilter()
00256 {
00257 SetDefaultChar( 0, 0 );
00258 SetName( "Number Character Filter" );
00259 SetSingle( '0' );
00260 SetSingle( '1' );
00261 SetSingle( '2' );
00262 SetSingle( '3' );
00263 SetSingle( '4' );
00264 SetSingle( '5' );
00265 SetSingle( '6' );
00266 SetSingle( '7' );
00267 SetSingle( '8' );
00268 SetSingle( '9' );
00269 }
00270
00271 void gnFilter::CreateProteinFilter()
00272 {
00273 SetDefaultChar( 'u', 'u' );
00274 SetName( "Protein Filter" );
00275 SetSingle( 'A' );
00276 SetSingle( 'R' );
00277 SetSingle( 'N' );
00278 SetSingle( 'D' );
00279 SetSingle( 'C' );
00280 SetSingle( 'Q' );
00281 SetSingle( 'E' );
00282 SetSingle( 'G' );
00283 SetSingle( 'H' );
00284 SetSingle( 'I' );
00285 SetSingle( 'L' );
00286 SetSingle( 'K' );
00287 SetSingle( 'M' );
00288 SetSingle( 'F' );
00289 SetSingle( 'P' );
00290 SetSingle( 'S' );
00291 SetSingle( 'T' );
00292 SetSingle( 'W' );
00293 SetSingle( 'Y' );
00294 SetSingle( 'V' );
00295
00296 SetSingle( 'a' );
00297 SetSingle( 'r' );
00298 SetSingle( 'n' );
00299 SetSingle( 'd' );
00300 SetSingle( 'c' );
00301 SetSingle( 'q' );
00302 SetSingle( 'e' );
00303 SetSingle( 'g' );
00304 SetSingle( 'h' );
00305 SetSingle( 'i' );
00306 SetSingle( 'l' );
00307 SetSingle( 'k' );
00308 SetSingle( 'm' );
00309 SetSingle( 'f' );
00310 SetSingle( 'p' );
00311 SetSingle( 's' );
00312 SetSingle( 't' );
00313 SetSingle( 'w' );
00314 SetSingle( 'y' );
00315 SetSingle( 'v' );
00316 }
00317
00318 void gnFilter::CreateBasicDNAFilter()
00319 {
00320 SetDefaultChar( 'n', 'n' );
00321 SetName( "Basic DNA Filter" );
00322 SetSingle( 'a' );
00323 SetSingle( 'c' );
00324 SetSingle( 'g' );
00325 SetSingle( 't' );
00326 SetSingle( 'A' );
00327 SetSingle( 'C' );
00328 SetSingle( 'G' );
00329 SetSingle( 'T' );
00330 SetSingle( 'n' );
00331 SetSingle( 'N' );
00332 SetSingle( 'x' );
00333 SetSingle( 'X' );
00334 SetSingle( '-' );
00335 }
00336 void gnFilter::CreateFullDNAFilter()
00337 {
00338 SetDefaultChar( 'n', 'n' );
00339 SetName( "Full DNA Filter" );
00340 SetSingle( 'a' );
00341 SetSingle( 'c' );
00342 SetSingle( 'g' );
00343 SetSingle( 't' );
00344 SetSingle( 'A' );
00345 SetSingle( 'C' );
00346 SetSingle( 'G' );
00347 SetSingle( 'T' );
00348 SetSingle( 'r' );
00349 SetSingle( 'y' );
00350 SetSingle( 'k' );
00351 SetSingle( 'm' );
00352 SetSingle( 'b' );
00353 SetSingle( 'v' );
00354 SetSingle( 'd' );
00355 SetSingle( 'h' );
00356 SetSingle( 'R' );
00357 SetSingle( 'Y' );
00358 SetSingle( 'K' );
00359 SetSingle( 'M' );
00360 SetSingle( 'B' );
00361 SetSingle( 'V' );
00362 SetSingle( 'D' );
00363 SetSingle( 'H' );
00364 SetSingle( 's' );
00365 SetSingle( 'S' );
00366 SetSingle( 'w' );
00367 SetSingle( 'W' );
00368 SetSingle( 'n' );
00369 SetSingle( 'N' );
00370 SetSingle( 'x' );
00371 SetSingle( 'X' );
00372 SetSingle( '-' );
00373 }
00374 void gnFilter::CreateBasicRNAFilter()
00375 {
00376 SetDefaultChar( 'n', 'n' );
00377 SetName( "Basic RNA Filter" );
00378 SetSingle( 'a' );
00379 SetSingle( 'c' );
00380 SetSingle( 'g' );
00381 SetSingle( 'u' );
00382 SetSingle( 'A' );
00383 SetSingle( 'C' );
00384 SetSingle( 'G' );
00385 SetSingle( 'U' );
00386 SetSingle( 'n' );
00387 SetSingle( 'N' );
00388 SetSingle( '-' );
00389 }
00390 void gnFilter::CreateFullRNAFilter()
00391 {
00392 SetDefaultChar( 'n', 'n' );
00393 SetName( "Full RNA Filter" );
00394 SetSingle( 'a' );
00395 SetSingle( 'c' );
00396 SetSingle( 'g' );
00397 SetSingle( 'u' );
00398 SetSingle( 'A' );
00399 SetSingle( 'C' );
00400 SetSingle( 'G' );
00401 SetSingle( 'U' );
00402 SetSingle( 'r' );
00403 SetSingle( 'y' );
00404 SetSingle( 'k' );
00405 SetSingle( 'm' );
00406 SetSingle( 'b' );
00407 SetSingle( 'v' );
00408 SetSingle( 'd' );
00409 SetSingle( 'h' );
00410 SetSingle( 'R' );
00411 SetSingle( 'Y' );
00412 SetSingle( 'K' );
00413 SetSingle( 'M' );
00414 SetSingle( 'B' );
00415 SetSingle( 'V' );
00416 SetSingle( 'D' );
00417 SetSingle( 'H' );
00418 SetSingle( 's' );
00419 SetSingle( 'S' );
00420 SetSingle( 'w' );
00421 SetSingle( 'W' );
00422 SetSingle( 'n' );
00423 SetSingle( 'N' );
00424 SetSingle( '-' );
00425 }
00426
00427
00428 void gnFilter::CreateDNAtoRNAFilter(){
00429 SetDefaultChar( 'n', 'n' );
00430 SetName( "Full DNA to RNA Filter" );
00431 SetSingle( 'a' );
00432 SetSingle( 'c' );
00433 SetSingle( 'g' );
00434 SetPair( 't', 'u' );
00435 SetSingle( 'A' );
00436 SetSingle( 'C' );
00437 SetSingle( 'G' );
00438 SetPair( 'T', 'U' );
00439 SetSingle( 'r' );
00440 SetSingle( 'y' );
00441 SetSingle( 'k' );
00442 SetSingle( 'm' );
00443 SetSingle( 'b' );
00444 SetSingle( 'v' );
00445 SetSingle( 'd' );
00446 SetSingle( 'h' );
00447 SetSingle( 'R' );
00448 SetSingle( 'Y' );
00449 SetSingle( 'K' );
00450 SetSingle( 'M' );
00451 SetSingle( 'B' );
00452 SetSingle( 'V' );
00453 SetSingle( 'D' );
00454 SetSingle( 'H' );
00455 SetSingle( 's' );
00456 SetSingle( 'S' );
00457 SetSingle( 'w' );
00458 SetSingle( 'W' );
00459 SetSingle( 'n' );
00460 SetSingle( 'N' );
00461 SetSingle( '-' );
00462 }
00463
00464 void gnFilter::CreateRNAtoDNAFilter(){
00465 SetDefaultChar( 'n', 'n' );
00466 SetName( "Full RNA to DNA Filter" );
00467 SetSingle( 'a' );
00468 SetSingle( 'c' );
00469 SetSingle( 'g' );
00470 SetPair( 'u', 't' );
00471 SetSingle( 'A' );
00472 SetSingle( 'C' );
00473 SetSingle( 'G' );
00474 SetPair( 'U', 'T' );
00475 SetSingle( 'r' );
00476 SetSingle( 'y' );
00477 SetSingle( 'k' );
00478 SetSingle( 'm' );
00479 SetSingle( 'b' );
00480 SetSingle( 'v' );
00481 SetSingle( 'd' );
00482 SetSingle( 'h' );
00483 SetSingle( 'R' );
00484 SetSingle( 'Y' );
00485 SetSingle( 'K' );
00486 SetSingle( 'M' );
00487 SetSingle( 'B' );
00488 SetSingle( 'V' );
00489 SetSingle( 'D' );
00490 SetSingle( 'H' );
00491 SetSingle( 's' );
00492 SetSingle( 'S' );
00493 SetSingle( 'w' );
00494 SetSingle( 'W' );
00495 SetSingle( 'n' );
00496 SetSingle( 'N' );
00497 SetSingle( '-' );
00498 }
00499
00500 void gnFilter::CreateDNAComplementFilter(){
00501 SetDefaultChar( 'n', 'n' );
00502 SetName( "Full DNA Complement Filter" );
00503 SetPair( 'a', 't' );
00504 SetPair( 'A', 'T' );
00505 SetPair( 't', 'a' );
00506 SetPair( 'T', 'A' );
00507 SetPair( 'c', 'g' );
00508 SetPair( 'C', 'G' );
00509 SetPair( 'g', 'c' );
00510 SetPair( 'G', 'C' );
00511 SetPair( 'r', 'y' );
00512 SetPair( 'R', 'Y' );
00513 SetPair( 'y', 'r' );
00514 SetPair( 'Y', 'R' );
00515 SetPair( 'k', 'm' );
00516 SetPair( 'K', 'M' );
00517 SetPair( 'm', 'k' );
00518 SetPair( 'M', 'K' );
00519 SetSingle( 's' );
00520 SetSingle( 'S' );
00521 SetSingle( 'w' );
00522 SetSingle( 'W' );
00523 SetPair( 'b', 'v' );
00524 SetPair( 'B', 'V' );
00525 SetPair( 'v', 'b' );
00526 SetPair( 'V', 'B' );
00527 SetPair( 'd', 'h' );
00528 SetPair( 'D', 'H' );
00529 SetPair( 'h', 'd' );
00530 SetPair( 'H', 'D' );
00531 SetSingle( 'n' );
00532 SetSingle( 'N' );
00533 SetSingle( 'x' );
00534 SetSingle( 'X' );
00535 SetSingle( '-' );
00536 }
00537
00538 void gnFilter::CreateRNAComplementFilter(){
00539 SetDefaultChar( 'n', 'n' );
00540 SetName( "Full RNA Complement Filter" );
00541 SetPair( 'a', 'u' );
00542 SetPair( 'A', 'U' );
00543 SetPair( 'u', 'a' );
00544 SetPair( 'U', 'A' );
00545 SetPair( 'c', 'g' );
00546 SetPair( 'C', 'G' );
00547 SetPair( 'g', 'c' );
00548 SetPair( 'G', 'C' );
00549 SetPair( 'r', 'y' );
00550 SetPair( 'R', 'Y' );
00551 SetPair( 'y', 'r' );
00552 SetPair( 'Y', 'R' );
00553 SetPair( 'k', 'm' );
00554 SetPair( 'K', 'M' );
00555 SetPair( 'm', 'k' );
00556 SetPair( 'M', 'K' );
00557 SetSingle( 's' );
00558 SetSingle( 'S' );
00559 SetSingle( 'w' );
00560 SetSingle( 'W' );
00561 SetPair( 'b', 'v' );
00562 SetPair( 'B', 'V' );
00563 SetPair( 'v', 'b' );
00564 SetPair( 'V', 'B' );
00565 SetPair( 'd', 'h' );
00566 SetPair( 'D', 'H' );
00567 SetPair( 'h', 'd' );
00568 SetPair( 'H', 'D' );
00569 SetSingle( 'n' );
00570 SetSingle( 'N' );
00571 SetSingle( '-' );
00572 }