00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "gn/gnSequence.h"
00013 #include "gn/gnSourceFactory.h"
00014 #include "gn/gnBaseSource.h"
00015 #include "gn/gnGenomeSpec.h"
00016 #include "gn/gnFragmentSpec.h"
00017 #include "gn/gnSourceSpec.h"
00018 #include "gn/gnStringSpec.h"
00019 #include "gn/gnStringHeader.h"
00020
00021
00022 gnSequence::gnSequence( )
00023 {
00024 spec = new gnGenomeSpec();
00025 comparator = gnCompare::DNASeqCompare();
00026 }
00027 gnSequence::gnSequence( const gnSeqC* seq){
00028 spec = new gnGenomeSpec();
00029 if (seq[0] != 0){
00030 gnFragmentSpec *fragSpec = new gnFragmentSpec();
00031 spec->AddSpec(fragSpec);
00032 fragSpec->AddSpec(new gnStringSpec(seq));
00033 }
00034 comparator = gnCompare::DNASeqCompare();
00035 }
00036 gnSequence::gnSequence( const string& str){
00037 spec = new gnGenomeSpec();
00038 if (str.length() != 0){
00039 gnFragmentSpec *fragSpec = new gnFragmentSpec();
00040 spec->AddSpec(fragSpec);
00041 fragSpec->AddSpec(new gnStringSpec(str));
00042 }
00043 comparator = gnCompare::DNASeqCompare();
00044 }
00045 gnSequence::gnSequence( const gnGenomeSpec& gngs){
00046 spec = gngs.Clone();
00047 comparator = gnCompare::DNASeqCompare();
00048 }
00049 gnSequence::gnSequence( const gnFragmentSpec& gnfs){
00050 spec = new gnGenomeSpec();
00051 spec->AddSpec(gnfs.Clone());
00052 comparator = gnCompare::DNASeqCompare();
00053 }
00054 gnSequence::gnSequence( const gnContigSpec& gcs){
00055 spec = new gnGenomeSpec();
00056 gnFragmentSpec *fragSpec = new gnFragmentSpec();
00057 fragSpec->AddSpec(gcs.Clone());
00058 comparator = gnCompare::DNASeqCompare();
00059 }
00060 gnSequence::gnSequence( gnSeqC *bases, gnSeqI length){
00061 spec = new gnGenomeSpec();
00062 if (length > 0){
00063 gnFragmentSpec *fragSpec = new gnFragmentSpec();
00064 spec->AddSpec(fragSpec);
00065 fragSpec->AddSpec(new gnStringSpec(string(bases, length)));
00066 }
00067 comparator = gnCompare::DNASeqCompare();
00068 }
00069 gnSequence::gnSequence( const gnSequence& seq )
00070 {
00071 *this = seq;
00072 }
00073
00074
00075 gnSequence::~gnSequence()
00076 {
00077 if (spec != NULL)
00078 delete spec;
00079 }
00080
00081 gnSequence& gnSequence::operator=( const gnSequence& seq){
00082 spec = seq.spec->Clone();
00083 filter_list = seq.filter_list;
00084 comparator = seq.comparator;
00085 return *this;
00086 }
00087
00088
00089 gnSequence* gnSequence::Clone() const
00090 {
00091 return new gnSequence( *this );
00092 }
00093
00094 int gnSequence::compare(const string& str) const{
00095 STACK_TRACE_START
00096 gnSeqI len = length();
00097 gnSeqI seq_len = str.length();
00098 gnSeqI comparelen = len < seq_len ? len : seq_len;
00099 gnSeqI compared = 0;
00100 while(comparelen > 0){
00101 gnSeqI curlen = comparelen < BUFFER_SIZE ? comparelen : BUFFER_SIZE;
00102 string bases = ToString(compared, curlen);
00103 string seq_bases = str.substr(compared, curlen);
00104 if(comparator->LessThan(bases, seq_bases))
00105 return -1;
00106 else if(comparator->LessThan(seq_bases, bases))
00107 return 1;
00108
00109 compared += curlen;
00110 comparelen -= curlen;
00111 }
00112 if(len < seq_len)
00113 return -1;
00114 else if(len > seq_len)
00115 return 1;
00116 return 0;
00117 STACK_TRACE_END
00118 }
00119
00120 int gnSequence::compare(const gnSequence& seq) const{
00121 STACK_TRACE_START
00122 gnSeqI len = length();
00123 gnSeqI seq_len = seq.length();
00124 gnSeqI comparelen = len < seq_len ? len : seq_len;
00125 gnSeqI compared = 0;
00126 while(comparelen > 0){
00127 gnSeqI curlen = comparelen < BUFFER_SIZE ? comparelen : BUFFER_SIZE;
00128 string bases = ToString(curlen, compared+1);
00129 string seq_bases = seq.ToString(curlen, compared+1);
00130 if(comparator->LessThan(bases, seq_bases))
00131 return -1;
00132 else if(comparator->LessThan(seq_bases, bases))
00133 return 1;
00134
00135 compared+= curlen;
00136 comparelen -= curlen;
00137 }
00138 if(len < seq_len)
00139 return -1;
00140 else if(len > seq_len)
00141 return 1;
00142 return 0;
00143 STACK_TRACE_END
00144 }
00145
00146 void gnSequence::insert( const gnSeqI offset, const gnSeqC *bases, const gnSeqI len){
00147 STACK_TRACE_START
00148 string str(bases, len);
00149 gnStringSpec gpbs(str);
00150 insert(offset, gpbs);
00151 STACK_TRACE_END
00152 }
00153
00154
00155 void gnSequence::insert( const gnSeqI offset, const gnGenomeSpec& gpbs){
00156 STACK_TRACE_START
00157 if(offset == 0)
00158 Throw_gnEx(SeqIndexOutOfBounds());
00159 if(offset == GNSEQI_END || spec->GetLength() < offset){
00160 for(uint32 i=0; i < gpbs.GetSpecListLength(); i++)
00161 spec->AddSpec(gpbs.GetSpec(i)->Clone());
00162 return;
00163 }
00164
00165 gnGenomeSpec* tmpSpec = spec->Clone();
00166
00167
00168 gnSeqI real_offset = offset - 1;
00169 gnSeqI endCrop = spec->GetLength() - real_offset;
00170 spec->CropEnd(endCrop);
00171 tmpSpec->CropStart(real_offset);
00172
00173 insert(GNSEQI_END, gpbs);
00174 insert(GNSEQI_END, *tmpSpec);
00175 delete tmpSpec;
00176 STACK_TRACE_END
00177 }
00178
00179 gnSequence const gnSequence::operator+(const gnSequence& seq) const{
00180 STACK_TRACE_START
00181 gnSequence rval(*this);
00182 rval.append(seq);
00183 return rval;
00184 STACK_TRACE_END
00185 }
00186
00187 gnSequence gnSequence::subseq(const gnSeqI offset, const gnSeqI length) const{
00188 STACK_TRACE_START
00189 if(length == 0)
00190 return gnSequence();
00191 if(offset == 0)
00192 Throw_gnEx(SeqIndexOutOfBounds());
00193
00194 gnSequence tmpSeq;
00195 delete tmpSeq.spec;
00196 tmpSeq.spec = spec->CloneRange(offset - 1, length);
00197 return tmpSeq;
00198 STACK_TRACE_END
00199 }
00200 void gnSequence::erase( const gnSeqI offset, const gnSeqI len ){
00201 STACK_TRACE_START
00202 if(offset == 0)
00203 Throw_gnEx(SeqIndexOutOfBounds());
00204
00205 gnSeqI current_length = length();
00206 if(offset > current_length)
00207 Throw_gnEx(SeqIndexOutOfBounds());
00208 gnSeqI endBase = offset + len - 1 < current_length ? offset + len - 1 : current_length;
00209 gnSeqI startBase = offset - 1;
00210
00211 gnGenomeSpec* tmpSpec = spec->CloneRange(endBase, GNSEQI_END);
00212 uint32 lennard = tmpSpec->GetLength();
00213 spec->CropEnd(current_length - startBase);
00214 lennard = length();
00215 insert(GNSEQI_END, *tmpSpec);
00216 delete tmpSpec;
00217 STACK_TRACE_END
00218 }
00219 void gnSequence::splitContig(const gnSeqI splitI, const uint32 contigI){
00220 STACK_TRACE_START
00221 gnSeqI splitBase = splitI;
00222 gnSeqI current_length = length();
00223 if(splitI == 0)
00224 Throw_gnEx(SeqIndexOutOfBounds());
00225 if(contigI == ALL_CONTIGS && splitI > current_length)
00226 Throw_gnEx(SeqIndexOutOfBounds());
00227 else
00228 localToGlobal(contigI, splitBase);
00229 gnGenomeSpec* tmpSpec = spec->Clone();
00230 tmpSpec->CropStart(splitBase);
00231 spec->CropEnd(current_length - splitBase);
00232
00233 insert(GNSEQI_END, *tmpSpec);
00234 delete tmpSpec;
00235 STACK_TRACE_END
00236 }
00237
00238
00239 istream& operator>>(istream& is, gnSequence& gps){
00240 string bases;
00241 is >> bases;
00242 gps.append(bases);
00243 return is;
00244 }
00245 ostream& operator<<(ostream& os, const gnSequence& gps){
00246 os << gps.ToString();
00247 return os;
00248 }
00249 string gnSequence::ToString( const gnSeqI len, const gnSeqI offset ) const
00250 {
00251 STACK_TRACE_START
00252 string str;
00253 ToString(str, len, offset);
00254 return str;
00255 STACK_TRACE_END
00256 }
00257 boolean gnSequence::ToString( string& str, const gnSeqI len, const gnSeqI offset ) const
00258 {
00259 STACK_TRACE_START
00260 gnSeqI real_offset = offset - 1;
00261 gnSeqI m_length = length();
00262 gnSeqI readSize = len > m_length - real_offset ? m_length - real_offset : len;
00263 Array<char> array_buf( readSize+1 );
00264 char *buf = array_buf.data;
00265 boolean success = spec->SeqRead( real_offset, buf, readSize, ALL_CONTIGS);
00266 buf[readSize] = '\0';
00267 str = buf;
00268
00269
00270 list<const gnBaseFilter*>::const_iterator iter = filter_list.begin();
00271 for(; iter != filter_list.end(); iter++)
00272 (*iter)->Filter(str);
00273
00274 if( success )
00275 return true;
00276 return false;
00277 STACK_TRACE_END
00278 }
00279 boolean gnSequence::ToArray( gnSeqC* pSeqC, gnSeqI length, const gnSeqI offset ) const
00280 {
00281 STACK_TRACE_START
00282 gnSeqI real_offset = offset - 1;
00283 if(offset == GNSEQI_END)
00284 return false;
00285 gnSeqC* tmp = new gnSeqC[length];
00286 boolean success = spec->SeqRead( real_offset, tmp, length, ALL_CONTIGS);
00287
00288
00289 list<const gnBaseFilter*>::const_iterator iter = filter_list.begin();
00290 gnSeqC** tomp = &tmp;
00291 for(; iter != filter_list.end(); iter++)
00292 (*iter)->Filter(tomp, length);
00293 memcpy(pSeqC, *tomp, length);
00294 delete[] *tomp;
00295
00296 if( success )
00297 return true;
00298
00299 return false;
00300 STACK_TRACE_END
00301 }
00302 gnSeqC gnSequence::GetSeqC( const gnSeqI offset ) const
00303 {
00304 STACK_TRACE_START
00305 char block;
00306 gnSeqI readLen = 1;
00307 gnSeqI real_offset = offset - 1;
00308 boolean success = spec->SeqRead( real_offset, &block, readLen, ALL_CONTIGS);
00309
00310
00311 list<const gnBaseFilter*>::const_iterator iter = filter_list.begin();
00312 for(; iter != filter_list.end(); iter++)
00313 block = (*iter)->Filter(block);
00314
00315 if( success )
00316 return block;
00317
00318 return GNSEQC_NULL;
00319 STACK_TRACE_END
00320 }
00321 gnSeqC gnSequence::operator[]( const gnSeqI offset ) const
00322 {
00323 STACK_TRACE_START
00324 return GetSeqC( offset );
00325 STACK_TRACE_END
00326 }
00327
00328 gnSeqI gnSequence::contigListSize() const{
00329 STACK_TRACE_START
00330 return spec->GetSpecListLength();
00331 STACK_TRACE_END
00332 }
00333 gnSeqI gnSequence::contigListLength() const{
00334 STACK_TRACE_START
00335 return spec->GetSpecListLength();
00336 STACK_TRACE_END
00337 }
00338
00339
00340 uint32 gnSequence::contigIndexByBase( const gnSeqI baseI) const{
00341 STACK_TRACE_START
00342 return spec->GetSpecIndexByBase(baseI-1);
00343 STACK_TRACE_END
00344 }
00345 gnSequence gnSequence::contig( const uint32 contigI) const{
00346 STACK_TRACE_START
00347 if(contigI == ALL_CONTIGS)
00348 return *this;
00349 return gnSequence(*spec->GetSpec(contigI));
00350 STACK_TRACE_END
00351 }
00352
00353 gnSequence gnSequence::contigByBase( const gnSeqI baseI) const{
00354 STACK_TRACE_START
00355 return gnSequence(*spec->GetSpecByBase(baseI-1));
00356 STACK_TRACE_END
00357 }
00358 uint32 gnSequence::contigIndexByName( string& contigName) const{
00359 STACK_TRACE_START
00360 return spec->GetSpecIndexByName(contigName);
00361 STACK_TRACE_END
00362 }
00363
00364 gnSeqI gnSequence::contigStart( const uint32 contigI) const{
00365 STACK_TRACE_START
00366 int64 leafI = contigI;
00367
00368 return spec->GetSpecStartBase( leafI ) + 1;
00369 STACK_TRACE_END
00370 }
00371
00372 gnSeqI gnSequence::contigLength( const uint32 contigI) const{
00373 STACK_TRACE_START
00374 return spec->GetSpec( contigI )->GetLength();
00375 STACK_TRACE_END
00376 }
00377
00378 string gnSequence::contigName( const uint32 contigI) const{
00379 STACK_TRACE_START
00380 return spec->GetSpec(contigI)->GetName();
00381 STACK_TRACE_END
00382 }
00383
00384 void gnSequence::globalToLocal(uint32& contigI, gnSeqI& baseI) const{
00385 STACK_TRACE_START
00386 contigI = contigIndexByBase(baseI);
00387 baseI -= (contigStart(contigI) - 1);
00388 STACK_TRACE_END
00389 }
00390
00391 void gnSequence::localToGlobal(const uint32 contigI, gnSeqI& baseI) const{
00392 STACK_TRACE_START
00393 if(baseI > contigLength(contigI))
00394 Throw_gnEx(SeqIndexOutOfBounds());
00395 baseI += contigStart(contigI) - 1;
00396 STACK_TRACE_END
00397 }
00398
00399 void gnSequence::globalToSource(uint32& contigI, gnSeqI& baseI) const{
00400 STACK_TRACE_START
00401 baseI--;
00402 gnSeqI seq_contig = spec->GetSpecIndexByBase(baseI);
00403 gnSeqI new_base = baseI - spec->GetSpecStartBase(seq_contig);
00404 gnFragmentSpec *fragment_spec = spec->GetSpec(seq_contig);
00405 seq_contig = fragment_spec->GetSpecIndexByBase(new_base);
00406 new_base = new_base - fragment_spec->GetSpecStartBase(seq_contig);
00407 gnContigSpec* contig_spec = fragment_spec->GetSpec(seq_contig);
00408 contigI = contig_spec->GetSourceContigIndex();
00409 gnSeqI contig_start = contig_spec->GetStart();
00410 if(contig_spec->IsReverseComplement()){
00411 gnSeqI source_len = contig_spec->GetSourceLength();
00412
00413
00414
00415
00416
00417
00418 baseI = (contig_start - new_base - 1 + source_len) % source_len;
00419 }else
00420 baseI = contig_start + new_base + 1;
00421 STACK_TRACE_END
00422 }
00423
00424 void gnSequence::localToSource(uint32& contigI, gnSeqI& baseI) const{
00425 STACK_TRACE_START
00426 localToGlobal(contigI, baseI);
00427 globalToSource(contigI, baseI);
00428 STACK_TRACE_END
00429 }
00430
00431 gnSequence gnSequence::contigByName( string& contigName) const{
00432 STACK_TRACE_START
00433 uint32 contigIndex = spec->GetSpecIndexByName(contigName);
00434 return gnSequence(*spec->GetSpec(contigIndex));
00435 STACK_TRACE_END
00436 }
00437
00438 void gnSequence::setContigName( const uint32 contigI, const string& contig_name) {
00439 STACK_TRACE_START
00440 if(contigI == ALL_CONTIGS)
00441 spec->SetName(contig_name);
00442 else
00443 spec->GetSpec(contigI)->SetName(contig_name);
00444 STACK_TRACE_END
00445 }
00446
00447 void gnSequence::setReverseComplement( const boolean revComp, const uint32 contigI){
00448 STACK_TRACE_START
00449 if(contigI == ALL_CONTIGS)
00450 spec->SetReverseComplement(revComp);
00451 else{
00452 gnFragmentSpec* contig = spec->GetSpec(contigI);
00453 contig->SetReverseComplement(revComp);
00454 }
00455 STACK_TRACE_END
00456 }
00457
00458 boolean gnSequence::isReverseComplement( const uint32 contigI ){
00459 STACK_TRACE_START
00460 if(contigI == ALL_CONTIGS)
00461 return spec->IsReverseComplement();
00462 gnFragmentSpec* contig = spec->GetSpec(contigI);
00463 return contig->IsReverseComplement();
00464 STACK_TRACE_END
00465 }
00466
00467 uint32 gnSequence::getHeaderListLength(const uint32 contigI) const{
00468 STACK_TRACE_START
00469 if(contigI == ALL_CONTIGS)
00470 return spec->GetHeaderListLength();
00471 else{
00472 return spec->GetSpec(contigI)->GetHeaderListLength();
00473 }
00474 STACK_TRACE_END
00475 }
00476
00477 gnBaseHeader* gnSequence::getHeader(const uint32 contigI, const uint32 headerI) const{
00478 STACK_TRACE_START
00479 if(contigI == ALL_CONTIGS)
00480 return spec->GetHeader(headerI);
00481 else{
00482 return spec->GetSpec(contigI)->GetHeader(headerI);
00483 }
00484 STACK_TRACE_END
00485 }
00486
00487 void gnSequence::addHeader(const uint32 contigI, gnBaseHeader* header, const uint32 headerI){
00488 STACK_TRACE_START
00489 if(contigI == ALL_CONTIGS)
00490 spec->AddHeader(header, headerI);
00491 else{
00492 spec->GetSpec(contigI)->AddHeader(header, headerI);
00493 }
00494 STACK_TRACE_END
00495 }
00496
00497 void gnSequence::removeHeader(const uint32 contigI, const uint32 headerI){
00498 STACK_TRACE_START
00499 if(contigI == ALL_CONTIGS)
00500 spec->RemoveHeader(headerI);
00501 else
00502 spec->GetSpec(contigI)->RemoveHeader(headerI);
00503 STACK_TRACE_END
00504 }
00505
00506 void gnSequence::merge(const gnSeqI startBase, const gnSeqI endBase){
00507 STACK_TRACE_START
00508 STACK_TRACE_END
00509 }
00510
00511 void gnSequence::mergeContigs(const uint32 startC, const uint32 endC){
00512 STACK_TRACE_START
00513 spec->MergeFragments(startC, endC);
00514 STACK_TRACE_END
00515 }
00516
00517 bool gnSequence::LoadSource(const string sourcename){
00518 STACK_TRACE_START
00519 gnSourceFactory* m_sourceFactory = gnSourceFactory::GetSourceFactory();
00520 gnBaseSource *m_pSource = m_sourceFactory->AddSource(sourcename, true);
00521 if (m_pSource!=NULL)
00522 {
00523 if(spec != NULL)
00524 delete spec;
00525 spec = m_pSource->GetSpec();
00526 return true;
00527 }
00528 return false;
00529 STACK_TRACE_END
00530 }
00531
00532 gnSeqI gnSequence::find(const gnSequence& search, const gnSeqI offset)const{
00533 STACK_TRACE_START
00534
00535 string searchIn=ToString();
00536 string find= search.ToString();
00537 string::size_type pos = searchIn.find(find, offset);
00538 if (pos == string::npos)
00539 return GNSEQI_ERROR;
00540 else{
00541 return pos++;
00542 }
00543 STACK_TRACE_END
00544 }