00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00013
00014 #include "gn/gnSourceFactory.h"
00015 #include "gn/gnBaseSource.h"
00016 #include "gn/gnGenomeSpec.h"
00017 #include "gn/gnFragmentSpec.h"
00018 #include "gn/gnSourceSpec.h"
00019 #include "gn/gnStringSpec.h"
00020 #include "gn/gnStringHeader.h"
00021
00022
00023 gnSequence::gnSequence( )
00024 {
00025 spec = new gnGenomeSpec();
00026 comparator = gnCompare::DNASeqCompare();
00027 }
00028 gnSequence::gnSequence( const gnSeqC* seq){
00029 spec = new gnGenomeSpec();
00030 if (seq[0] != 0){
00031 gnFragmentSpec *fragSpec = new gnFragmentSpec();
00032 spec->AddSpec(fragSpec);
00033 fragSpec->AddSpec(new gnStringSpec(seq));
00034 }
00035 comparator = gnCompare::DNASeqCompare();
00036 }
00037 gnSequence::gnSequence( const string& str){
00038 spec = new gnGenomeSpec();
00039 if (str.length() != 0){
00040 gnFragmentSpec *fragSpec = new gnFragmentSpec();
00041 spec->AddSpec(fragSpec);
00042 fragSpec->AddSpec(new gnStringSpec(str));
00043 }
00044 comparator = gnCompare::DNASeqCompare();
00045 }
00046 gnSequence::gnSequence( const gnGenomeSpec& gngs){
00047 spec = gngs.Clone();
00048 comparator = gnCompare::DNASeqCompare();
00049 }
00050 gnSequence::gnSequence( const gnFragmentSpec& gnfs){
00051 spec = new gnGenomeSpec();
00052 spec->AddSpec(gnfs.Clone());
00053 comparator = gnCompare::DNASeqCompare();
00054 }
00055 gnSequence::gnSequence( const gnContigSpec& gcs){
00056 spec = new gnGenomeSpec();
00057 gnFragmentSpec *fragSpec = new gnFragmentSpec();
00058 fragSpec->AddSpec(gcs.Clone());
00059 comparator = gnCompare::DNASeqCompare();
00060 }
00061 gnSequence::gnSequence( gnSeqC *bases, gnSeqI length){
00062 spec = new gnGenomeSpec();
00063 if (length > 0){
00064 gnFragmentSpec *fragSpec = new gnFragmentSpec();
00065 spec->AddSpec(fragSpec);
00066 fragSpec->AddSpec(new gnStringSpec(string(bases, length)));
00067 }
00068 comparator = gnCompare::DNASeqCompare();
00069 }
00070 gnSequence::gnSequence( const gnSequence& seq )
00071 {
00072 spec = seq.spec->Clone();
00073 filter_list = seq.filter_list;
00074 comparator = seq.comparator;
00075 }
00076
00077 gnSequence::~gnSequence()
00078 {
00079 if (spec != NULL) delete spec;
00080 }
00081
00082 gnSequence& gnSequence::operator=( const gnSequence& seq){
00083 spec = seq.spec->Clone();
00084 filter_list = seq.filter_list;
00085 comparator = seq.comparator;
00086 return *this;
00087 }
00088
00089
00090 gnSequence* gnSequence::Clone() const
00091 {
00092 return new gnSequence( *this );
00093 }
00094
00095 int gnSequence::compare(const string& str) const{
00096 STACK_TRACE_START
00097 gnSeqI len = length();
00098 gnSeqI seq_len = str.length();
00099 gnSeqI comparelen = len < seq_len ? len : seq_len;
00100 gnSeqI compared = 0;
00101 while(comparelen > 0){
00102 gnSeqI curlen = comparelen < BUFFER_SIZE ? comparelen : BUFFER_SIZE;
00103 string bases = ToString(compared, curlen);
00104 string seq_bases = str.substr(compared, curlen);
00105 if(comparator->LessThan(bases, seq_bases))
00106 return -1;
00107 else if(comparator->LessThan(seq_bases, bases))
00108 return 1;
00109
00110 compared += curlen;
00111 comparelen -= curlen;
00112 }
00113 if(len < seq_len)
00114 return -1;
00115 else if(len > seq_len)
00116 return 1;
00117 return 0;
00118 STACK_TRACE_END
00119 }
00120
00121 int gnSequence::compare(const gnSequence& seq) const{
00122 STACK_TRACE_START
00123 gnSeqI len = length();
00124 gnSeqI seq_len = seq.length();
00125 gnSeqI comparelen = len < seq_len ? len : seq_len;
00126 gnSeqI compared = 0;
00127 while(comparelen > 0){
00128 gnSeqI curlen = comparelen < BUFFER_SIZE ? comparelen : BUFFER_SIZE;
00129 string bases = ToString(curlen, compared+1);
00130 string seq_bases = seq.ToString(curlen, compared+1);
00131 if(comparator->LessThan(bases, seq_bases))
00132 return -1;
00133 else if(comparator->LessThan(seq_bases, bases))
00134 return 1;
00135
00136 compared+= curlen;
00137 comparelen -= curlen;
00138 }
00139 if(len < seq_len)
00140 return -1;
00141 else if(len > seq_len)
00142 return 1;
00143 return 0;
00144 STACK_TRACE_END
00145 }
00146
00147 void gnSequence::insert( const gnSeqI offset, const gnSeqC *bases, const gnSeqI len){
00148 STACK_TRACE_START
00149 string str(bases, len);
00150 gnStringSpec gpbs(str);
00151 insert(offset, gpbs);
00152 STACK_TRACE_END
00153 }
00154
00155
00156 void gnSequence::insert( const gnSeqI offset, const gnGenomeSpec& gpbs){
00157 STACK_TRACE_START
00158 if(offset == 0)
00159 Throw_gnEx(SeqIndexOutOfBounds());
00160 if(offset == GNSEQI_END || spec->GetLength() < offset){
00161 for(uint32 i=0; i < gpbs.GetSpecListLength(); i++)
00162 spec->AddSpec(gpbs.GetSpec(i)->Clone());
00163 return;
00164 }
00165
00166 gnGenomeSpec* tmpSpec = spec->Clone();
00167
00168
00169 gnSeqI real_offset = offset - 1;
00170 gnSeqI endCrop = spec->GetLength() - real_offset;
00171 spec->CropEnd(endCrop);
00172 tmpSpec->CropStart(real_offset);
00173
00174 insert(GNSEQI_END, gpbs);
00175 insert(GNSEQI_END, *tmpSpec);
00176 delete tmpSpec;
00177 STACK_TRACE_END
00178 }
00179
00180 gnSequence const gnSequence::operator+(const gnSequence& seq) const{
00181 STACK_TRACE_START
00182 gnSequence rval(*this);
00183 rval.append(seq);
00184 return rval;
00185 STACK_TRACE_END
00186 }
00187
00188 gnSequence gnSequence::subseq(const gnSeqI offset, const gnSeqI length) const{
00189 STACK_TRACE_START
00190 if(offset == 0)
00191 Throw_gnEx(SeqIndexOutOfBounds());
00192 if(length == 0)
00193 return gnSequence();
00194
00195 gnSequence tmpSeq(*spec->CloneRange(offset - 1, length));
00196 return tmpSeq;
00197 STACK_TRACE_END
00198 }
00199 void gnSequence::erase( const gnSeqI offset, const gnSeqI len ){
00200 STACK_TRACE_START
00201 if(offset == 0)
00202 Throw_gnEx(SeqIndexOutOfBounds());
00203
00204 gnSeqI current_length = length();
00205 if(offset > current_length)
00206 Throw_gnEx(SeqIndexOutOfBounds());
00207 gnSeqI endBase = offset + len - 1 < current_length ? offset + len - 1 : current_length;
00208 gnSeqI startBase = offset - 1;
00209
00210 gnGenomeSpec* tmpSpec = spec->CloneRange(endBase, GNSEQI_END);
00211 uint32 lennard = tmpSpec->GetLength();
00212 spec->CropEnd(current_length - startBase);
00213 lennard = length();
00214 insert(GNSEQI_END, *tmpSpec);
00215 delete tmpSpec;
00216 STACK_TRACE_END
00217 }
00218 void gnSequence::splitContig(const gnSeqI splitI, const uint32 contigI){
00219 STACK_TRACE_START
00220 gnSeqI splitBase = splitI;
00221 gnSeqI current_length = length();
00222 if(splitI == 0)
00223 Throw_gnEx(SeqIndexOutOfBounds());
00224 if(contigI == ALL_CONTIGS && splitI > current_length)
00225 Throw_gnEx(SeqIndexOutOfBounds());
00226 else
00227 localToGlobal(contigI, splitBase);
00228 gnGenomeSpec* tmpSpec = spec->Clone();
00229 tmpSpec->CropStart(splitBase);
00230 spec->CropEnd(current_length - splitBase);
00231
00232 insert(GNSEQI_END, *tmpSpec);
00233 delete tmpSpec;
00234 STACK_TRACE_END
00235 }
00236
00237
00238 istream& operator>>(istream& is, gnSequence& gps){
00239 string bases;
00240 is >> bases;
00241 gps.append(bases);
00242 return is;
00243 }
00244 ostream& operator<<(ostream& os, const gnSequence& gps){
00245 os << gps.ToString();
00246 return os;
00247 }
00248 string gnSequence::ToString( const gnSeqI len, const gnSeqI offset ) const
00249 {
00250 STACK_TRACE_START
00251 string str;
00252 ToString(str, len, offset);
00253 return str;
00254 STACK_TRACE_END
00255 }
00256 boolean gnSequence::ToString( string& str, const gnSeqI len, const gnSeqI offset ) const
00257 {
00258 STACK_TRACE_START
00259 gnSeqI real_offset = offset - 1;
00260 gnSeqI m_length = length();
00261 gnSeqI readSize = len > m_length - real_offset ? m_length - real_offset : len;
00262 char *buf = new char[readSize+1];
00263 boolean success = spec->SeqRead( real_offset, buf, readSize, ALL_CONTIGS);
00264 buf[readSize] = '\0';
00265 str = buf;
00266 delete[] buf;
00267
00268
00269 list<const gnBaseFilter*>::const_iterator iter = filter_list.begin();
00270 for(; iter != filter_list.end(); iter++)
00271 (*iter)->Filter(str);
00272
00273 if( success )
00274 return true;
00275 return false;
00276 STACK_TRACE_END
00277 }
00278 boolean gnSequence::ToArray( gnSeqC* pSeqC, gnSeqI length, const gnSeqI offset ) const
00279 {
00280 STACK_TRACE_START
00281 gnSeqI real_offset = offset - 1;
00282 if(offset == GNSEQI_END)
00283 return false;
00284 gnSeqC* tmp = new gnSeqC[length];
00285 boolean success = spec->SeqRead( real_offset, tmp, length, ALL_CONTIGS);
00286
00287
00288 list<const gnBaseFilter*>::const_iterator iter = filter_list.begin();
00289 for(; iter != filter_list.end(); iter++)
00290 (*iter)->Filter(&tmp, length);
00291 memcpy(pSeqC, tmp, length);
00292 delete[] tmp;
00293
00294 if( success )
00295 return true;
00296
00297 return false;
00298 STACK_TRACE_END
00299 }
00300 gnSeqC gnSequence::GetSeqC( const gnSeqI offset ) const
00301 {
00302 STACK_TRACE_START
00303 char block;
00304 gnSeqI readLen = 1;
00305 gnSeqI real_offset = offset - 1;
00306 boolean success = spec->SeqRead( real_offset, &block, readLen, ALL_CONTIGS);
00307
00308
00309 list<const gnBaseFilter*>::const_iterator iter = filter_list.begin();
00310 for(; iter != filter_list.end(); iter++)
00311 block = (*iter)->Filter(block);
00312
00313 if( success )
00314 return block;
00315
00316 return GNSEQC_NULL;
00317 STACK_TRACE_END
00318 }
00319 gnSeqC gnSequence::operator[]( const gnSeqI offset ) const
00320 {
00321 STACK_TRACE_START
00322 return GetSeqC( offset );
00323 STACK_TRACE_END
00324 }
00325
00326 gnSeqI gnSequence::contigListSize() const{
00327 STACK_TRACE_START
00328 return spec->GetSpecListLength();
00329 STACK_TRACE_END
00330 }
00331 gnSeqI gnSequence::contigListLength() const{
00332 STACK_TRACE_START
00333 return spec->GetSpecListLength();
00334 STACK_TRACE_END
00335 }
00336
00337
00338 uint32 gnSequence::contigIndexByBase( const gnSeqI baseI) const{
00339 STACK_TRACE_START
00340 return spec->GetSpecIndexByBase(baseI-1);
00341 STACK_TRACE_END
00342 }
00343 gnSequence gnSequence::contig( const uint32 contigI) const{
00344 STACK_TRACE_START
00345 if(contigI == ALL_CONTIGS)
00346 return *this;
00347 return gnSequence(*spec->GetSpec(contigI));
00348 STACK_TRACE_END
00349 }
00350
00351 gnSequence gnSequence::contigByBase( const gnSeqI baseI) const{
00352 STACK_TRACE_START
00353 return gnSequence(*spec->GetSpecByBase(baseI-1));
00354 STACK_TRACE_END
00355 }
00356 uint32 gnSequence::contigIndexByName( string& contigName) const{
00357 STACK_TRACE_START
00358 return spec->GetSpecIndexByName(contigName);
00359 STACK_TRACE_END
00360 }
00361
00362 gnSeqI gnSequence::contigStart( const uint32 contigI) const{
00363 STACK_TRACE_START
00364 int64 leafI = contigI;
00365
00366 return spec->GetSpecStartBase( leafI ) + 1;
00367 STACK_TRACE_END
00368 }
00369
00370 gnSeqI gnSequence::contigLength( const uint32 contigI) const{
00371 STACK_TRACE_START
00372 return spec->GetSpec( contigI )->GetLength();
00373 STACK_TRACE_END
00374 }
00375
00376 string gnSequence::contigName( const uint32 contigI) const{
00377 STACK_TRACE_START
00378 return spec->GetSpec(contigI)->GetName();
00379 STACK_TRACE_END
00380 }
00381
00382 void gnSequence::globalToLocal(uint32& contigI, gnSeqI& baseI) const{
00383 STACK_TRACE_START
00384 contigI = contigIndexByBase(baseI);
00385 baseI -= (contigStart(contigI) - 1);
00386 STACK_TRACE_END
00387 }
00388
00389 void gnSequence::localToGlobal(const uint32 contigI, gnSeqI& baseI) const{
00390 STACK_TRACE_START
00391 if(baseI > contigLength(contigI))
00392 Throw_gnEx(SeqIndexOutOfBounds());
00393 baseI += contigStart(contigI) - 1;
00394 STACK_TRACE_END
00395 }
00396
00397 void gnSequence::globalToSource(uint32& contigI, gnSeqI& baseI) const{
00398 STACK_TRACE_START
00399 baseI--;
00400 gnSeqI seq_contig = spec->GetSpecIndexByBase(baseI);
00401 gnSeqI new_base = baseI - spec->GetSpecStartBase(seq_contig);
00402 gnFragmentSpec *fragment_spec = spec->GetSpec(seq_contig);
00403 seq_contig = fragment_spec->GetSpecIndexByBase(new_base);
00404 new_base = new_base - fragment_spec->GetSpecStartBase(seq_contig);
00405 gnContigSpec* contig_spec = fragment_spec->GetSpec(seq_contig);
00406 contigI = contig_spec->GetSourceContigIndex();
00407 gnSeqI contig_start = contig_spec->GetStart();
00408 if(contig_spec->IsReverseComplement()){
00409 gnSeqI source_len = contig_spec->GetSourceLength();
00410
00411
00412
00413
00414
00415
00416 baseI = (contig_start - new_base - 1 + source_len) % source_len;
00417 }else
00418 baseI = contig_start + new_base + 1;
00419 STACK_TRACE_END
00420 }
00421
00422 void gnSequence::localToSource(uint32& contigI, gnSeqI& baseI) const{
00423 STACK_TRACE_START
00424 localToGlobal(contigI, baseI);
00425 globalToSource(contigI, baseI);
00426 STACK_TRACE_END
00427 }
00428
00429 gnSequence gnSequence::contigByName( string& contigName) const{
00430 STACK_TRACE_START
00431 uint32 contigIndex = spec->GetSpecIndexByName(contigName);
00432 return gnSequence(*spec->GetSpec(contigIndex));
00433 STACK_TRACE_END
00434 }
00435
00436 void gnSequence::setContigName( const uint32 contigI, const string& contig_name) {
00437 STACK_TRACE_START
00438 if(contigI == ALL_CONTIGS)
00439 spec->SetName(contig_name);
00440 else
00441 spec->GetSpec(contigI)->SetName(contig_name);
00442 STACK_TRACE_END
00443 }
00444
00445 void gnSequence::setReverseComplement( const boolean revComp, const uint32 contigI){
00446 STACK_TRACE_START
00447 if(contigI == ALL_CONTIGS)
00448 spec->SetReverseComplement(revComp);
00449 else{
00450 gnFragmentSpec* contig = spec->GetSpec(contigI);
00451 contig->SetReverseComplement(revComp);
00452 }
00453 STACK_TRACE_END
00454 }
00455
00456 boolean gnSequence::isReverseComplement( const uint32 contigI ){
00457 STACK_TRACE_START
00458 if(contigI == ALL_CONTIGS)
00459 return spec->IsReverseComplement();
00460 gnFragmentSpec* contig = spec->GetSpec(contigI);
00461 return contig->IsReverseComplement();
00462 STACK_TRACE_END
00463 }
00464
00465 uint32 gnSequence::getHeaderListLength(const uint32 contigI) const{
00466 STACK_TRACE_START
00467 if(contigI == ALL_CONTIGS)
00468 return spec->GetHeaderListLength();
00469 else{
00470 return spec->GetSpec(contigI)->GetHeaderListLength();
00471 }
00472 STACK_TRACE_END
00473 }
00474
00475 gnBaseHeader* gnSequence::getHeader(const uint32 contigI, const uint32 headerI) const{
00476 STACK_TRACE_START
00477 if(contigI == ALL_CONTIGS)
00478 return spec->GetHeader(headerI);
00479 else{
00480 return spec->GetSpec(contigI)->GetHeader(headerI);
00481 }
00482 STACK_TRACE_END
00483 }
00484
00485 void gnSequence::addHeader(const uint32 contigI, gnBaseHeader* header, const uint32 headerI){
00486 STACK_TRACE_START
00487 if(contigI == ALL_CONTIGS)
00488 spec->AddHeader(header, headerI);
00489 else{
00490 spec->GetSpec(contigI)->AddHeader(header, headerI);
00491 }
00492 STACK_TRACE_END
00493 }
00494
00495 void gnSequence::removeHeader(const uint32 contigI, const uint32 headerI){
00496 STACK_TRACE_START
00497 if(contigI == ALL_CONTIGS)
00498 spec->RemoveHeader(headerI);
00499 else
00500 spec->GetSpec(contigI)->RemoveHeader(headerI);
00501 STACK_TRACE_END
00502 }
00503
00504 void gnSequence::merge(const gnSeqI startBase, const gnSeqI endBase){
00505 STACK_TRACE_START
00506 STACK_TRACE_END
00507 }
00508
00509 void gnSequence::mergeContigs(const uint32 startC, const uint32 endC){
00510 STACK_TRACE_START
00511 spec->MergeFragments(startC, endC);
00512 STACK_TRACE_END
00513 }
00514
00515 bool gnSequence::LoadSource(const string sourcename){
00516 STACK_TRACE_START
00517 gnSourceFactory* m_sourceFactory = gnSourceFactory::GetSourceFactory();
00518 gnBaseSource *m_pSource = m_sourceFactory->AddSource(sourcename, true);
00519 if (m_pSource!=NULL)
00520 {
00521 if(spec != NULL)
00522 delete spec;
00523 spec = m_pSource->GetSpec();
00524 return true;
00525 }
00526 return false;
00527 STACK_TRACE_END
00528 }
00529
00530 gnSeqI gnSequence::find(const gnSequence& search, const gnSeqI offset)const{
00531 STACK_TRACE_START
00532
00533 string searchIn=ToString();
00534 string find= search.ToString();
00535 string::size_type pos = searchIn.find(find, offset);
00536 if (pos == string::npos)
00537 return GNSEQI_ERROR;
00538 else{
00539 return pos++;
00540 }
00541 STACK_TRACE_END
00542 }