00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00013
00014 #ifndef _gnSEQSource_h_
00015 #define _gnSEQSource_h_
00016
00017 #include "gn/gnDefs.h"
00018
00019 #include <string>
00020 #include <fstream>
00021 #include <vector>
00022 #include "gn/gnFileSource.h"
00023 #include "gn/gnFileContig.h"
00024 #include "gn/gnSourceSpec.h"
00025 #include "gn/gnSequence.h"
00026
00034
00035 class GNDLLEXPORT gnSEQSource : public gnFileSource
00036 {
00037 public:
00041 gnSEQSource();
00046 gnSEQSource( const gnSEQSource& s );
00050 ~gnSEQSource();
00054 gnSEQSource* Clone() const;
00055
00056 uint32 GetContigListLength() const;
00057 boolean HasContig( const string& name ) const;
00058 uint32 GetContigID( const string& name ) const;
00059 string GetContigName( const uint32 i ) const;
00060 gnSeqI GetContigSeqLength( const uint32 i ) const;
00061
00062 boolean SeqRead( const gnSeqI start, char* buf, uint32& bufLen, const uint32 contigI=ALL_CONTIGS );
00063
00070 static boolean Write(gnSequence& sequence, const string& filename);
00077 static boolean Write(gnBaseSource *source, const string& filename);
00084 static boolean Write(gnGenomeSpec *spec, const string& filename);
00085 gnGenomeSpec *GetSpec() const;
00086 gnFileContig* GetFileContig( const uint32 contigI ) const;
00087 private:
00088 boolean SeqSeek( const gnSeqI start, const uint32& contigI, uint64& startPos, uint64& readableBytes );
00089 boolean SeqStartPos( const gnSeqI start, gnFileContig& contig, uint64& startPos, uint64& readableBytes );
00090 boolean ParseStream( istream& fin );
00091
00092 static string& Filler(uint32 length);
00093 static void FormatString(string& data, uint32 offset, uint32 width);
00094 static void BaseCount(const string& bases, gnSeqI& a_count, gnSeqI& c_count, gnSeqI& g_count, gnSeqI& t_count, gnSeqI& other_count);
00095
00096
00097 gnGenomeSpec *m_spec;
00098 vector< gnFileContig* > m_contigList;
00099 };
00100
00101 inline
00102 gnSEQSource* gnSEQSource::Clone() const
00103 {
00104 return new gnSEQSource( *this );
00105 }
00106
00107 inline
00108 uint32 gnSEQSource::GetContigListLength() const
00109 {
00110 return m_contigList.size();
00111 }
00112 inline
00113 boolean gnSEQSource::Write(gnSequence& sequence, const string& filename){
00114 return Write(sequence.GetSpec(), filename);
00115 }
00116 inline
00117 boolean gnSEQSource::Write(gnBaseSource *source, const string& filename){
00118 return Write(source->GetSpec(), filename);
00119 }
00120 inline
00121 gnGenomeSpec *gnSEQSource::GetSpec() const{
00122 return m_spec->Clone();
00123 }
00124
00125 #endif
00126
00127