00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef _gnSEQSource_h_
00014 #define _gnSEQSource_h_
00015
00016 #include "gn/gnDefs.h"
00017
00018 #include <string>
00019 #include <fstream>
00020 #include <vector>
00021 #include "gn/gnFileSource.h"
00022 #include "gn/gnFileContig.h"
00023 #include "gn/gnSourceSpec.h"
00024 #include "gn/gnSequence.h"
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 class GNDLLEXPORT gnSEQSource : public gnFileSource
00035 {
00036 public:
00037
00038
00039
00040 gnSEQSource();
00041
00042
00043
00044
00045 gnSEQSource( const gnSEQSource& s );
00046
00047
00048
00049 ~gnSEQSource();
00050
00051
00052
00053 gnSEQSource* Clone() const;
00054
00055 uint32 GetContigListLength() const;
00056 boolean HasContig( const string& name ) const;
00057 uint32 GetContigID( const string& name ) const;
00058 string GetContigName( const uint32 i ) const;
00059 gnSeqI GetContigSeqLength( const uint32 i ) const;
00060
00061 boolean SeqRead( const gnSeqI start, char* buf, uint32& bufLen, const uint32 contigI=ALL_CONTIGS );
00062
00063
00064
00065
00066
00067
00068
00069 static boolean Write(gnSequence& sequence, const string& filename);
00070
00071
00072
00073
00074
00075
00076 static boolean Write(gnBaseSource *source, const string& filename);
00077
00078
00079
00080
00081
00082
00083 static boolean Write(gnGenomeSpec *spec, const string& filename);
00084 gnGenomeSpec *GetSpec() const;
00085 gnFileContig* GetFileContig( const uint32 contigI ) const;
00086 private:
00087 boolean SeqSeek( const gnSeqI start, const uint32& contigI, uint64& startPos, uint64& readableBytes );
00088 boolean SeqStartPos( const gnSeqI start, gnFileContig& contig, uint64& startPos, uint64& readableBytes );
00089 boolean ParseStream( istream& fin );
00090
00091 static string& Filler(uint32 length);
00092 static void FormatString(string& data, uint32 offset, uint32 width);
00093 static void BaseCount(const string& bases, gnSeqI& a_count, gnSeqI& c_count, gnSeqI& g_count, gnSeqI& t_count, gnSeqI& other_count);
00094
00095
00096 gnGenomeSpec *m_spec;
00097 vector< gnFileContig* > m_contigList;
00098 };
00099
00100 inline
00101 gnSEQSource* gnSEQSource::Clone() const
00102 {
00103 return new gnSEQSource( *this );
00104 }
00105
00106 inline
00107 uint32 gnSEQSource::GetContigListLength() const
00108 {
00109 return m_contigList.size();
00110 }
00111 inline
00112 boolean gnSEQSource::Write(gnSequence& sequence, const string& filename){
00113 return Write(sequence.GetSpec(), filename);
00114 }
00115 inline
00116 boolean gnSEQSource::Write(gnBaseSource *source, const string& filename){
00117 return Write(source->GetSpec(), filename);
00118 }
00119 inline
00120 gnGenomeSpec *gnSEQSource::GetSpec() const{
00121 return m_spec->Clone();
00122 }
00123
00124 #endif
00125
00126