00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00013
00014 #ifndef _gnGBKSource_h_
00015 #define _gnGBKSource_h_
00016
00017 #include "gn/gnDefs.h"
00018
00019 #include <string>
00020 #include <fstream>
00021 #include <vector>
00022 #include "gn/gnFileSource.h"
00023 #include "gn/gnFileContig.h"
00024 #include "gn/gnSourceSpec.h"
00025 #include "gn/gnSequence.h"
00026
/**
 * Fixed layout values declared next to gnGBKSource.
 * All are uint32 constants at namespace scope. None of them is referenced in
 * the visible part of this header, so their users are presumably in the
 * implementation file.
 * NOTE(review): the groupings below are inferred from the names only; whether
 * the *_COLUMN / *_OFFSET values are 0-based or 1-based cannot be told from
 * this header -- confirm against the code that uses them.
 */
// Presumably general line layout: total line width, width of the leading
// keyword field, and the column where sub-keywords start -- TODO confirm.
00027 const uint32 SEQ_COLUMN_WIDTH = 80;
00028 const uint32 SEQ_HEADER_NAME_LENGTH = 11;
00029 const uint32 SEQ_SUBTAG_COLUMN = 5;
// Presumably positions and widths of the fields on a LOCUS line
// (circular flag, name, size, DNA type, division code, date) -- TODO confirm.
00030 const uint32 SEQ_LOCUS_CIRCULAR_COLUMN = 43;
00031 const uint32 SEQ_LOCUS_NAME_COLUMN = 13;
00032 const uint32 SEQ_LOCUS_NAME_LENGTH = 10;
00033 const uint32 SEQ_LOCUS_SIZE_LENGTH = 7;
00034 const uint32 SEQ_LOCUS_DNATYPE_OFFSET = 33;
00035 const uint32 SEQ_LOCUS_DNATYPE_LENGTH = 7;
00036 const uint32 SEQ_LOCUS_DIVCODE_OFFSET = 52;
00037 const uint32 SEQ_LOCUS_DIVCODE_LENGTH = 3;
00038 const uint32 SEQ_LOCUS_DATE_OFFSET = 62;
00039 const uint32 SEQ_LOCUS_DATE_LENGTH = 11;
// Presumably where a feature's location text starts, and where the base-index
// field of a sequence line ends -- TODO confirm.
00040 const uint32 SEQ_FEATURE_LOC_OFFSET = 21;
00041 const uint32 SEQ_BASES_INDEX_END = 9;
00042
00050
/**
 * Sequence source declared as a public subclass of gnFileSource.
 * Judging by its name it handles GenBank (GBK) flat files; the parsing and
 * writing code itself is not in this header.
 *
 * Per-object state: a gnGenomeSpec pointer (m_spec) and a vector of
 * gnFileContig pointers (m_contigList).
 *
 * NOTE(review): a copy constructor and a destructor are declared but no copy
 * assignment operator is. With raw pointer members, confirm that the
 * compiler-generated assignment is safe or is never used.
 * NOTE(review): string, vector, istream and ofstream are used unqualified, so
 * an included header presumably brings namespace std into scope -- confirm.
 */
00051 class GNDLLEXPORT gnGBKSource : public gnFileSource
00052 {
00053 public:
/** Default constructor; defined outside this header. */
00057 gnGBKSource();
/**
 * Copy constructor; defined outside this header.
 * @param s The source to copy. How deep the copy is cannot be seen here.
 */
00062 gnGBKSource( const gnGBKSource& s );
/** Destructor; defined outside this header. */
00066 ~gnGBKSource();
/** Returns a newly allocated copy of this object (inline definition follows the class). */
00070 gnGBKSource* Clone() const;
00071
// Contig queries. Only GetContigListLength() is defined in this header (it
// returns m_contigList.size()); the others are defined elsewhere.
// Presumably they look contigs up by name or by index -- TODO confirm.
00072 uint32 GetContigListLength() const;
00073 boolean HasContig( const string& name ) const;
00074 uint32 GetContigID( const string& name ) const;
00075 string GetContigName( const uint32 i ) const;
00076 gnSeqI GetContigSeqLength( const uint32 i ) const;
00077
/**
 * Sequence read entry point; defined outside this header.
 * @param start   Sequence position to start from (presumably) -- confirm base.
 * @param buf     Caller-supplied character buffer.
 * @param bufLen  Passed by non-const reference, so it can be updated by the
 *                call; presumably capacity in, characters read out -- confirm.
 * @param contigI Contig selector; defaults to ALL_CONTIGS.
 */
00078 boolean SeqRead( const gnSeqI start, char* buf, uint32& bufLen, const uint32 contigI=ALL_CONTIGS );
00079
/**
 * Static writer taking a gnSequence; defined outside this header.
 * @param seq      The sequence to write.
 * @param filename Presumably the path of the output file -- confirm.
 */
00086 static boolean Write(gnSequence& seq, const string& filename);
/**
 * Static writer taking a source pointer. Its inline definition (after the
 * class) builds a gnSequence from source->GetSpec() and forwards to the
 * gnSequence overload above.
 * @param source   Dereferenced without a null check; must not be null.
 * @param filename Passed through unchanged to the other overload.
 */
00093 static boolean Write(gnBaseSource *source, const string& filename);
/** Returns m_spec->Clone() (inline definition follows the class). */
00094 gnGenomeSpec *GetSpec() const;
/** Defined outside this header; presumably returns the entry of m_contigList at contigI -- confirm. */
00095 gnFileContig* GetFileContig( const uint32 contigI ) const;
00096 private:
// File-positioning helpers; defined outside this header. startPos and
// readableBytes are non-const references, i.e. outputs of the call.
00097 boolean SeqSeek( const gnSeqI start, const uint32& contigI, uint64& startPos, uint64& readableBytes );
00098 boolean SeqStartPos( const gnSeqI start, gnFileContig& contig, uint64& startPos, uint64& readableBytes );
// Parser entry point taking an input stream; defined outside this header.
00099 boolean ParseStream( istream& fin );
00100
// Static formatting helpers for the writer; defined outside this header.
// NOTE(review): Filler() returns a non-const string reference, so the string
// it refers to must outlive the call -- confirm where that storage lives.
00101 static string& Filler(uint32 length);
00102 static void FormatString(string& data, uint32 offset, uint32 width);
00103 static void WriteHeader(gnMultiSpec* spec, const string& hdr, ofstream& m_ofstream);
00104
00105
// Spec pointer; GetSpec() hands out m_spec->Clone().
// Ownership of the pointee is not visible in this header.
00106 gnGenomeSpec *m_spec;
// Contig pointers; GetContigListLength() reports this vector's size.
// Ownership of the pointees is not visible in this header.
00107 vector< gnFileContig* > m_contigList;
00108 };
00109
/**
 * Creates a copy of this source with `new`, using the copy constructor.
 * @return Raw pointer to the new gnGBKSource. Nothing here frees it, so the
 *         caller is presumably responsible for deleting it -- confirm.
 */
00110 inline
00111 gnGBKSource* gnGBKSource::Clone() const
00112 {
00113 return new gnGBKSource( *this );
00114 }
00115
/**
 * Reports how many entries m_contigList holds.
 * @return m_contigList.size(), implicitly converted to uint32.
 * NOTE(review): size() yields the vector's size_type; if that is wider than
 * uint32 the conversion narrows silently. Presumably harmless for realistic
 * contig counts -- confirm, or make the conversion explicit.
 */
00116 inline
00117 uint32 gnGBKSource::GetContigListLength() const
00118 {
00119 return m_contigList.size();
00120 }
/**
 * Convenience overload: wraps the spec obtained from `source` in a temporary
 * gnSequence and forwards to Write(gnSequence&, const string&).
 * @param source   Source whose GetSpec() result is used. It is dereferenced
 *                 without a null check, so it must not be null.
 * @param filename Passed through unchanged to the gnSequence overload.
 * @return Whatever the gnSequence overload returns.
 * NOTE(review): the pointer returned by source->GetSpec() is dereferenced and
 * never stored. If GetSpec() returns a fresh allocation (gnGBKSource::GetSpec
 * returns m_spec->Clone()) and the gnSequence constructor copies rather than
 * adopts it, that object is never freed -- confirm against gnSequence.
 */
00121 inline
00122 boolean gnGBKSource::Write(gnBaseSource *source, const string& filename){
00123 gnSequence gns(*source->GetSpec());
00124 return Write(gns, filename);
00125 }
/**
 * Hands out the result of calling Clone() on this source's spec.
 * @return m_spec->Clone(). Presumably a new object the caller must free;
 *         gnGenomeSpec::Clone is not visible here -- confirm.
 * NOTE(review): m_spec is dereferenced without a null check; presumably every
 * constructor initialises it -- confirm in the implementation file.
 */
00126 inline
00127 gnGenomeSpec *gnGBKSource::GetSpec() const{
00128 return m_spec->Clone();
00129 }
00130
00131 #endif
00132