00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef _gnGBKSource_h_
00014 #define _gnGBKSource_h_
00015
00016 #include "gn/gnDefs.h"
00017
00018 #include <string>
00019 #include <fstream>
00020 #include <vector>
00021 #include "gn/gnFileSource.h"
00022 #include "gn/gnFileContig.h"
00023 #include "gn/gnSourceSpec.h"
00024 #include "gn/gnSequence.h"
00025
00026 const uint32 SEQ_COLUMN_WIDTH = 80;
00027 const uint32 SEQ_HEADER_NAME_LENGTH = 11;
00028 const uint32 SEQ_SUBTAG_COLUMN = 5;
00029 const uint32 SEQ_LOCUS_CIRCULAR_COLUMN = 43;
00030 const uint32 SEQ_LOCUS_NAME_COLUMN = 13;
00031 const uint32 SEQ_LOCUS_NAME_LENGTH = 10;
00032 const uint32 SEQ_LOCUS_SIZE_LENGTH = 7;
00033 const uint32 SEQ_LOCUS_DNATYPE_OFFSET = 33;
00034 const uint32 SEQ_LOCUS_DNATYPE_LENGTH = 7;
00035 const uint32 SEQ_LOCUS_DIVCODE_OFFSET = 52;
00036 const uint32 SEQ_LOCUS_DIVCODE_LENGTH = 3;
00037 const uint32 SEQ_LOCUS_DATE_OFFSET = 62;
00038 const uint32 SEQ_LOCUS_DATE_LENGTH = 11;
00039 const uint32 SEQ_FEATURE_LOC_OFFSET = 21;
00040 const uint32 SEQ_BASES_INDEX_END = 9;
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050 class GNDLLEXPORT gnGBKSource : public gnFileSource
00051 {
00052 public:
00053
00054
00055
00056 gnGBKSource();
00057
00058
00059
00060
00061 gnGBKSource( const gnGBKSource& s );
00062
00063
00064
00065 ~gnGBKSource();
00066
00067
00068
00069 gnGBKSource* Clone() const;
00070
00071 uint32 GetContigListLength() const;
00072 boolean HasContig( const string& name ) const;
00073 uint32 GetContigID( const string& name ) const;
00074 string GetContigName( const uint32 i ) const;
00075 gnSeqI GetContigSeqLength( const uint32 i ) const;
00076
00077 boolean SeqRead( const gnSeqI start, char* buf, uint32& bufLen, const uint32 contigI=ALL_CONTIGS );
00078
00079
00080
00081
00082
00083
00084
00085 static boolean Write(gnSequence& seq, const string& filename);
00086
00087
00088
00089
00090
00091
00092 static boolean Write(gnBaseSource *source, const string& filename);
00093 gnGenomeSpec *GetSpec() const;
00094 gnFileContig* GetFileContig( const uint32 contigI ) const;
00095 private:
00096 boolean SeqSeek( const gnSeqI start, const uint32& contigI, uint64& startPos, uint64& readableBytes );
00097 boolean SeqStartPos( const gnSeqI start, gnFileContig& contig, uint64& startPos, uint64& readableBytes );
00098 boolean ParseStream( istream& fin );
00099
00100 static string& Filler(uint32 length);
00101 static void FormatString(string& data, uint32 offset, uint32 width);
00102 static void WriteHeader(gnMultiSpec* spec, const string& hdr, ofstream& m_ofstream);
00103
00104
00105 gnGenomeSpec *m_spec;
00106 vector< gnFileContig* > m_contigList;
00107 };
00108
00109 inline
00110 gnGBKSource* gnGBKSource::Clone() const
00111 {
00112 return new gnGBKSource( *this );
00113 }
00114
00115 inline
00116 uint32 gnGBKSource::GetContigListLength() const
00117 {
00118 return m_contigList.size();
00119 }
00120 inline
00121 boolean gnGBKSource::Write(gnBaseSource *source, const string& filename){
00122 gnSequence gns(*source->GetSpec());
00123 return Write(gns, filename);
00124 }
00125 inline
00126 gnGenomeSpec *gnGBKSource::GetSpec() const{
00127 return m_spec->Clone();
00128 }
00129
00130 #endif
00131