00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "gn/gnFilter.h"
00013 #include "gn/gnRAWSource.h"
00014 #include "gn/gnGenomeSpec.h"
00015 #include "gn/gnFragmentSpec.h"
00016 #include "gn/gnSourceSpec.h"
00017 #include "gn/gnStringTools.h"
00018 #include "gn/gnDebug.h"
00019
00020 gnRAWSource::gnRAWSource()
00021 {
00022 m_openString = "";
00023 m_contig = NULL;
00024 m_pFilter = NULL;
00025 }
00026
00027 gnRAWSource::gnRAWSource( const gnRAWSource& s ) : gnFileSource(s)
00028 {
00029 m_contig = NULL;
00030 if(s.m_contig != NULL)
00031 m_contig = s.m_contig->Clone();
00032 }
00033
00034 gnRAWSource::~gnRAWSource()
00035 {
00036 m_ifstream.close();
00037 delete m_contig;
00038 }
00039
00040 boolean gnRAWSource::HasContig( const string& name ) const
00041 {
00042 if( name.length() == 0 )
00043 return true;
00044 return false;
00045 }
00046
00047 uint32 gnRAWSource::GetContigID( const string& name ) const
00048 {
00049 return ALL_CONTIGS;
00050 }
00051
00052 string gnRAWSource::GetContigName( const uint32 i ) const
00053 {
00054 return "";
00055 }
00056
00057 gnSeqI gnRAWSource::GetContigSeqLength( const uint32 i ) const
00058 {
00059 if( m_contig && (i == 0 || i == ALL_CONTIGS))
00060 return m_contig->GetSeqLength();
00061 return GNSEQI_ERROR;
00062 }
00063
00064 boolean gnRAWSource::SeqRead( const gnSeqI start, char* buf, gnSeqI& bufLen, const uint32 contigI ){
00065 return Read( start, buf, bufLen );
00066 }
00067
00068 gnGenomeSpec *gnRAWSource::GetSpec() const{
00069 return m_spec->Clone();
00070 }
00071
00072 boolean gnRAWSource::Write(gnSequence& seq, const string& filename){
00073 ofstream m_ofstream(filename.c_str(), ios::out | ios::binary);
00074 if(!m_ofstream.is_open())
00075 return false;
00076
00077 gnSeqC buf[BUFFER_SIZE + 1];
00078 buf[BUFFER_SIZE] = 0;
00079 gnSeqI readOffset = 0;
00080 gnSeqI readLength = seq.length();
00081 while(readLength > 0){
00082 gnSeqI writeLen = readLength < BUFFER_SIZE ? readLength : BUFFER_SIZE;
00083 if(!seq.ToArray(buf, writeLen, readOffset + 1))
00084 return false;
00085 m_ofstream.write( buf, writeLen );
00086 readLength -= writeLen;
00087 readOffset += writeLen;
00088 }
00089 m_ofstream.flush();
00090 m_ofstream.close();
00091 return true;
00092 }
00093
00094 gnFileContig* gnRAWSource::GetFileContig( const uint32 contigI ) const{
00095 if(contigI > 0)
00096 return NULL;
00097 return m_contig;
00098 }
00099
00100
00101 boolean gnRAWSource::ParseStream( istream& fin )
00102 {
00103
00104 uint64 streamPos = 0;
00105 uint64 bufReadLen = 0;
00106 Array<char> array_buf( BUFFER_SIZE );
00107 char* buf = array_buf.data;
00108 gnSeqI seqLength = 0;
00109
00110 if( m_contig == NULL )
00111 m_contig = new gnFileContig();
00112 m_contig->SetName( "Raw Data" );
00113 m_contig->SetRepeatSeqGap(true);
00114 m_contig->SetSectStart(gnContigSequence, 0);
00115
00116 uint64 offset = 0;
00117 if( !CheckRawData() ){
00118 fin.seekg( 0, ios::end );
00119 offset = fin.tellg();
00120 }
00121 else
00122 {
00123 while( !fin.eof() )
00124 {
00125
00126 fin.read( buf , BUFFER_SIZE );
00127 bufReadLen = fin.gcount();
00128
00129 for( uint32 i=0 ; i < bufReadLen ; i++ )
00130 {
00131 if(m_pFilter == NULL || m_pFilter->IsValid(buf[i]))
00132 seqLength++;
00133 else{
00134 m_contig->SetRepeatSeqGap(false);
00135 }
00136 }
00137 streamPos += bufReadLen;
00138 }
00139 }
00140 m_contig->SetSectEnd(gnContigSequence, streamPos + offset);
00141 m_contig->SetSeqLength(seqLength + offset );
00142 m_spec = new gnGenomeSpec();
00143 gnFragmentSpec* fragspec = new gnFragmentSpec();
00144 gnSourceSpec* sspec = new gnSourceSpec(this);
00145 sspec->SetSourceName(m_openString);
00146 m_spec->AddSpec(fragspec);
00147 fragspec->AddSpec(sspec);
00148
00149 m_ifstream.clear();
00150 return true;
00151 }