00001 ///////////////////////////////////////////////////////////////////////////// 00002 // File: gnFASSource.h 00003 // Purpose: Implements gnBaseSource for .FAS files 00004 // Description: 00005 // Changes: 00006 // Version: libGenome 0.5.1 00007 // Author: Aaron Darling 00008 // Modified by: 00009 // Copyright: (c) Aaron Darling 00010 // Licenses: See COPYING file for details 00011 ///////////////////////////////////////////////////////////////////////////// 00012 #ifndef _gnFASSource_h_ 00013 #define _gnFASSource_h_ 00014 00015 #include "gn/gnDefs.h" 00016 00017 #include <string> 00018 #include <fstream> 00019 #include <vector> 00020 #include "gn/gnFileSource.h" 00021 #include "gn/gnSequence.h" 00022 00023 #define FAS_LINE_WIDTH 80 00024 00025 /** 00026 * gnFASSource reads and writes FastA files. 00027 * gnFASSource is used by gnSourceFactory to read files. 00028 * Files can be written in the FastA file format by calling 00029 * gnFASSource::Write( mySpec, "C:\\myFasFile.fas"); 00030 */ 00031 00032 class GNDLLEXPORT gnFASSource : public gnFileSource 00033 { 00034 public: 00035 /** 00036 * Empty Constructor, does nothing. 00037 */ 00038 gnFASSource(); 00039 /** 00040 * Clone Constructor copies the specified gnFASSource. 00041 * @param s The gnFASSource to copy. 00042 */ 00043 gnFASSource( const gnFASSource& s ); 00044 /** 00045 * Destructor, frees memory. 00046 */ 00047 ~gnFASSource(); 00048 /** 00049 * Returns an exact copy of this class. 00050 */ 00051 gnFASSource* Clone() const; 00052 00053 uint32 GetContigListLength() const; 00054 boolean HasContig( const string& name ) const; 00055 uint32 GetContigID( const string& name ) const; 00056 string GetContigName( const uint32 i ) const; 00057 gnSeqI GetContigSeqLength( const uint32 i ) const; 00058 gnFileContig* GetContig( const uint32 i ) const; 00059 00060 boolean SeqRead( const gnSeqI start, char* buf, uint32& bufLen, const uint32 contigI=ALL_CONTIGS ) ; 00061 00062 /** 00063 * Write the given gnSequence to a FastA file. 00064 * @param sequence The gnSequence to write out. 00065 * @param filename The name of the file to write. 00066 * @param write_coords If true each entry's name will be followed by the coordinates of the entry 00067 * in the context of the entrire file. 00068 * @param enforce_unique_names If true each entry's name will be recorded as they are written. Each 00069 * successive duplicate name that is found will have an underscore and a 00070 * number appended to it, indicating the number of entries by the same 00071 * name which have already been written. 00072 * Turning this off will yield a slight performance improvement when writing 00073 * files with a large number of entries. (More than 1000) 00074 * @throws A FileNotOpened() exception may be thrown. 00075 */ 00076 static void Write(gnSequence& sequence, const string& filename, boolean write_coords = true, boolean enforce_unique_names = true); 00077 00078 /** 00079 * Write the given gnSequence to an ostream. 00080 * @param sequence The gnSequence to write out. 00081 * @param m_ostream The output stream to write to. 00082 * @param write_coords If true each entry's name will be followed by the coordinates of the entry 00083 * in the context of the entrire file. 00084 * @param enforce_unique_names If true each entry's name will be recorded as they are written. Each 00085 * successive duplicate name that is found will have an underscore and a 00086 * number appended to it, indicating the number of entries by the same 00087 * name which have already been written. 00088 * Turning this off will yield a slight performance improvement when writing 00089 * files with a large number of entries. (More than 1000) 00090 */ 00091 static void Write(gnSequence& sequence, ostream& m_ostream, boolean write_coords = true, boolean enforce_unique_names = true); 00092 00093 /** 00094 * Deprecated - do not use. 00095 * Write the given source to a FastA file. 00096 * @param source The spec to write out. 00097 * @param filename The name of the file to write. 00098 */ 00099 static boolean Write(gnBaseSource *source, const string& filename); 00100 00101 gnGenomeSpec *GetSpec() const; 00102 00103 gnFileContig* GetFileContig( const uint32 contigI ) const; 00104 private: 00105 boolean SeqSeek( const gnSeqI start, const uint32 contigI, uint64& startPos, uint64& readableBytes ); 00106 boolean SeqStartPos( const gnSeqI start, gnFileContig& contig, uint64& startPos, uint64& readableBytes ); 00107 boolean ParseStream( istream& fin ); 00108 00109 vector< gnFileContig* > m_contigList; 00110 };// class gnFASSource 00111 00112 inline 00113 gnFASSource* gnFASSource::Clone() const 00114 { 00115 return new gnFASSource( *this ); 00116 } 00117 00118 inline 00119 uint32 gnFASSource::GetContigListLength() const 00120 { 00121 return m_contigList.size(); 00122 } 00123 00124 #endif 00125 // _gnFASSource_h_