00001 ///////////////////////////////////////////////////////////////////////////// 00002 // File: gnBaseSource.h 00003 // Purpose: Abstract source class 00004 // Description: Basic interface to all source objects 00005 // Changes: 00006 // Version: libGenome 0.5.1 00007 // Author: Aaron Darling 00008 // Modified by: 00009 // Copyright: (c) Aaron Darling 00010 // Licenses: See COPYING file for details 00011 ///////////////////////////////////////////////////////////////////////////// 00012 #ifndef _gnBaseSource_h_ 00013 #define _gnBaseSource_h_ 00014 00015 #include "gn/gnDefs.h" 00016 00017 #include <string> 00018 00019 #include "gn/gnClone.h" 00020 class gnGenomeSpec; 00021 class gnFilter; 00022 00023 /** 00024 * gnBaseSource defines a standard interface for derived classes to 00025 * provide access to file, database, and network sources of genetic data. 00026 * First the data source is opened and parsed, using the open() function. 00027 * The source class then creates and a gnGenomeSpec containing references 00028 * to its sequence data. The gnGenomeSpec can then be used by gnSequence 00029 * to give programmer friendly access to the sequence data. 00030 */ 00031 class GNDLLEXPORT gnBaseSource : public gnClone 00032 { 00033 public: 00034 gnBaseSource(){} 00035 /** 00036 * Destructor, frees memory used by this source class. 00037 */ 00038 virtual ~gnBaseSource(){} 00039 virtual gnBaseSource* Clone() const = 0; 00040 00041 /** 00042 * Opens the source given in "openString" for reading. 00043 * @param openString The name of the source (file, network URL, or 00044 * database) to open. 00045 * @throws Will throw a FileNotOpened exception if the file was not found 00046 * or was not accessible. Will propagate a FileUnreadable exception if the 00047 * file format was invalid. 00048 */ 00049 virtual void Open( string openString ) = 0; 00050 /** 00051 * Opens this source for reading. 00052 * @throws Will throw a FileNotOpened exception if the file was not found 00053 * or was not accessible. 00054 */ 00055 virtual void Open() = 0; 00056 /** 00057 * Closes the file or connection this source is reading from. 00058 * @throws IOStreamError if an error occurs closing the file. 00059 */ 00060 virtual void Close() = 0; 00061 /** 00062 * Get the location of the source that is being used. 00063 * @return The location string describing this source, usually a file 00064 * name. 00065 */ 00066 virtual string GetOpenString() const = 0; 00067 00068 /** 00069 * Get the number of sequence contigs in this source. 00070 * @return The number of contigs in this source. 00071 */ 00072 virtual uint32 GetContigListLength() const = 0; 00073 /** 00074 * Looks for a contig by name. 00075 * Returns true if it finds the contig, otherwise false. 00076 * @param name The name of the contig to look for. 00077 * @return True if the named contig exists, false otherwise. 00078 */ 00079 virtual boolean HasContig( const string& name ) const = 0; 00080 /** 00081 * Get a contig index by name. 00082 * If the source does not contain a contig by the specified name 00083 * GetContigID returns UINT32_MAX. 00084 * @param name The name of the contig to look for. 00085 * @return The index of the named contig or UINT32_MAX. 00086 */ 00087 virtual uint32 GetContigID( const string& name ) const = 0; 00088 /** 00089 * Get the name of the specified contig. 00090 * Returns an empty string if the specified contig is out of range. 00091 * @param i The index of the contig or ALL_CONTIGS. 00092 * @return The name of the contig or an empty string. 00093 */ 00094 virtual string GetContigName( const uint32 i ) const = 0; 00095 /** 00096 * Get the total number of base pairs in the specified contig. 00097 * @param i The index of the contig or ALL_CONTIGS. 00098 * @return The length in base pairs of the specified contig. 00099 */ 00100 virtual gnSeqI GetContigSeqLength( const uint32 i ) const = 0; 00101 00102 /** 00103 * Get the filter currently being used to filter unwanted characters out of read sequences. 00104 * @return A pointer to the gnFilter currently in use. 00105 */ 00106 virtual const gnFilter* GetFilter() const = 0; 00107 /** 00108 * Set the filter that will be used to filter unwanted characters out of the sequence data. 00109 * @param filter The filter to remove unwanted characters from the sequence. 00110 * @throws NullPointer is thrown if the specified filter pointer is null. 00111 */ 00112 virtual void SetFilter( gnFilter* filter ) = 0; 00113 00114 /** 00115 * Gets raw input from this source. 00116 * Read will attempt to read "bufLen" bytes starting at "pos" directly from the source. 00117 * It stores the data in "buf", and returns the actual number of bytes read in bufLen. 00118 * Read will return false if a serious error occurs. 00119 * @param pos The position in the file to start reading. 00120 * @param buf The character array to store data into. 00121 * @param bufLen The number of bytes to read. 00122 * @return True if the operation was successful. 00123 */ 00124 virtual boolean Read( const uint64 pos, char* buf, uint32& bufLen) = 0; 00125 /** 00126 * Gets sequence data from this source. 00127 * SeqRead will attempt to read "bufLen" base pairs starting at "start", an offset into the sequence. 00128 * Reading inside a specific contig can be accomplished by supplying the "contigI" parameter with 00129 * a valid contig index. 00130 * SeqRead stores the sequence data in "buf" and returns the actual number of bases read in "bufLen". 00131 * SeqRead will return false if a serious error occurs. 00132 * @param start The base pair to start reading at. 00133 * @param buf The character array to store base pairs into. 00134 * @param bufLen The number of base pairs to read. 00135 * @param contigI The index of the contig to read or ALL_CONTIGS by default. 00136 * @return True if the operation was successful. 00137 */ 00138 virtual boolean SeqRead( const gnSeqI start, char* buf, uint32& bufLen, const uint32 contigI=ALL_CONTIGS ) = 0; 00139 /** 00140 * Get the annotated sequence data as a gnGenomeSpec. 00141 * GetSpec returns a gnGenomeSpec which contains the sequence, header, 00142 * and feature data contained by this source. 00143 * @return The annotated sequence data. 00144 */ 00145 virtual gnGenomeSpec *GetSpec() const = 0; 00146 private: 00147 };// class gnBaseSource 00148 00149 #endif 00150 // _gnBaseSource_h_