BadgerDB
/afs/cs.wisc.edu/u/n/w/nwilliam/private/workspace/Quut/src/sort.h
00001 
00007 #pragma once
00008 
00009 #include <vector>
00010 #include "filescan.h"
00011 
00012 namespace badgerdb
00013 {
00014 
00015 typedef struct
00016 {
00017   RecordId rid;       // record id of current record
00018   char* field;        // pointer to field
00019   int length;         // length of field
00020 } SORTREC;
00021 
00022 
00026 class SortedFile {
00027  public:
00028   /*
00029    * Constructor
00030    * @param filename  Name of the original file
00031    * @param offset    Offset of sort key
00032    * @param length    Length of sort key
00033    * @param type      Type of sort key
00034    * @param maxItems  Maximum number of items in every sorted run. 
00035    */
00036   SortedFile(const std::string & fileName, int offset, int length, Datatype type, int maxItems);
00037 
00038   /*
00039    * Fetch next record in sort order.
00040    * @throws EndOfFileException() when all records are read.
00041    */
00042   std::string next();
00043 
00044   /*
00045    * Record a position in sort sequence.
00046    */
00047   void setMark();
00048 
00049   /*
00050    * Go to last recorded spot.
00051    */
00052   void gotoMark();
00053 
00054   /*
00055    * Destructor
00056    */
00057   ~SortedFile();
00058 
00059  private:
00060   void sortFile();                    // split source file into sub-runs
00061   void generateRun(int numItems);     // generate one sub-run of file
00062   void startScans();                  // start a scan on each sorted run
00063 
00064   typedef struct
00065   {
00066     std::string name;                 // name of run file
00067     FileScan* file;                   // ptr to sorted run of file
00068     int valid;                        // TRUE if recPtr has a record
00069     std::string rec;
00070     RecordId rid;                     // RecordId of current record of run
00071     RecordId mark;                    // last marked spot (RecordId) in file
00072   } RUN;
00073 
00074   std::vector<RUN> runs;              // holds info about each sub-run
00075 
00076   FileScan* file;                     // source file to sort
00077   std::string fileName;               // name of source file to sort
00078   Datatype type;                      // type of sort attribute
00079   int offset;                         // offset of sort attribute
00080   int length;                         // length of sort attribute
00081 
00082   SORTREC *buffer;                    // in-memory sort buffer
00083   int maxItems;                       // max. # of items/tuples in buffer
00084   int numItems;                       // current # of items in buffer
00085 };
00086 
00087 }
 All Classes Namespaces Functions Variables Typedefs Enumerations Friends