BadgerDB
|
00001 00008 #include <sstream> 00009 #include <cstring> 00010 #include <math.h> 00011 #include "index.h" 00012 #include "filescan.h" 00013 #include "exceptions/bad_param_exception.h" 00014 #include "exceptions/duplicate_attribute_exception.h" 00015 #include "exceptions/no_more_records_exception.h" 00016 #include "exceptions/end_of_file_exception.h" 00017 #include "exceptions/file_not_found_exception.h" 00018 #include "exceptions/record_not_found_exception.h" 00019 #include "exceptions/duplicate_insertion_exception.h" 00020 #include "exceptions/directory_overflow_exception.h" 00021 00022 #define MAX(a,b) ((a) > (b) ? (a) : (b)) 00023 00024 namespace badgerdb { 00025 00026 // Constructor for the Index class. The arguments passed in are 00027 // 'name' -- file name of the relation 00028 // 'offset', 'length', 'type' -- describing the attribute indexed 00029 // 'unique' -- flag for enforcing uniquess of the entries 00030 00031 Index::Index(const std::string &name, int offset, int length, Datatype type, int unique, BufMgr *bufferMgr) 00032 { 00033 bufMgr = bufferMgr; 00034 Page *pagePtr; 00035 00036 // For this assignment turn off hash indices on strings 00037 if(type == STRING) 00038 { 00039 throw BadParamException(); 00040 } 00041 00042 curBuc = NULL; 00043 00044 recSize = length + sizeof(RecordId); // size of the index entry 00045 numSlots = (Page::SIZE - 2*sizeof(short)) / recSize; // # entries on a page 00046 00047 // get name of the index file by concatenating relation name and 00048 // the offset of the attribute 00049 00050 std::ostringstream outputString; 00051 outputString << name << '.' << offset << std::endl; 00052 std::string indexName = outputString.str(); 00053 00054 // The constructor runs in two cases. In the first case, an index 00055 // doesn't exist as detected by db.openFile. The relation file 00056 // should be scanned and each tuple is inserted to the index. In 00057 // the second case, an index already exists. Only the headerPage 00058 // needs to read in and pinned down in the buffer pool. 00059 00060 try 00061 { 00062 file = new BlobFile(indexName, false/*open*/); //open file 00063 00064 headerPageNo = 1; 00065 bufMgr->readPage(file, headerPageNo, pagePtr); 00066 headerPage = (iHeaderPage*) pagePtr; 00067 dirSize = (int)pow(2, headerPage->depth); 00068 return; 00069 } 00070 catch(FileNotFoundException e) 00071 { 00072 } 00073 00074 file = new BlobFile(indexName, true/*create*/); //create file 00075 00076 // allocate and initialize the header page 00077 bufMgr->allocPage(file, headerPageNo, (Page*&)headerPage); 00078 strcpy(headerPage->fileName, name.c_str()); 00079 00080 headerPage->offset = offset; 00081 headerPage->length = length; 00082 headerPage->type = type; 00083 headerPage->depth = 0; 00084 headerPage->unique = unique; 00085 dirSize = 1; 00086 00087 // allocate and initialize the first bucket pointed by dir[0] 00088 PageId pageNo; 00089 Bucket* bucket; 00090 bufMgr->allocPage(file, pageNo, (Page*&)bucket); 00091 00092 bucket->depth = 0; 00093 bucket->slotCnt = 0; 00094 headerPage->dir[0] = pageNo; 00095 00096 bufMgr->unPinPage(file, pageNo, true); 00097 00098 // build index by scanning the relation file and inserting every 00099 // tuple 00100 00101 FileScan fileScan(name, bufferMgr); 00102 00103 fileScan.startScan(offset, length, type, std::string(""), EQ); 00104 00105 RecordId rid; 00106 00107 while(1) 00108 { 00109 try 00110 { 00111 fileScan.scanNext(rid); 00112 std::string recordString = fileScan.getRecord(); 00113 insertEntry(recordString.c_str() + offset, rid); 00114 } 00115 catch(EndOfFileException &e) 00116 { 00117 break; 00118 } 00119 } 00120 } 00121 00122 Index::~Index() { 00123 00124 bufMgr->unPinPage(file, headerPageNo, true); 00125 00126 if (curBuc != NULL) 00127 { 00128 bufMgr->unPinPage(file, curPageNo, true); 00129 } 00130 00131 bufMgr->flushFile(file); 00132 00133 delete file; 00134 } 00135 00136 // return a hashvalue hashed from the attribute 'value' according to 00137 // the current depth of the directory 00138 00139 void Index::hashIndex(const void *attr, int &hashvalue) 00140 { 00141 int index; 00142 const char *value = (const char *)attr; 00143 00144 switch (headerPage->type) 00145 { 00146 case STRING: 00147 { 00148 // For strings "add" the values of the characters, stopping 00149 // at the first character that is 0 00150 index = value[0]; 00151 for (int i = 1; i < headerPage->length; i++) 00152 { 00153 if (!value[i]) 00154 index += value[i]; 00155 else 00156 break; 00157 } 00158 } 00159 break; 00160 00161 case DOUBLE: 00162 { 00163 // Treat doubles as strings of length sizeof (double) 00164 index = value[0]; 00165 for (int i = 1; i < headerPage->length; i++) index += value[i]; 00166 } 00167 break; 00168 00169 case INTEGER: 00170 // copy the integer value to avoid byte-alignment errors. 00171 memcpy (&index, value, sizeof(int)); 00172 break; 00173 } 00174 00175 // hashvalue is the lower bits of 'index'. Number of bits is determined 00176 // by the depth of the directory 00177 00178 index = std::abs(index); 00179 int mask = 1 << headerPage->depth; 00180 hashvalue = index % mask; 00181 } 00182 00183 // Insert an <attribute, rid> pair into the index. Return OK if the 00184 // entry is inserted and DIROVERFLOW if the directory isn't large 00185 // enough to hold the indices. 00186 00187 void Index::insertEntry(const void *value, RecordId rid) 00188 { 00189 Bucket *bucket; 00190 Bucket *newBucket; 00191 PageId newPageNo; 00192 char data[Page::SIZE*2]; 00193 int counter; 00194 int index; 00195 00196 // If the 'unique' flag is set, scan the index to see if the 00197 // <attribute, rid> pair already exists 00198 00199 if (headerPage->unique == UNIQUE) 00200 { 00201 RecordId outRid; 00202 00203 startScan(value); 00204 00205 while(1) 00206 { 00207 try 00208 { 00209 scanNext(outRid); 00210 if(!memcmp(&outRid, &rid, sizeof(RecordId))) 00211 throw DuplicateInsertionException(); 00212 } 00213 catch (NoMoreRecordsException &e) 00214 { 00215 break; 00216 } 00217 } 00218 00219 endScan(); 00220 } 00221 00222 // Get the bucket containing the entry into buffer pool 00223 hashIndex(value, index); 00224 PageId pageNo = headerPage->dir[index]; 00225 00226 #ifdef DEBUGIND 00227 std::cout << "Inserting entry " << *(int*)value << " to bucket " 00228 << pageNo << std::endl; 00229 #endif //DEBUGIND 00230 00231 bufMgr->readPage(file, pageNo, (Page*&)bucket); 00232 00233 // the bucket needs to be splitted if the number of entries 00234 // on the bucket equals the maximum 00235 if (bucket->slotCnt == numSlots) { // splitting bucket 00236 00237 // allocate a new bucket 00238 bufMgr->allocPage(file, newPageNo, (Page*&)newBucket); 00239 00240 // Initialize this newly allocated bucket 00241 newBucket->depth = ++(bucket->depth); 00242 newBucket->slotCnt = 0; 00243 00244 // Copy all (value, rid) pairs in the old bucket and the new 00245 // entry to a temporary area 00246 memcpy(data, bucket->data, numSlots*recSize); 00247 memcpy(&(data[numSlots*recSize]), value, headerPage->length); 00248 memcpy(&(data[numSlots*recSize + headerPage->length]), &rid, sizeof(RecordId)); 00249 00250 counter = bucket->slotCnt + 1; 00251 bucket->slotCnt = 0; 00252 00253 // the directory needs to be doubled if the depth of the bucket 00254 // being splitted equals the depth of the directory 00255 00256 if (bucket->depth > headerPage->depth) 00257 { // doubling directory 00258 00259 // The directory is doubled and the lower half of the directory 00260 // is copied to the upper half 00261 00262 int newDirSize = 2 * dirSize; 00263 if (newDirSize > DIRSIZE) 00264 throw DirectoryOverflowException(); 00265 00266 for(int i = 0; i < dirSize; i++) 00267 { 00268 headerPage->dir[i + dirSize] = headerPage->dir[i]; 00269 } 00270 00271 dirSize = newDirSize; 00272 (headerPage->depth)++; 00273 headerPage->dir[index + (1 << (bucket->depth - 1))] = newPageNo; 00274 } 00275 else 00276 { 00277 // reset the appropriate directories to the new bucket 00278 int oldindex = index % (1 << (bucket->depth - 1)); 00279 int newindex = oldindex + (1 << (bucket->depth - 1)); 00280 for (int j = 0; j < dirSize; j++) 00281 { 00282 if ((j % (1 << (bucket->depth))) == newindex) 00283 { 00284 headerPage->dir[j] = newPageNo; 00285 } 00286 } 00287 } 00288 00289 #ifdef DEBUGIND 00290 printDir(); 00291 #endif //DEBUGIND 00292 00293 // call insertEntry recursively to insert all (value, rid) 00294 // pairs in the temporary area to the index 00295 00296 for (int k = 0; k < counter; k++) { 00297 value = &(data[k * recSize]); 00298 rid = * ((RecordId *)((char *)value + headerPage->length)); 00299 insertEntry(value, rid); 00300 } 00301 00302 bufMgr->unPinPage(file, newPageNo, true); 00303 00304 } 00305 else 00306 { 00307 // There is sufficient free space in the bucket. Insert (value, rid) here 00308 00309 int offset = (bucket->slotCnt) * recSize; 00310 memcpy(&(bucket->data[offset]), value, headerPage->length); 00311 memcpy(&(bucket->data[offset+headerPage->length]), &rid, sizeof(RecordId)); 00312 (bucket->slotCnt)++; 00313 } 00314 00315 bufMgr->unPinPage(file, pageNo, true); 00316 } 00317 00318 void Index::printDir() { 00319 std::cout << "printing directory...\n"; 00320 std::cout << "depth is " << headerPage->depth << std::endl; 00321 for (int i = 0; i < dirSize; i++) 00322 std::cout << i << "\tpoints to bucket " << headerPage->dir[i] << std::endl; 00323 std::cout << std::endl; 00324 } 00325 00326 // delete <attribute, rid> pair from the index. return OK if deleted 00327 // return RECNOTFOUND if such a pair does not exist in the index 00328 00329 void Index::deleteEntry(const void* value, const RecordId &rid) 00330 { 00331 int index; 00332 Bucket* bucket; 00333 00334 hashIndex(value, index); 00335 00336 // read in the bucket that might have the entry in it 00337 PageId pageNo = headerPage->dir[index]; 00338 bufMgr->readPage(file, pageNo, (Page*&)bucket); 00339 00340 // scan the bucket for the entry. Delete it if found 00341 for(int i = 0; i < bucket->slotCnt; i++) 00342 { 00343 try 00344 { 00345 matchRec(bucket, value, i); 00346 if(!memcmp(&rid, &(bucket->data[i*recSize + headerPage->length]), sizeof(RecordId))) 00347 { 00348 // the entry is found. Decrease the entry counts in the bucket 00349 // and copy the last entry in the bucket to the slot occupied 00350 // by the deleted entry 00351 00352 (bucket->slotCnt)--; 00353 memcpy(&(bucket->data[i*recSize]), &(bucket->data[recSize*(bucket->slotCnt)]), recSize); 00354 bufMgr->unPinPage(file, pageNo, true); 00355 return; 00356 } 00357 } 00358 catch(RecordNotFoundException &e) 00359 { 00360 } 00361 } 00362 00363 bufMgr->unPinPage(file, pageNo, false); 00364 00365 throw RecordNotFoundException(); 00366 } 00367 00368 // Test if the index entry in 'bucket' matches the attribute 'value. 00369 // 'offset' indicates the location of the entry in the bucket 00370 00371 void Index::matchRec(const Bucket* bucket, const void* value, int offset) 00372 { 00373 const char* tmp = &(bucket->data[offset*recSize]); 00374 00375 switch(headerPage->type) 00376 { 00377 case INTEGER: 00378 if (*(int *)value == *(int *)tmp) 00379 //if (atoi((char *)value) == *(int *)tmp) 00380 return; 00381 break; 00382 00383 case DOUBLE: 00384 { 00385 // Take care of byte misallignments 00386 double inVal, inTmp; 00387 memcpy (&inVal, value, sizeof (double)); 00388 memcpy (&inTmp, tmp, sizeof (double)); 00389 // if (*(double *)value == *(double*)tmp) return OK; 00390 if (inVal == inTmp) return; 00391 //if (atof((char *)value) == inTmp) return; 00392 } 00393 break; 00394 00395 case STRING: 00396 if (!memcmp(value, tmp, headerPage->length)) 00397 return; 00398 break; 00399 } 00400 00401 throw RecordNotFoundException(); 00402 } 00403 00404 void Index::printBucs() 00405 { 00406 00407 Bucket* bucket; 00408 00409 std::cerr << std::endl << "printing indices"<< std::endl; 00410 00411 for (int i = 0; i < dirSize; i++) 00412 { 00413 PageId pageNo = headerPage->dir[i]; 00414 std::cerr << "page " << pageNo << ": "; 00415 00416 bufMgr->readPage(file, pageNo, (Page*&)bucket); 00417 for (int j = 0; j < bucket->slotCnt; j++) 00418 { 00419 switch(headerPage->type) 00420 { 00421 case INTEGER: 00422 std::cerr << *(int*)&(bucket->data[j*recSize]) << "\t"; 00423 break; 00424 case DOUBLE: 00425 std::cerr << *(double*)&(bucket->data[j*recSize]) << "\t"; 00426 break; 00427 default:break; 00428 } 00429 } 00430 std::cerr << std::endl; 00431 bufMgr->unPinPage(file, pageNo, false); 00432 } 00433 } 00434 00435 // start a scan of the entries with attribute 'value'. return SCANTABFULL 00436 // if too many scans are open at the same time 00437 00438 void Index::startScan(const void* scanValue) 00439 { 00440 int hashvalue; 00441 00442 curValue = scanValue; 00443 00444 // read in and pin down the bucket containing entries with 'value' 00445 // in the buffer pool 00446 hashIndex(curValue, hashvalue); 00447 00448 PageId pageNo = curPageNo = headerPage->dir[hashvalue]; 00449 bufMgr->readPage(file, pageNo, (Page*&)curBuc); 00450 00451 curOffset = 0; 00452 } 00453 00454 // return the next entry with attribute 'value'. return NOMORERECS if 00455 // it reaches the end of the bucket 00456 00457 void Index::scanNext(RecordId& outRid) 00458 { 00459 int &offset = curOffset; 00460 Bucket* buc = curBuc; 00461 const void* value = curValue; 00462 00463 while(offset < buc->slotCnt) 00464 { 00465 try 00466 { 00467 matchRec(buc, value, offset); 00468 break; 00469 } 00470 catch(RecordNotFoundException &e) 00471 { 00472 } 00473 00474 offset++; 00475 } 00476 00477 if (offset == buc->slotCnt) 00478 throw NoMoreRecordsException(); 00479 else 00480 { 00481 outRid = *(RecordId *)&(buc->data[offset*recSize + headerPage->length]); 00482 offset++; 00483 } 00484 } 00485 00486 // unpin the bucket and clear the scan table entry. 00487 void Index::endScan() 00488 { 00489 if (curBuc != NULL) 00490 { 00491 bufMgr->unPinPage(file, curPageNo, false); 00492 curBuc = NULL; 00493 } 00494 } 00495 00496 /* 00497 // find a free entry in the scan table 00498 int Index::findFree() 00499 { 00500 int i; 00501 for(i = 0; i < MAXOPENSCANS; i++) 00502 { 00503 if (openScans[i].valid == false) return i; 00504 } 00505 return -1; 00506 } 00507 */ 00508 00509 }