00001 /* -*- mode:C++; c-basic-offset:4 -*- 00002 Shore-MT -- Multi-threaded port of the SHORE storage manager 00003 00004 Copyright (c) 2007-2009 00005 Data Intensive Applications and Systems Labaratory (DIAS) 00006 Ecole Polytechnique Federale de Lausanne 00007 00008 All Rights Reserved. 00009 00010 Permission to use, copy, modify and distribute this software and 00011 its documentation is hereby granted, provided that both the 00012 copyright notice and this permission notice appear in all copies of 00013 the software, derivative works or modified versions, and any 00014 portions thereof, and that both notices appear in supporting 00015 documentation. 00016 00017 This code is distributed in the hope that it will be useful, but 00018 WITHOUT ANY WARRANTY; without even the implied warranty of 00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS 00020 DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER 00021 RESULTING FROM THE USE OF THIS SOFTWARE. 00022 */ 00023 00024 /*<std-header orig-src='shore' incl-file-exclusion='SM_BASE_H'> 00025 00026 $Id: sm_base.h,v 1.154 2010/07/07 21:43:46 nhall Exp $ 00027 00028 SHORE -- Scalable Heterogeneous Object REpository 00029 00030 Copyright (c) 1994-99 Computer Sciences Department, University of 00031 Wisconsin -- Madison 00032 All Rights Reserved. 00033 00034 Permission to use, copy, modify and distribute this software and its 00035 documentation is hereby granted, provided that both the copyright 00036 notice and this permission notice appear in all copies of the 00037 software, derivative works or modified versions, and any portions 00038 thereof, and that both notices appear in supporting documentation. 00039 00040 THE AUTHORS AND THE COMPUTER SCIENCES DEPARTMENT OF THE UNIVERSITY 00041 OF WISCONSIN - MADISON ALLOW FREE USE OF THIS SOFTWARE IN ITS 00042 "AS IS" CONDITION, AND THEY DISCLAIM ANY LIABILITY OF ANY KIND 00043 FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 00044 00045 This software was developed with support by the Advanced Research 00046 Project Agency, ARPA order number 018 (formerly 8230), monitored by 00047 the U.S. Army Research Laboratory under contract DAAB07-91-C-Q518. 00048 Further funding for this work was provided by DARPA through 00049 Rome Research Laboratory Contract No. F30602-97-2-0247. 00050 00051 */ 00052 00053 #ifndef SM_BASE_H 00054 #define SM_BASE_H 00055 00056 #include "w_defines.h" 00057 00058 /* -- do not edit anything above this line -- </std-header>*/ 00059 00060 /**\file sm_base.h 00061 * \ingroup Macros 00062 */ 00063 00064 #ifdef __GNUG__ 00065 #pragma interface 00066 #endif 00067 00068 #include <climits> 00069 #ifndef OPTION_H 00070 #include "option.h" 00071 #endif 00072 #ifndef __opt_error_def_gen_h__ 00073 #include "opt_error_def_gen.h" 00074 #endif 00075 00076 00077 class ErrLog; 00078 class sm_stats_info_t; 00079 class xct_t; 00080 class xct_i; 00081 00082 class device_m; 00083 class io_m; 00084 class bf_m; 00085 class comm_m; 00086 class log_m; 00087 class lock_m; 00088 00089 class tid_t; 00090 class option_t; 00091 00092 #ifndef SM_EXTENTSIZE 00093 #define SM_EXTENTSIZE 8 00094 #endif 00095 #ifndef SM_LOG_PARTITIONS 00096 #define SM_LOG_PARTITIONS 8 00097 #endif 00098 00099 typedef w_rc_t rc_t; 00100 00101 00102 /**\cond skip 00103 * This structure collects the depth on construction 00104 * and checks that it matches the depth on destruction; this 00105 * is to ensure that we haven't forgotten to release 00106 * an anchor somewhere. 00107 * It's been extended to check the # times 00108 * we have acquired the 1thread_log_mutex. 00109 * 00110 * We're defining the CHECK_NESTING_VARIABLES macro b/c 00111 * this work is spread out and we want to have 1 place to 00112 * determine whether it's turned on or off; don't want to 00113 * make the mistake of changing the debug level (on which 00114 * it depends) in only one of several places. 00115 * 00116 * NOTE: this doesn't work in a multi-threaded xct context. 00117 * That's b/c the check is too late -- once the count goes 00118 * to zero, another thread can change it and throw off all the 00119 * counts. To be sure, we'd have to use a TLS copy as well 00120 * as the common copy of these counts. 00121 */ 00122 #if W_DEBUG_LEVEL > 0 00123 #define CHECK_NESTING_VARIABLES 1 00124 #else 00125 #define CHECK_NESTING_VARIABLES 0 00126 #endif 00127 struct check_compensated_op_nesting { 00128 #if CHECK_NESTING_VARIABLES 00129 xct_t* _xd; 00130 int _depth; 00131 int _depth_of_acquires; 00132 int _line; 00133 const char *const _file; 00134 // static methods are so we can avoid having to 00135 // include xct.h here. 00136 static int compensated_op_depth(xct_t* xd, int dflt); 00137 static int acquire_1thread_log_depth(xct_t* xd, int dflt); 00138 00139 check_compensated_op_nesting(xct_t* xd, int line, const char *const file) 00140 : _xd(xd), 00141 _depth(_xd? compensated_op_depth(_xd, 0) : 0), 00142 _depth_of_acquires(_xd? acquire_1thread_log_depth(_xd, 0) : 0), 00143 _line(line), 00144 _file(file) 00145 { 00146 } 00147 00148 ~check_compensated_op_nesting() { 00149 if(_xd) { 00150 if( _depth != compensated_op_depth(_xd, _depth) ) { 00151 fprintf(stderr, 00152 "th.%d check_compensated_op_nesting(%d,%s) depth was %d is %d\n", 00153 sthread_t::me()->id, 00154 _line, _file, _depth, compensated_op_depth(_xd, _depth)); 00155 } 00156 00157 if(_depth_of_acquires != acquire_1thread_log_depth(_xd, _depth)) { 00158 fprintf(stderr, 00159 "th.%d check_acquire_1thread_log_depth (%d,%s) depth was %d is %d\n", 00160 sthread_t::me()->id, 00161 _line, _file, _depth_of_acquires, 00162 acquire_1thread_log_depth(_xd, _depth)); 00163 } 00164 00165 w_assert0(_depth == compensated_op_depth(_xd, _depth)); 00166 w_assert0(_depth_of_acquires == acquire_1thread_log_depth(_xd, _depth)); 00167 } 00168 } 00169 #else 00170 check_compensated_op_nesting(xct_t*, int, const char *const) { } 00171 #endif 00172 }; 00173 00174 00175 /**\brief Encapsulates a few types uses in the API */ 00176 class smlevel_0 : public w_base_t { 00177 public: 00178 enum { eNOERROR = 0, eFAILURE = -1 }; 00179 enum { 00180 page_sz = SM_PAGESIZE, // page size (SM_PAGESIZE is set by makemake) 00181 ext_sz = SM_EXTENTSIZE, // extent size 00182 max_exts = max_int4, // max no. extents, must fit extnum_t 00183 #if defined(_POSIX_PATH_MAX) 00184 max_devname = _POSIX_PATH_MAX, // max length of unix path name 00185 // BEWARE: this might be larger than you want. Array sizes depend on it. 00186 // The default might be small enough, e.g., 256; getconf() yields the upper 00187 // bound on this value. 00188 #elif defined(MAXPATHLEN) 00189 max_devname = MAXPATHLEN, 00190 #else 00191 max_devname = 1024, 00192 #endif 00193 max_vols = 20, // max mounted volumes 00194 max_xct_thread = 20, // max threads in a xct 00195 max_servers = 15, // max servers to be connected with 00196 max_keycomp = 20, // max key component (for btree) 00197 max_openlog = SM_LOG_PARTITIONS, // max # log partitions 00198 max_dir_cache = max_vols * 10, 00199 00200 /* XXX I want to propogate sthread_t::iovec_max here, but 00201 it doesn't work because of sm_app.h not including 00202 the thread package. */ 00203 max_many_pages = 8, 00204 00205 srvid_map_sz = (max_servers - 1) / 8 + 1, 00206 ext_map_sz_in_bytes = ((ext_sz + 7) / 8), 00207 00208 dummy = 0 00209 }; 00210 00211 enum { 00212 max_rec_len = max_uint4 00213 }; 00214 00215 typedef sthread_base_t::fileoff_t fileoff_t; 00216 /* 00217 * Sizes-in-Kbytes for for things like volumes and devices. 00218 * A KB is assumes to be 1024 bytes. 00219 * Note: a different type was used for added type checking. 00220 */ 00221 typedef sthread_t::fileoff_t smksize_t; 00222 typedef w_base_t::base_stat_t base_stat_t; 00223 00224 /**\endcond skip */ 00225 00226 /* 00227 * rather than automatically aborting the transaction, when the 00228 * _log_warn_percent is exceeded, this callback is made, with a 00229 * pointer to the xct that did the writing, and with the 00230 * expectation that the result will be one of: 00231 * - return value == RCOK --> proceed 00232 * - return value == eUSERABORT --> victim to abort is given in the argument 00233 * 00234 * The server has the responsibility for choosing a victim and 00235 * for aborting the victim transaction. 00236 * 00237 */ 00238 00239 /**\brief Log space warning callback function type. 00240 * 00241 * For more details of how this is used, see the constructor ss_m::ss_m(). 00242 * 00243 * Storage manager methods check the available log space. 00244 * If the log is in danger of filling to the point that it will be 00245 * impossible to abort a transaction, a 00246 * callback is made to the server. The callback function is of this type. 00247 * The danger point is a threshold determined by the option sm_log_warn. 00248 * 00249 * The callback 00250 * function is meant to choose a victim xct and 00251 * tell if the xct should be 00252 * aborted by returning RC(eUSERABORT). 00253 * 00254 * Any other RC value is returned to the server through the call stack. 00255 * 00256 * The arguments: 00257 * @param[in] iter Pointer to an iterator over all xcts. 00258 * @param[out] victim Victim will be returned here. This is an in/out 00259 * paramter and is initially populated with the transaction that is 00260 * attached to the running thread. 00261 * @param[in] curr Bytes of log consumed by active transactions. 00262 * @param[in] thresh Threshhold just exceeded. 00263 * @param[in] logfile Character string name of oldest file to archive. 00264 * 00265 * This function must be careful not to return the same victim more 00266 * than once, even though the callback may be called many 00267 * times before the victim is completely aborted. 00268 * 00269 * When this function has archived the given log file, it needs 00270 * to notify the storage manager of that fact by calling 00271 * ss_m::log_file_was_archived(logfile) 00272 */ 00273 typedef w_rc_t (*LOG_WARN_CALLBACK_FUNC) ( 00274 xct_i* iter, 00275 xct_t *& victim, 00276 fileoff_t curr, 00277 fileoff_t thresh, 00278 const char *logfile 00279 ); 00280 /**\brief Callback function type for restoring an archived log file. 00281 * 00282 * @param[in] fname Original file name (with path). 00283 * @param[in] needed Partition number of the file needed. 00284 * 00285 * An alternative to aborting a transaction (when the log fills) 00286 * is to archive log files. 00287 * The server can use the log directory name to locate these files, 00288 * and may use the iterator and the static methods of xct_t to 00289 * determine which log file(s) to archive. 00290 * 00291 * Archiving and removing the older log files will work only if 00292 * the server also provides a LOG_ARCHIVED_CALLBACK_FUNCTION 00293 * to restore the 00294 * archived log files when the storage manager needs them for 00295 * rollback. 00296 * This is the function type used for that purpose. 00297 * 00298 * The function must locate the archived log file containing for the 00299 * partition number \a num, which was a suffix of the original log file's 00300 * name. 00301 * The log file must be restored with its original name. 00302 */ 00303 typedef w_base_t::uint4_t partition_number_t; 00304 typedef w_rc_t (*LOG_ARCHIVED_CALLBACK_FUNC) ( 00305 const char *fname, 00306 partition_number_t num 00307 ); 00308 00309 /**\cond skip */ 00310 enum switch_t { 00311 ON = 1, 00312 OFF = 0 00313 }; 00314 /**\endcond skip */ 00315 00316 /**\brief Comparison types used in scan_index_i 00317 * \enum cmp_t 00318 * Shorthand for CompareOp. 00319 */ 00320 enum cmp_t { bad_cmp_t=badOp, eq=eqOp, 00321 gt=gtOp, ge=geOp, lt=ltOp, le=leOp }; 00322 00323 00324 /* used by lock escalation routines */ 00325 enum escalation_options { 00326 dontEscalate = max_int4_minus1, 00327 dontEscalateDontPassOn, 00328 dontModifyThreshold = -1 00329 }; 00330 00331 /**\brief Types of stores. 00332 * \enum store_t 00333 */ 00334 enum store_t { 00335 t_bad_store_t, 00336 /// a b-tree or r-tree index 00337 t_index, 00338 /// a file of records 00339 t_file, 00340 /// t_lgrec is used for storing large record pages 00341 /// and is always associated with some t_file store 00342 t_lgrec 00343 }; 00344 00345 // types of indexes 00346 00347 /**\brief Index types */ 00348 enum ndx_t { 00349 t_bad_ndx_t, // illegal value 00350 t_btree, // B+tree with duplicates 00351 t_uni_btree, // Unique-key btree 00352 t_rtree // R*tree 00353 }; 00354 00355 /**\enum concurrency_t 00356 * \brief 00357 * Lock granularities 00358 * \details 00359 * - t_cc_bad Illegal 00360 * - t_cc_none No locking 00361 * - t_cc_record Record-level locking for files & records 00362 * - t_cc_page Page-level locking for files & records 00363 * - t_cc_file File-level locking for files & records 00364 * - t_cc_vol Volume-level locking for files and indexes 00365 * - t_cc_kvl Key-value locking for B+-Tree indexes 00366 * - t_cc_im Aries IM locking for B+-Tree indexes : experimental 00367 * - t_cc_modkvl Modified key-value locking: experimental 00368 * - t_cc_append Used internally \todo true? 00369 */ 00370 enum concurrency_t { 00371 t_cc_bad, // this is an illegal value 00372 t_cc_none, // no locking 00373 t_cc_record, // record-level 00374 t_cc_page, // page-level 00375 t_cc_file, // file-level 00376 t_cc_vol, 00377 t_cc_kvl, // key-value 00378 t_cc_im, // ARIES IM, not supported yet 00379 t_cc_modkvl, // modified ARIES KVL, for paradise use 00380 t_cc_append // append-only with scan_file_i 00381 }; 00382 00383 /**\cond skip */ 00384 00385 /* 00386 * smlevel_0::operating_mode is always set to 00387 * ONE of these, but the function in_recovery() tests for 00388 * any of them, so we'll give them bit-mask values 00389 */ 00390 enum operating_mode_t { 00391 t_not_started = 0, 00392 t_in_analysis = 0x1, 00393 t_in_redo = 0x2, 00394 t_in_undo = 0x4, 00395 t_forward_processing = 0x8 00396 }; 00397 00398 static concurrency_t cc_alg; // concurrency control algorithm 00399 static bool cc_adaptive; // is PS-AA (adaptive) algorithm used? 00400 00401 #include "e_error_enum_gen.h" 00402 00403 static const w_error_info_t error_info[]; 00404 static void init_errorcodes(); 00405 00406 static void add_to_global_stats(const sm_stats_info_t &from); 00407 static void add_from_global_stats(sm_stats_info_t &to); 00408 00409 static device_m* dev; 00410 static io_m* io; 00411 static bf_m* bf; 00412 static lock_m* lm; 00413 00414 static log_m* log; 00415 static tid_t* redo_tid; 00416 00417 static LOG_WARN_CALLBACK_FUNC log_warn_callback; 00418 static LOG_ARCHIVED_CALLBACK_FUNC log_archived_callback; 00419 static fileoff_t log_warn_trigger; 00420 static int log_warn_exceed_percent; 00421 00422 static int dcommit_timeout; // to convey option to coordinator, 00423 // if it is created by VAS 00424 00425 static ErrLog* errlog; 00426 00427 static bool shutdown_clean; 00428 static bool shutting_down; 00429 static bool logging_enabled; 00430 static bool lock_caching_default; 00431 static bool do_prefetch; 00432 00433 static operating_mode_t operating_mode; 00434 static bool in_recovery() { 00435 return ((operating_mode & 00436 (t_in_redo | t_in_undo | t_in_analysis)) !=0); } 00437 static bool in_recovery_analysis() { 00438 return ((operating_mode & t_in_analysis) !=0); } 00439 static bool in_recovery_undo() { 00440 return ((operating_mode & t_in_undo ) !=0); } 00441 static bool in_recovery_redo() { 00442 return ((operating_mode & t_in_redo ) !=0); } 00443 00444 // these variable are the default values for lock escalation counts 00445 static w_base_t::int4_t defaultLockEscalateToPageThreshold; 00446 static w_base_t::int4_t defaultLockEscalateToStoreThreshold; 00447 static w_base_t::int4_t defaultLockEscalateToVolumeThreshold; 00448 00449 // These variables control the size of the log. 00450 static fileoff_t max_logsz; // max log file size 00451 00452 // This variable controls checkpoint frequency. 00453 // Checkpoints are taken every chkpt_displacement bytes 00454 // written to the log. 00455 static fileoff_t chkpt_displacement; 00456 00457 // The volume_format_version is used to test compatability 00458 // of software with a volume. Whenever a change is made 00459 // to the SM software that makes it incompatible with 00460 // previouly formatted volumes, this volume number should 00461 // be incremented. The value is set in sm.cpp. 00462 static w_base_t::uint4_t volume_format_version; 00463 00464 // This is a zeroed page for use wherever initialized memory 00465 // is needed. 00466 static char zero_page[page_sz]; 00467 00468 // option for controlling background buffer flush thread 00469 static option_t* _backgroundflush; 00470 00471 00472 /* 00473 * Pre-defined store IDs -- see also vol.h 00474 * 0 -- is reserved for the extent map and the store map 00475 * 1 -- directory (see dir.cpp) 00476 * 2 -- root index (see sm.cpp) 00477 */ 00478 enum { 00479 store_id_extentmap = 0, 00480 store_id_directory = 1, 00481 store_id_root_index = 2 00482 }; 00483 00484 enum { 00485 eINTERNAL = fcINTERNAL, 00486 eOS = fcOS, 00487 eOUTOFMEMORY = fcOUTOFMEMORY, 00488 eNOTFOUND = fcNOTFOUND, 00489 eNOTIMPLEMENTED = fcNOTIMPLEMENTED 00490 }; 00491 00492 enum store_flag_t { 00493 // NB: this had better match sm_store_property_t (sm_int_3.h) !!! 00494 // or at least be convted properly every time we come through the API 00495 st_bad = 0x0, 00496 st_regular = 0x01, // fully logged 00497 st_tmp = 0x02, // space logging only, 00498 // file destroy on dismount/restart 00499 st_load_file = 0x04, // not stored in the stnode_t, 00500 // only passed down to 00501 // io_m and then converted to tmp and added to the 00502 // list of load files for the xct. 00503 // no longer needed 00504 st_insert_file = 0x08, // stored in stnode, but not on page. 00505 // new pages are saved as tmp, old pages as regular. 00506 st_empty = 0x100 // store might be empty - used ONLY 00507 // as a function argument, NOT stored 00508 // persistently. Nevertheless, it's 00509 // defined here to be sure that if other 00510 // store flags are added, this doesn't 00511 // conflict with them. 00512 }; 00513 00514 /* 00515 * for use by set_store_deleting_log; 00516 * type of operation to perform on the stnode 00517 */ 00518 enum store_operation_t { 00519 t_delete_store, 00520 t_create_store, 00521 t_set_deleting, 00522 t_set_store_flags, 00523 t_set_first_ext}; 00524 00525 enum store_deleting_t { 00526 t_not_deleting_store, 00527 t_deleting_store, 00528 t_store_freeing_exts, 00529 t_unknown_deleting}; 00530 /**\endcond skip */ 00531 }; 00532 00533 /**\cond skip */ 00534 ostream& 00535 operator<<(ostream& o, smlevel_0::store_flag_t flag); 00536 00537 ostream& 00538 operator<<(ostream& o, const smlevel_0::store_operation_t op); 00539 00540 ostream& 00541 operator<<(ostream& o, const smlevel_0::store_deleting_t value); 00542 00543 /**\endcond skip */ 00544 00545 /*<std-footer incl-file-exclusion='SM_BASE_H'> -- do not edit anything below this line -- */ 00546 00547 #endif /*</std-footer>*/