sm_base.h

Go to the documentation of this file.
00001 /* -*- mode:C++; c-basic-offset:4 -*-
00002      Shore-MT -- Multi-threaded port of the SHORE storage manager
00003    
00004                        Copyright (c) 2007-2009
00005       Data Intensive Applications and Systems Labaratory (DIAS)
00006                Ecole Polytechnique Federale de Lausanne
00007    
00008                          All Rights Reserved.
00009    
00010    Permission to use, copy, modify and distribute this software and
00011    its documentation is hereby granted, provided that both the
00012    copyright notice and this permission notice appear in all copies of
00013    the software, derivative works or modified versions, and any
00014    portions thereof, and that both notices appear in supporting
00015    documentation.
00016    
00017    This code is distributed in the hope that it will be useful, but
00018    WITHOUT ANY WARRANTY; without even the implied warranty of
00019    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS
00020    DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
00021    RESULTING FROM THE USE OF THIS SOFTWARE.
00022 */
00023 
00024 /*<std-header orig-src='shore' incl-file-exclusion='SM_BASE_H'>
00025 
00026  $Id: sm_base.h,v 1.154 2010/07/07 21:43:46 nhall Exp $
00027 
00028 SHORE -- Scalable Heterogeneous Object REpository
00029 
00030 Copyright (c) 1994-99 Computer Sciences Department, University of
00031                       Wisconsin -- Madison
00032 All Rights Reserved.
00033 
00034 Permission to use, copy, modify and distribute this software and its
00035 documentation is hereby granted, provided that both the copyright
00036 notice and this permission notice appear in all copies of the
00037 software, derivative works or modified versions, and any portions
00038 thereof, and that both notices appear in supporting documentation.
00039 
00040 THE AUTHORS AND THE COMPUTER SCIENCES DEPARTMENT OF THE UNIVERSITY
00041 OF WISCONSIN - MADISON ALLOW FREE USE OF THIS SOFTWARE IN ITS
00042 "AS IS" CONDITION, AND THEY DISCLAIM ANY LIABILITY OF ANY KIND
00043 FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
00044 
00045 This software was developed with support by the Advanced Research
00046 Project Agency, ARPA order number 018 (formerly 8230), monitored by
00047 the U.S. Army Research Laboratory under contract DAAB07-91-C-Q518.
00048 Further funding for this work was provided by DARPA through
00049 Rome Research Laboratory Contract No. F30602-97-2-0247.
00050 
00051 */
00052 
00053 #ifndef SM_BASE_H
00054 #define SM_BASE_H
00055 
00056 #include "w_defines.h"
00057 
00058 /*  -- do not edit anything above this line --   </std-header>*/
00059 
00060 /**\file sm_base.h
00061  * \ingroup Macros
00062  */
00063 
00064 #ifdef __GNUG__
00065 #pragma interface
00066 #endif
00067 
00068 #include <climits>
00069 #ifndef OPTION_H
00070 #include "option.h"
00071 #endif
00072 #ifndef __opt_error_def_gen_h__
00073 #include "opt_error_def_gen.h"
00074 #endif
00075 
00076 
00077 class ErrLog;            // Forward declarations: where this header uses these
00078 class sm_stats_info_t;   // types, it does so only through pointers or references,
00079 class xct_t;             // so their full definitions are not needed here.
00080 class xct_i;
00081 
00082 class device_m;
00083 class io_m;
00084 class bf_m;
00085 class comm_m;
00086 class log_m;
00087 class lock_m;
00088 
00089 class tid_t;
00090 class option_t;
00091 // Defaults used only when the build has not already defined these macros.
00092 #ifndef        SM_EXTENTSIZE
00093 #define        SM_EXTENTSIZE        8   // becomes smlevel_0::ext_sz
00094 #endif
00095 #ifndef        SM_LOG_PARTITIONS
00096 #define        SM_LOG_PARTITIONS        8   // becomes smlevel_0::max_openlog
00097 #endif
00098 
00099 typedef   w_rc_t        rc_t;   // short alias for w_rc_t
00100 
00101 
00102 /**\cond skip
00103  * This structure records two per-transaction depths on construction
00104  * and checks that they match the depths on destruction; this
00105  * is to ensure that we haven't forgotten to release
00106  * an anchor somewhere.
00107  * The second depth is the number of times
00108  * we have acquired the 1thread_log_mutex.
00109  *
00110  * We're defining the CHECK_NESTING_VARIABLES macro because
00111  * this work is spread out and we want to have one place to
00112  * determine whether it's turned on or off; we don't want to
00113  * make the mistake of changing the debug level (on which
00114  * it depends) in only one of several places.
00115  *
00116  * NOTE: this doesn't work in a multi-threaded xct context.
00117  * That's because the check is too late -- once the count goes
00118  * to zero, another thread can change it and throw off all the
00119  * counts. To be sure, we'd have to use a TLS copy as well
00120  * as the common copy of these counts.
00121  */
00122 #if W_DEBUG_LEVEL > 0
00123 #define CHECK_NESTING_VARIABLES 1
00124 #else
00125 #define CHECK_NESTING_VARIABLES 0
00126 #endif
00127 struct check_compensated_op_nesting {
00128 #if CHECK_NESTING_VARIABLES
00129     xct_t* _xd;                 // transaction being checked; null disables the checks
00130     int _depth;                 // compensated-op depth recorded at construction
00131     int _depth_of_acquires;     // 1thread-log acquire depth recorded at construction
00132     int _line;                  // line number printed in the diagnostic
00133     const char *const _file;    // file name printed in the diagnostic
00134     // Static so that xct.h need not be included here. NOTE(review): dflt is
00135     // presumably what is returned when the real depth is unusable -- confirm.
00136     static int compensated_op_depth(xct_t* xd, int dflt);
00137     static int acquire_1thread_log_depth(xct_t* xd, int dflt);
00138     // Record both depths now; each is taken as 0 when xd is null.
00139     check_compensated_op_nesting(xct_t* xd, int line, const char *const file)
00140     : _xd(xd), 
00141     _depth(_xd? compensated_op_depth(_xd, 0) : 0), 
00142     _depth_of_acquires(_xd? acquire_1thread_log_depth(_xd, 0) : 0), 
00143     _line(line),
00144     _file(file)
00145     {
00146     }
00147     // Print any mismatch to stderr, then assert that both depths are unchanged.
00148     ~check_compensated_op_nesting() {
00149         if(_xd) {
00150             if( _depth != compensated_op_depth(_xd, _depth) ) {
00151                 fprintf(stderr, 
00152                     "th.%d check_compensated_op_nesting(%d,%s) depth was %d is %d\n",
00153                     sthread_t::me()->id,
00154                     _line, _file, _depth, compensated_op_depth(_xd, _depth));
00155             }
00156             // The default passed here must be the value being compared (was _depth).
00157             if(_depth_of_acquires != acquire_1thread_log_depth(_xd, _depth_of_acquires)) {
00158                 fprintf(stderr, 
00159                 "th.%d check_acquire_1thread_log_depth (%d,%s) depth was %d is %d\n",
00160                     sthread_t::me()->id,
00161                     _line, _file, _depth_of_acquires, 
00162                     acquire_1thread_log_depth(_xd, _depth_of_acquires));
00163             }
00164             w_assert0(_depth == compensated_op_depth(_xd, _depth));
00165             w_assert0(_depth_of_acquires ==
00166                       acquire_1thread_log_depth(_xd, _depth_of_acquires));
00167         }
00168     }
00169 #else   // checks compiled out: the constructor does nothing
00170     check_compensated_op_nesting(xct_t*, int, const char *const) { }
00171 #endif
00172 };
00173 
00174 
00175 /**\brief Encapsulates a few types used in the API */
00176 class smlevel_0 : public w_base_t {
00177 public:
00178     enum { eNOERROR = 0, eFAILURE = -1 };
00179     enum { 
00180         page_sz = SM_PAGESIZE,        // page size (SM_PAGESIZE is set by makemake)
00181         ext_sz = SM_EXTENTSIZE,        // extent size
00182         max_exts = max_int4,        // max no. extents, must fit extnum_t
00183 #if defined(_POSIX_PATH_MAX)
00184         max_devname = _POSIX_PATH_MAX,        // max length of unix path name
00185     // BEWARE: this might be larger than you want.  Array sizes depend on it.
00186     // The default might be small enough, e.g., 256; getconf() yields the upper
00187     // bound on this value.
00188 #elif defined(MAXPATHLEN)
00189         max_devname = MAXPATHLEN,
00190 #else
00191         max_devname = 1024,        
00192 #endif
00193         max_vols = 20,                // max mounted volumes
00194         max_xct_thread = 20,        // max threads in a xct
00195         max_servers = 15,       // max servers to be connected with
00196         max_keycomp = 20,        // max key component (for btree)
00197         max_openlog = SM_LOG_PARTITIONS,        // max # log partitions
00198         max_dir_cache = max_vols * 10,
00199 
00200         /* XXX I want to propagate sthread_t::iovec_max here, but
00201            it doesn't work because of sm_app.h not including
00202            the thread package. */
00203         max_many_pages = 8,
00204 
00205         srvid_map_sz = (max_servers - 1) / 8 + 1,   // max_servers / 8, rounded up
00206         ext_map_sz_in_bytes = ((ext_sz + 7) / 8),   // ext_sz / 8, rounded up
00207 
00208         dummy = 0
00209     };
00210 
00211     enum {
00212         max_rec_len = max_uint4
00213     };
00214 
00215     typedef sthread_base_t::fileoff_t fileoff_t;
00216     /*
00217      * Sizes-in-Kbytes for things like volumes and devices.
00218      * A KB is assumed to be 1024 bytes.
00219      * Note: a different type was used for added type checking.
00220      */
00221     typedef sthread_t::fileoff_t smksize_t;
00222     typedef w_base_t::base_stat_t base_stat_t; 
00223 
00224     /**\endcond skip */
00225 
00226     /*
00227      * rather than automatically aborting the transaction, when the
00228      * _log_warn_percent is exceeded, this callback is made, with a
00229      * pointer to the xct that did the writing, and with the
00230      * expectation that the result will be one of:
00231      * - return value == RCOK --> proceed
00232      * - return value == eUSERABORT --> victim to abort is given in the argument
00233      *
00234      * The server has the responsibility for choosing a victim and 
00235      * for aborting the victim transaction. 
00236      *
00237      */
00238 
00239     /**\brief Log space warning callback function type.  
00240      *
00241      * For more details of how this is used, see the constructor ss_m::ss_m().
00242      *
00243      * Storage manager methods check the available log space. 
00244      * If the log is in danger of filling to the point that it will be
00245      * impossible to abort a transaction, a
00246      * callback is made to the server.  The callback function is of this type.
00247      * The danger point is a threshold determined by the option sm_log_warn. 
00248      *
00249      * The callback
00250      * function is meant to choose a victim xct and 
00251      * tell if the xct should be
00252      * aborted by returning RC(eUSERABORT).  
00253      *
00254      * Any other RC value is returned to the server through the call stack.
00255      *
00256      * The arguments:
00257      * @param[in] iter    Pointer to an iterator over all xcts.
00258      * @param[in,out] victim    Victim will be returned here. This is an in/out
00259      * parameter and is initially populated with the transaction that is
00260      * attached to the running thread.
00261      * @param[in] curr    Bytes of log consumed by active transactions.
00262      * @param[in] thresh   Threshold just exceeded. 
00263      * @param[in] logfile   Character string name of oldest file to archive.
00264      *                     
00265      *  This function must be careful not to return the same victim more
00266      *  than once, even though the callback may be called many 
00267      *  times before the victim is completely aborted.
00268      *
00269      *  When this function has archived the given log file, it needs
00270      *  to notify the storage manager of that fact by calling
00271      *  ss_m::log_file_was_archived(logfile)
00272      */
00273     typedef w_rc_t (*LOG_WARN_CALLBACK_FUNC) (
00274             xct_i*      iter,     
00275             xct_t *&    victim, 
00276             fileoff_t   curr, 
00277             fileoff_t   thresh, 
00278             const char *logfile
00279         );
00280     /**\brief Callback function type for restoring an archived log file.
00281      *
00282      * @param[in] fname   Original file name (with path).
00283      * @param[in] num     Partition number of the file needed.
00284      *
00285      *  An alternative to aborting a transaction (when the log fills)
00286      *  is to archive log files.
00287      *  The server can use the log directory name to locate these files,
00288      *  and may use the iterator and the static methods of xct_t to 
00289      *  determine which log file(s) to archive.
00290      *
00291      *  Archiving and removing the older log files will work only if
00292      *  the server also provides a LOG_ARCHIVED_CALLBACK_FUNC 
00293      *  to restore the
00294      *  archived log files when the storage manager needs them for
00295      *  rollback.
00296      *  This is the function type used for that purpose.
00297      *
00298      *  The function must locate the archived log file for the
00299      *  partition number \a num, which was a suffix of the original log file's
00300      *  name.
00301      *  The log file must be restored with its original name.  
00302      */
00303     typedef    w_base_t::uint4_t partition_number_t; // type of the callback's num argument
00304     typedef w_rc_t (*LOG_ARCHIVED_CALLBACK_FUNC) (
00305             const char *fname,
00306             partition_number_t num
00307         );
00308 
00309 /**\cond skip */
00310     enum switch_t {
00311         ON = 1,
00312         OFF = 0
00313     };
00314 /**\endcond skip */
00315 
00316     /**\brief Comparison types used in scan_index_i
00317      * \enum cmp_t
00318      * Shorthand for CompareOp.
00319      */
00320     enum cmp_t { bad_cmp_t=badOp, eq=eqOp,
00321                  gt=gtOp, ge=geOp, lt=ltOp, le=leOp };
00322 
00323 
00324     /* used by lock escalation routines */
00325     enum escalation_options {
00326         dontEscalate        = max_int4_minus1,
00327         dontEscalateDontPassOn,        // implicitly dontEscalate + 1
00328         dontModifyThreshold        = -1
00329     };
00330 
00331     /**\brief Types of stores.
00332      * \enum store_t
00333      */
00334     enum store_t { 
00335         t_bad_store_t, 
00336         /// a b-tree or r-tree index
00337         t_index, 
00338         /// a file of records
00339         t_file, 
00340         /// t_lgrec is used for storing large record pages 
00341         /// and is always associated with some t_file store
00342         t_lgrec 
00343     };
00344     
00345     // types of indexes
00346 
00347     /**\brief Index types */
00348     enum ndx_t { 
00349         t_bad_ndx_t,             // illegal value
00350         t_btree,                 // B+tree with duplicates
00351         t_uni_btree,             // Unique-key btree
00352         t_rtree                  // R*tree
00353     };
00354 
00355     /**\enum concurrency_t 
00356      * \brief 
00357      * Lock granularities 
00358      * \details
00359      * - t_cc_bad Illegal
00360      * - t_cc_none No locking
00361      * - t_cc_record Record-level locking for files & records
00362      * - t_cc_page Page-level locking for files & records 
00363      * - t_cc_file File-level locking for files & records 
00364      * - t_cc_vol Volume-level locking for files and indexes 
00365      * - t_cc_kvl Key-value locking for B+-Tree indexes
00366      * - t_cc_im Aries IM locking for B+-Tree indexes : experimental
00367      * - t_cc_modkvl Modified key-value locking: experimental
00368      * - t_cc_append Used internally \todo true?
00369      */
00370     enum concurrency_t {
00371         t_cc_bad,                // this is an illegal value
00372         t_cc_none,                // no locking
00373         t_cc_record,                // record-level
00374         t_cc_page,                // page-level
00375         t_cc_file,                // file-level
00376         t_cc_vol,                // volume-level
00377         t_cc_kvl,                // key-value
00378         t_cc_im,                 // ARIES IM, not supported yet
00379         t_cc_modkvl,                 // modified ARIES KVL, for paradise use
00380         t_cc_append                 // append-only with scan_file_i
00381     };
00382 
00383 /**\cond skip */
00384 
00385     /* 
00386      * smlevel_0::operating_mode is always set to 
00387      * ONE of these, but the function in_recovery() tests for
00388      * any of them, so we'll give them bit-mask values
00389      */
00390     enum operating_mode_t {
00391         t_not_started = 0, 
00392         t_in_analysis = 0x1,
00393         t_in_redo = 0x2,
00394         t_in_undo = 0x4,
00395         t_forward_processing = 0x8
00396     };
00397 
00398     static concurrency_t cc_alg;        // concurrency control algorithm
00399     static bool          cc_adaptive;        // is PS-AA (adaptive) algorithm used?
00400     // Included inside the class body, so what it declares is scoped to smlevel_0.
00401 #include "e_error_enum_gen.h"
00402 
00403     static const w_error_info_t error_info[];
00404     static void init_errorcodes();
00405 
00406     static void  add_to_global_stats(const sm_stats_info_t &from);
00407     static void  add_from_global_stats(sm_stats_info_t &to);
00408 
00409     static device_m* dev;
00410     static io_m* io;
00411     static bf_m* bf;
00412     static lock_m* lm;
00413 
00414     static log_m* log;
00415     static tid_t* redo_tid;
00416 
00417     static LOG_WARN_CALLBACK_FUNC log_warn_callback;
00418     static LOG_ARCHIVED_CALLBACK_FUNC log_archived_callback;
00419     static fileoff_t              log_warn_trigger; 
00420     static int                    log_warn_exceed_percent; 
00421 
00422     static int    dcommit_timeout; // to convey option to coordinator,
00423                                    // if it is created by VAS
00424 
00425     static ErrLog* errlog;
00426 
00427     static bool        shutdown_clean;
00428     static bool        shutting_down;
00429     static bool        logging_enabled;
00430     static bool        lock_caching_default;
00431     static bool        do_prefetch;
00432     // The predicates below test operating_mode against the operating_mode_t bits.
00433     static operating_mode_t operating_mode;
00434     static bool in_recovery() { 
00435         return ((operating_mode & 
00436                 (t_in_redo | t_in_undo | t_in_analysis)) !=0); }
00437     static bool in_recovery_analysis() { 
00438         return ((operating_mode & t_in_analysis) !=0); }
00439     static bool in_recovery_undo() { 
00440         return ((operating_mode & t_in_undo ) !=0); }
00441     static bool in_recovery_redo() { 
00442         return ((operating_mode & t_in_redo ) !=0); }
00443 
00444     // these variables are the default values for lock escalation counts
00445     static w_base_t::int4_t defaultLockEscalateToPageThreshold;
00446     static w_base_t::int4_t defaultLockEscalateToStoreThreshold;
00447     static w_base_t::int4_t defaultLockEscalateToVolumeThreshold;
00448 
00449     // These variables control the size of the log.
00450     static fileoff_t max_logsz; // max log file size
00451 
00452     // This variable controls checkpoint frequency.
00453     // Checkpoints are taken every chkpt_displacement bytes
00454     // written to the log.
00455     static fileoff_t chkpt_displacement;
00456 
00457     // The volume_format_version is used to test compatibility
00458     // of software with a volume.  Whenever a change is made
00459     // to the SM software that makes it incompatible with
00460     // previously formatted volumes, this version number should
00461     // be incremented.  The value is set in sm.cpp.
00462     static w_base_t::uint4_t volume_format_version;
00463 
00464     // This is a zeroed page for use wherever initialized memory
00465     // is needed.
00466     static char zero_page[page_sz];
00467 
00468     // option for controlling background buffer flush thread
00469     static option_t* _backgroundflush;
00470 
00471 
00472     /*
00473      * Pre-defined store IDs -- see also vol.h
00474      * 0 -- is reserved for the extent map and the store map
00475      * 1 -- directory (see dir.cpp)
00476      * 2 -- root index (see sm.cpp)
00477      */
00478     enum {
00479         store_id_extentmap = 0,
00480         store_id_directory = 1,
00481         store_id_root_index = 2 
00482     };
00483 
00484     enum {
00485             eINTERNAL = fcINTERNAL,
00486             eOS = fcOS,
00487             eOUTOFMEMORY = fcOUTOFMEMORY,
00488             eNOTFOUND = fcNOTFOUND,
00489             eNOTIMPLEMENTED = fcNOTIMPLEMENTED
00490     };
00491 
00492     enum store_flag_t {
00493         // NB: this had better match sm_store_property_t (sm_int_3.h) !!!
00494         // or at least be converted properly every time we come through the API
00495         st_bad            = 0x0,
00496         st_regular        = 0x01, // fully logged
00497         st_tmp            = 0x02, // space logging only, 
00498                                   // file destroy on dismount/restart
00499         st_load_file      = 0x04, // not stored in the stnode_t, 
00500                             // only passed down to
00501                             // io_m and then converted to tmp and added to the
00502                             // list of load files for the xct.
00503                             // no longer needed
00504         st_insert_file     = 0x08,        // stored in stnode, but not on page.
00505                             // new pages are saved as tmp, old pages as regular.
00506         st_empty           = 0x100 // store might be empty - used ONLY
00507                             // as a function argument, NOT stored
00508                             // persistently.  Nevertheless, it's
00509                             // defined here to be sure that if other
00510                             // store flags are added, this doesn't
00511                             // conflict with them.
00512     };
00513 
00514     /* 
00515      * for use by set_store_deleting_log; 
00516      * type of operation to perform on the stnode 
00517      */
00518     enum store_operation_t {
00519             t_delete_store, 
00520             t_create_store, 
00521             t_set_deleting, 
00522             t_set_store_flags, 
00523             t_set_first_ext};
00524 
00525     enum store_deleting_t  {
00526             t_not_deleting_store, 
00527             t_deleting_store, 
00528             t_store_freeing_exts, 
00529             t_unknown_deleting};
00530 /**\endcond skip */
00531 };
00532 
00533 /**\cond skip */
00534 ostream&   // stream-insertion overload for smlevel_0::store_flag_t (declaration only)
00535 operator<<(ostream& o, smlevel_0::store_flag_t flag);
00536 
00537 ostream&   // stream-insertion overload for smlevel_0::store_operation_t (declaration only)
00538 operator<<(ostream& o, const smlevel_0::store_operation_t op);
00539 
00540 ostream&   // stream-insertion overload for smlevel_0::store_deleting_t (declaration only)
00541 operator<<(ostream& o, const smlevel_0::store_deleting_t value);
00542 
00543 /**\endcond skip */
00544 
00545 /*<std-footer incl-file-exclusion='SM_BASE_H'>  -- do not edit anything below this line -- */
00546 
00547 #endif          /*</std-footer>*/

Generated on Wed Jul 7 17:22:32 2010 for Shore Storage Manager by  doxygen 1.4.7