w_hash.h

Go to the documentation of this file.
00001 /* -*- mode:C++; c-basic-offset:4 -*-
00002      Shore-MT -- Multi-threaded port of the SHORE storage manager
00003    
00004                        Copyright (c) 2007-2009
00005       Data Intensive Applications and Systems Labaratory (DIAS)
00006                Ecole Polytechnique Federale de Lausanne
00007    
00008                          All Rights Reserved.
00009    
00010    Permission to use, copy, modify and distribute this software and
00011    its documentation is hereby granted, provided that both the
00012    copyright notice and this permission notice appear in all copies of
00013    the software, derivative works or modified versions, and any
00014    portions thereof, and that both notices appear in supporting
00015    documentation.
00016    
00017    This code is distributed in the hope that it will be useful, but
00018    WITHOUT ANY WARRANTY; without even the implied warranty of
00019    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS
00020    DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
00021    RESULTING FROM THE USE OF THIS SOFTWARE.
00022 */
00023 
00024 // -*- mode:c++; c-basic-offset:4 -*-
00025 /*<std-header orig-src='shore' incl-file-exclusion='W_HASH_H'>
00026 
00027  $Id: w_hash.h,v 1.36 2010/06/15 17:24:25 nhall Exp $
00028 
00029 SHORE -- Scalable Heterogeneous Object REpository
00030 
00031 Copyright (c) 1994-99 Computer Sciences Department, University of
00032                       Wisconsin -- Madison
00033 All Rights Reserved.
00034 
00035 Permission to use, copy, modify and distribute this software and its
00036 documentation is hereby granted, provided that both the copyright
00037 notice and this permission notice appear in all copies of the
00038 software, derivative works or modified versions, and any portions
00039 thereof, and that both notices appear in supporting documentation.
00040 
00041 THE AUTHORS AND THE COMPUTER SCIENCES DEPARTMENT OF THE UNIVERSITY
00042 OF WISCONSIN - MADISON ALLOW FREE USE OF THIS SOFTWARE IN ITS
00043 "AS IS" CONDITION, AND THEY DISCLAIM ANY LIABILITY OF ANY KIND
00044 FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
00045 
00046 This software was developed with support by the Advanced Research
00047 Project Agency, ARPA order number 018 (formerly 8230), monitored by
00048 the U.S. Army Research Laboratory under contract DAAB07-91-C-Q518.
00049 Further funding for this work was provided by DARPA through
00050 Rome Research Laboratory Contract No. F30602-97-2-0247.
00051 
00052 */
00053 
00054 #ifndef W_HASH_H
00055 #define W_HASH_H
00056 
00057 #include "w_defines.h"
00058 
00059 /*  -- do not edit anything above this line --   </std-header>*/
00060 
00061 /**\file w_hash.h
00062  */
00063 
00064 #include <w_base.h>
00065 #include <w_list.h>
00066 
00067 
00068 template <class T, class LOCK, class K> class w_hash_t;
00069 template <class T, class LOCK, class K> class w_hash_i;
00070 
00071 inline w_base_t::uint4_t w_hash(long l)
00072 {
00073     return (w_base_t::uint4_t) l;
00074 }
00075 
00076 inline w_base_t::uint4_t w_hash(unsigned long l)
00077 {
00078     return (w_base_t::uint4_t) l;
00079 }
00080 
00081 inline w_base_t::uint4_t w_hash(w_base_t::uint4_t i)  {
00082     return i;
00083 }
00084 
00085 inline w_base_t::uint4_t w_hash(w_base_t::int4_t i)   {
00086     return CAST(w_base_t::uint4_t,i);
00087 }
00088 
00089 inline w_base_t::uint4_t w_hash(w_base_t::uint2_t i)  {
00090     return i;
00091 }
00092 
00093 inline w_base_t::uint4_t w_hash(w_base_t::int2_t i)   {
00094     return CAST(w_base_t::int2_t, i);
00095 }
00096 
00097 BIND_FRIEND_OPERATOR_PART_1B(T,LOCK,K,w_hash_t<T,LOCK,K>)
00098 
00099 
00100 /**\brief Templated hash table. Not particularly sophisticated.
00101  *
00102  * The hash function used here is :
00103  * - w_hash(key) & _mask
00104  *
00105  * Thus, to make this work, you need to define a (static, global, or inlined)
00106  * function
00107  * \code
00108  * w_base_t::uint4_t w_hash(const K &key)
00109  * \endcode
00110  *
00111  * Note that since the hash function uses the _mask to collect the lower
00112  * bits of the result of w_hash, the w_hash(key) function should be sensitive
00113  * to the way hash-table uses of the w_hash function, and the hash tables 
00114  * should be aware of the likely bit distribution
00115  * of the result of w_hash(key).
00116  *
00117  * \bug (GNATS 114, non-critical performance issue) improve these hash funcs. 
00118  * This is used in: histograms, dirty page table for restart.
00119  *
00120  */
00121 template <class T, class LOCK, class K>
00122 class w_hash_t : public w_base_t {
00123 public:
00124     /**\brief Construct hash table 
00125      * @param[in] sz Number of bits in result values. 
00126      * @param[in] key_offset Offset in object of type T where key K is found
00127      * @param[in] link_offset Offset in object of type T where w_link_t is found
00128      *            This w_link_t is used to hold the object in a hash table bucket
00129      * @param[in] lock Pointer to a lock used to protect the table.
00130      *
00131      * The size determines a mask used to restrict the resulting 
00132      * hash values to a desired size.
00133      *
00134      * The lock passed in is not used.  There is no enforcement of
00135      * locking these structures. The template contains the lock type
00136      * so that it is relatively easy to tell what structures are
00137      * unprotected by perusing the code.
00138      *
00139      * See \ref #W_HASH_ARG(class,key,link)
00140      */
00141     NORET                        w_hash_t(
00142         uint4_t                     sz,
00143         uint4_t                     key_offset,
00144         uint4_t                     link_offset,
00145         const LOCK *                lock);
00146     NORET                        ~w_hash_t();
00147 
00148 private:
00149     uint4_t                     bucket_for(K const &k) const;
00150     uint4_t                     bucket_for(T const *t) const;
00151 
00152 public:
00153     /// Insert an element in the table at the front of its bucket.
00154     w_hash_t&                   push(T* t);
00155     /// Insert an element in the table at the tail of its bucket.
00156     w_hash_t&                   append(T* t);
00157     /// Find an element in the table.
00158     T*                          lookup(const K& k) const;
00159     /// True if element is in the table.
00160     bool                        member(T const *t) const;
00161     /// Remove the (single) element with the given key 
00162     T*                          remove(const K& k);
00163     /// Remove the given element that is in the table.
00164     void                        remove(T* t);
00165     /// Total number of elements in the table.
00166     uint4_t                     num_members() const { return _cnt; }
00167 
00168     /// Standard ostream operator, despite the macro here (in \ref w_workaround.h)
00169     friend ostream&             operator<< 
00170                                      BIND_FRIEND_OPERATOR_PART_2B(T,LOCK,K) (
00171         ostream&                     o,
00172         const w_hash_t<T,LOCK,K>&        obj);
00173     
00174 
00175 private:
00176     friend class w_hash_i<T,LOCK,K>;
00177     uint4_t                        _top;
00178     uint4_t                        _mask;
00179     uint4_t                        _cnt;
00180     uint4_t                        _key_offset;
00181     uint4_t                        _link_offset;
00182     w_list_t<T, LOCK>*             _tab;
00183 
00184     const K&                       _keyof(const T& t) const  {
00185         return * (K*) (((const char*)&t) + _key_offset);
00186     }
00187     w_link_t&                       _linkof(T& t) const  {
00188         return * (w_link_t*) (((char*)&t) + _link_offset);
00189     }
00190 
00191     // disabled
00192     NORET                           w_hash_t(const w_hash_t&)
00193     ;
00194     w_hash_t&                       operator=(const w_hash_t&)
00195     ;
00196 };
00197 
00198 // XXX They are the same for now, avoids offsetof duplication
00199 /**\def W_HASH_ARG(class,key,link)
00200  * \brief Idiom for creating constructor argument for \ref #w_hash_t.
00201  *
00202  * This macro produces the last two arguments of the w_hash_t constructor.
00203  * Example :
00204  * \code
00205  * class key_t;
00206  * class entry_t {
00207  *    ...
00208  *    public:
00209  *       key_t    hashkey;
00210  *       w_link_t hashlink;
00211  * };
00212  *
00213  * w_hash_t<entry_t,key_t>(16, W_HASH_ARG(entry_t,hashkey,hashlink)) hashtable;
00214  * \endcode
00215  *
00216  */
00217 #define        W_HASH_ARG(class,key,link)  W_KEYED_ARG(class, key, link)
00218 
00219 
00220 /**\brief Iterate over hash table (for debugging)
00221  *
00222  * \note Not for general use.  Helper for w_hash_t,
00223  * and useful for writing debugging / dump-table code.
00224  *
00225  * Example:
00226  * \code
00227  * w_hash_t<entry_t,key_t>(16, W_HASH_ARG(entry_t,hashkey,hashlink)) hashtable;
00228  *
00229  * w_hash_i<entry_t,key_t> iter(hashtable);
00230  * entry_t *entry = NULL;
00231  * while( ( entry = iter.next()) != NULL) {
00232  *    ...
00233  * }
00234  * \endcode
00235  *
00236  * Since the w_hash_t is built of w_list_t, the same comments go for
00237  * next() and curr() here.  You can remove items from the table in
00238  * an iteration but you cannot insert.
00239  */
00240 template <class T, class LOCK, class K>
00241 class w_hash_i : public w_base_t {
00242 public:
00243     NORET           w_hash_i(const w_hash_t<T,LOCK, K>& t) : _bkt(uint4_max), 
00244                                                     _htab(t) {};
00245         
00246     NORET           ~w_hash_i()        {};
00247     
00248     T*              next();
00249     T*              curr()                { return _iter.curr(); }
00250 
00251 private:
00252     uint4_t                   _bkt;
00253     w_list_i<T,LOCK>         _iter;
00254     const w_hash_t<T,LOCK, K>&     _htab;
00255     
00256     NORET           w_hash_i(w_hash_i&);
00257 
00258     w_hash_i&       operator=(w_hash_i&)
00259     ;
00260 };
00261 
00262 
00263 template <class T, class LOCK, class K>
00264 ostream& operator<<(
00265     ostream&                        o,
00266     const w_hash_t<T,LOCK, K>&        h)
00267 {
00268     for (int i = 0; i < h._top; i++)  {
00269         o << '[' << i << "] ";
00270         w_list_i<T,LOCK> iter(h._tab[i]);
00271         while (iter.next())  {
00272             o << h._keyof(*iter.curr()) << " ";
00273         }
00274         o << endl;
00275     }
00276     return o;
00277 }
00278 
00279 /**\cond skip */
00280 template <class T, class LOCK, class K>
00281 NORET
00282 w_hash_t<T,LOCK, K>::w_hash_t(
00283     w_base_t::uint4_t        sz,
00284     w_base_t::uint4_t        key_offset,
00285     w_base_t::uint4_t        link_offset,
00286     const LOCK *)
00287 : _top(0), _cnt(0), _key_offset(key_offset),
00288   _link_offset(link_offset), _tab(0)
00289 {
00290     for (_top = 1; _top < sz; _top <<= 1) ;
00291     _mask = _top - 1;
00292     
00293     w_assert1(!_tab); // just to check space
00294     _tab = new w_list_t<T,LOCK>[_top];
00295     w_assert1(_tab);
00296     for (unsigned i = 0; i < _top; i++)  {
00297         _tab[i].set_offset(_link_offset);
00298     }
00299 }
00300 
00301 template <class T, class LOCK, class K>
00302 NORET
00303 w_hash_t<T,LOCK, K>::~w_hash_t()
00304 {
00305     w_assert1(_cnt == 0);
00306     delete[] _tab;
00307 }
00308 /**\endcond skip */
00309 
00310 template<class T, class LOCK, class K>
00311 w_base_t::uint4_t w_hash_t<T,LOCK, K>::bucket_for(T const* t) const {
00312     return bucket_for(_keyof(*t));
00313 }
00314 
00315 template<class T, class LOCK, class K>
00316 w_base_t::uint4_t w_hash_t<T,LOCK, K>::bucket_for(K const &k) const {
00317     return w_hash(k) & _mask;
00318 }
00319 
00320 template <class T, class LOCK, class K>
00321 w_hash_t<T,LOCK, K>&
00322 w_hash_t<T,LOCK, K>::push(T* t)
00323 {
00324     _tab[bucket_for(t)].push(t);
00325     ++_cnt;
00326     w_assert1(int(_cnt) > 0);
00327     return *this;
00328 }
00329 
00330 template <class T, class LOCK, class K>
00331 w_hash_t<T,LOCK, K>& w_hash_t<T,LOCK, K>::append(T* t)
00332 {
00333     _tab[bucket_for(t)].append(t);
00334     ++_cnt;
00335     w_assert1(int(_cnt) > 0);
00336     return *this;
00337 }
00338 
00339 template <class T, class LOCK, class K>
00340 T*
00341 w_hash_t<T,LOCK, K>::lookup(const K& k) const
00342 {
00343     w_list_t<T,LOCK>& list = _tab[bucket_for(k)];
00344     w_list_i<T,LOCK> i( list );
00345     register T* t;
00346     int4_t count;
00347     for (count = 0; (t = i.next()) && ! (_keyof(*t) == k); ++count) ;
00348     /* FRJ: disable move-to-front because (a) it's expensive and (b)
00349        lists should be short and (c) read-only operations can't go in
00350        parallel if they insist on messing with the data structure
00351      */
00352     if (0 && t && count) {
00353         w_link_t& link = _linkof(*t);
00354         link.detach();
00355         list.push(t);
00356     }
00357         
00358     return t;
00359 }
00360 template <class T, class LOCK, class K>
00361 bool
00362 w_hash_t<T,LOCK, K>::member(T const* t) const
00363 {
00364     w_list_base_t* list = &_tab[bucket_for(t)];
00365     return t->link.member_of() == list;
00366 }
00367 
00368 template <class T, class LOCK, class K>
00369 T*
00370 w_hash_t<T,LOCK, K>::remove(const K& k)
00371 {
00372     w_list_i<T,LOCK> i(_tab[bucket_for(k)]);
00373     while (i.next() && ! (_keyof(*i.curr()) == k)) ;
00374 
00375     T *tmp = i.curr();
00376     if (tmp) {
00377         --_cnt;
00378         w_assert1(int(_cnt) >= 0);
00379         _linkof(*tmp).detach();
00380     }
00381     return tmp;
00382 }
00383 
00384 template <class T, class LOCK, class K>
00385 void
00386 w_hash_t<T,LOCK, K>::remove(T* t)
00387 {
00388     w_assert1(_linkof(*t).member_of() ==
00389               &_tab[bucket_for(t)]);
00390     _linkof(*t).detach();
00391     --_cnt;
00392     w_assert1(int(_cnt) >= 0);
00393 }
00394 
00395 template <class T, class LOCK, class K>
00396 T* w_hash_i<T,LOCK, K>::next()
00397 {
00398     if (_bkt == uint4_max)  {
00399         _bkt = 0;
00400         _iter.reset(_htab._tab[_bkt++]);
00401     }
00402 
00403     if (! _iter.next())  {
00404         while (_bkt < _htab._top)  {
00405             
00406             _iter.reset( _htab._tab[ _bkt++ ] );
00407             if (_iter.next())  break;
00408         }
00409     }
00410     return _iter.curr();
00411 }
00412 
00413 /*<std-footer incl-file-exclusion='W_HASH_H'>  -- do not edit anything below this line -- */
00414 
00415 #endif          /*</std-footer>*/

Generated on Wed Jul 7 17:22:32 2010 for Shore Storage Manager by  doxygen 1.4.7