00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
00039 
00040 
00041 
00042 
00043 
00044 
00045 
00046 
00047 
00048 
00049 
00050 
00051 
00052 
00053 
00054 #include "w.h"
00055 #include "mem_block.h"
00056 #include "atomic_ops.h"
00057 #include <cstdlib>
00058 #include <stdio.h>
00059 #include <algorithm>
00060 #ifdef __linux
00061 #include <malloc.h>
00062 #endif
00063 
00064 
00065 #undef assert
00066 void assert_failed(const char *desc, const char *f, int l) {
00067     fprintf(stdout, "Assertion failed: %s at line %d file %s ", desc, l,f);
00068     w_assert0(0);
00069 }
00070 #define assert(x)   if (!(x)) assert_failed(#x, __FILE__, __LINE__);
00071 
00072 #define TEMPLATE_ARGS chip_size, chip_count, block_size
00073 
00074 namespace memory_block {
00075 #if 0
00076 } 
00077 #endif
00078 
00079 
00080 typedef unsigned long long u64;
00081 static inline
00082 long popc64(u64 x) {
00083     u64 k1 = 0x5555555555555555ull;
00084     u64 k2 = 0x3333333333333333ull;
00085     u64 k4 = 0x0f0f0f0f0f0f0f0full;
00086     u64 kf = 0x0101010101010101ull;
00087     x =  x       - ((x >> 1)  & k1); 
00088     x = (x & k2) + ((x >> 2)  & k2); 
00089     x = (x       +  (x >> 4)) & k4 ; 
00090     x = (x * kf) >> 56; 
00091     return x;
00092 }
00093 
00094 size_t        block_bits::_popc(bitmap bm) {
00095 #ifdef __GNUC__
00096     
00097 #if defined(__x86_64) || defined(i386) || defined(__i386__)
00098 
00099     return __builtin_popcountll(bm);
00100     
00101 #elif defined(__sparcv9)
00102 #warning "Using gcc inline asm to access sparcv9 'popc' instruction"
00103     long rval;
00104     __asm__("popc    %[in], %[out]" : [out] "=r"(rval) : [in] "r"(x));
00105     return rval;
00106 #else
00107 #warning "using home-brew popc routine"
00108     return popc64(bm);
00109 #endif
00110     
00111 #else // !defined(__GNUC__)
00112 #warning "using home-brew popc routine"
00113     return popc64(bm);
00114 #endif
00115 }
00116 
00117 block_bits::block_bits(size_t chip_count)
00118     : _usable_chips(create_mask(chip_count))
00119     , _zombie_chips(0)
00120 {
00121     assert(chip_count <= 8*sizeof(bitmap));
00122 }
00123 
00124 size_t block_bits::acquire(size_t chip_count) {
00125     (void) chip_count; 
00126     
00127     
00128 
00129 
00130 
00131 
00132 
00133 
00134 
00135 
00136     bitmap one_bit = _usable_chips &- _usable_chips;
00137     size_t index = _popc(one_bit-1);
00138     if(index < 8*sizeof(bitmap)) {
00139         
00140         assert(index < chip_count);
00141         _usable_chips ^= one_bit;
00142     }
00143     else {
00144         
00145         assert(index == 8*sizeof(bitmap));
00146     }
00147 
00148     return index;
00149 }
00150 
00151 void block_bits::release(size_t index, size_t chip_count) {
00152     
00153     (void) chip_count; 
00154     assert(index < chip_count);
00155     bitmap to_free = bitmap(1) << index;
00156     assert(! (to_free & *usable_chips()));
00157 #if I_WISH
00158     bitmap was_free = __sync_fetch_and_or(&_zombie_chips, to_free);
00159 #else
00160     membar_exit();
00161     bitmap volatile* ptr = &_zombie_chips;
00162     bitmap ov = *ptr;
00163     while(1) {
00164         bitmap nv = ov | to_free;
00165         bitmap cv = atomic_cas_64(ptr, ov, nv);
00166         if(cv == ov)
00167             break;
00168         ov = cv;
00169     }
00170     bitmap was_free = ov;
00171 #endif
00172     (void) was_free; 
00173     assert(! (was_free & to_free));
00174 }
00175 
00176 block_bits::bitmap block_bits::create_mask(size_t bits_set) {
00177     
00178     return ~bitmap(0) >> (8*sizeof(bitmap) - bits_set);
00179 }
00180 
00181 void block_bits::recycle() {
00182     
00183 
00184 
00185 
00186 
00187 
00188 
00189 
00190     bitmap newly_usable = *&_zombie_chips;
00191     _usable_chips |= newly_usable;
00192 #if I_WISH
00193     __sync_xor_and_fetch(&_zombie_chips, newly_usable);
00194 #else
00195     membar_exit();
00196     bitmap volatile* ptr = &_zombie_chips;
00197     bitmap ov = *ptr;
00198     while(1) {
00199         bitmap nv = ov ^ newly_usable;
00200         bitmap cv = atomic_cas_64(ptr, ov, nv);
00201         if(cv == ov)
00202             break;
00203         ov = cv;
00204     }
00205 #endif
00206 }
00207 
00208 void* block::acquire(size_t chip_size, size_t chip_count, size_t ) {
00209     size_t index = _bits.acquire(chip_count);
00210     return (index < chip_count)? _get(index, chip_size) : 0;
00211 }
00212 
00213 void block::release(void* ptr, size_t chip_size, size_t chip_count, size_t block_size)
00214 {
00215     
00216 
00217 
00218 
00219 
00220 
00221     union { void* v; size_t n; block* b; char* c; } u = {ptr}, v=u;
00222     u.n &= -block_size;
00223     size_t offset = v.c - u.b->_data;
00224     size_t idx = offset/chip_size;
00225     assert(u.b->_data + idx*chip_size == ptr);
00226     u.b->_bits.release(idx, chip_count);
00227 }
00228 
00229 char* block::_get(size_t index, size_t chip_size) {
00230     return _data + index*chip_size;
00231 }
00232 
00233 block::block(size_t chip_size, size_t chip_count, size_t block_size)
00234     : _bits(chip_count)
00235     , _owner(0)
00236     , _next(0)
00237 {
00238     
00239     char* end_of_block = _get(0, chip_size)-sizeof(this)+block_size;
00240     char* end_of_chips = _get(chip_count, chip_size);
00241     (void) end_of_block; 
00242     (void) end_of_chips; 
00243     assert(end_of_chips <= end_of_block);
00244     
00245     
00246 
00247 
00248 
00249 
00250 }
00251 
00252 void* block_list::acquire(size_t chip_size, size_t chip_count, size_t block_size)
00253 {
00254     if(void* ptr = _tail->acquire(TEMPLATE_ARGS)) {
00255         _hit_count++;
00256         return ptr;
00257     }
00258 
00259     
00260     return _slow_acquire(TEMPLATE_ARGS);
00261 }
00262 
00263 
00264 block_list::block_list(block_pool* pool, size_t chip_size, size_t chip_count, size_t block_size)
00265     : _fake_block(TEMPLATE_ARGS)
00266     , _tail(&_fake_block)
00267     , _pool(pool)
00268     , _hit_count(0)
00269     , _avg_hit_rate(0)
00270 {
00271     
00272 
00273 
00274 
00275 
00276 
00277 
00278 
00279 
00280 
00281     _fake_block._bits._usable_chips = 0;
00282     _fake_block._bits._zombie_chips = 0;
00283 }
00284 
00285 
00286 void* block_list::_slow_acquire(size_t chip_size, size_t chip_count, size_t block_size)
00287 {
00288     _change_blocks(TEMPLATE_ARGS);
00289     return acquire(TEMPLATE_ARGS);
00290 }
00291 
00292 block* block_list::acquire_block(size_t block_size)
00293 {
00294     union { block* b; uintptr_t n; } u = {_pool->acquire_block(this)};
00295     (void) block_size; 
00296     assert((u.n & -block_size) == u.n);
00297     return u.b;
00298     
00299 }
00300 void block_list::_change_blocks(size_t chip_size, size_t chip_count, size_t block_size)
00301 {
00302     (void) chip_size; 
00303 
00304     
00305     if(_tail == &_fake_block) {
00306     _tail = acquire_block(block_size);
00307     _tail->_next = _tail;
00308     return;
00309     }
00310     
00311     
00312 
00313 
00314 
00315 
00316 
00317 
00318 
00319 
00320 
00321 
00322 
00323 
00324     static double const    decay_rate = 1./5; 
00325     
00326     size_t const max_available = chip_count - std::max((int)(.1*chip_count), 1);
00327     
00328     size_t const min_allocated = (chip_count+1)/2;
00329     
00330     _avg_hit_rate = _hit_count*(1-decay_rate) + _avg_hit_rate*decay_rate;
00331     if(_hit_count < min_allocated && _avg_hit_rate < min_allocated) {
00332         
00333         block* new_block = acquire_block(block_size);
00334         new_block->_next = _tail->_next;
00335         _tail = _tail->_next = new_block;
00336     }
00337     else {
00338         
00339         block* prev = 0;
00340         block* cur = _tail;
00341         block* next;
00342         while(1) {
00343             next = cur->_next;
00344             next->recycle();
00345             if(next->_bits.usable_count() <= max_available)
00346             break;
00347             
00348             
00349             if(prev) {
00350             assert(prev != cur);
00351             assert(cur->_bits.usable_count() > max_available);
00352             assert(next->_bits.usable_count() > max_available);
00353             prev->_next = next;
00354             _pool->release_block(cur);
00355             cur = prev; 
00356             }
00357 
00358             
00359             if(next == _tail)
00360             break;
00361             
00362             prev = cur;
00363             cur = next;
00364         }
00365 
00366         
00367         _tail = cur;
00368     }
00369 
00370     _hit_count = 0;
00371 }
00372 
00373 block_list::~block_list() {
00374     
00375     if(_tail == &_fake_block) return;
00376     
00377     
00378     block* cur = _tail->_next;
00379     _tail->_next = 0;
00380 
00381     
00382     while( (cur=_pool->release_block(cur)) );
00383 }
00384 
00385 
00386 }