00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054 #include "w.h"
00055 #include "mem_block.h"
00056 #include "atomic_ops.h"
00057 #include <cstdlib>
00058 #include <stdio.h>
00059 #include <algorithm>
00060 #ifdef __linux
00061 #include <malloc.h>
00062 #endif
00063
00064
00065 #undef assert
00066 void assert_failed(const char *desc, const char *f, int l) {
00067 fprintf(stdout, "Assertion failed: %s at line %d file %s ", desc, l,f);
00068 w_assert0(0);
00069 }
00070 #define assert(x) if (!(x)) assert_failed(#x, __FILE__, __LINE__);
00071
00072 #define TEMPLATE_ARGS chip_size, chip_count, block_size
00073
00074 namespace memory_block {
00075 #if 0
00076 }
00077 #endif
00078
00079
00080 typedef unsigned long long u64;
00081 static inline
00082 long popc64(u64 x) {
00083 u64 k1 = 0x5555555555555555ull;
00084 u64 k2 = 0x3333333333333333ull;
00085 u64 k4 = 0x0f0f0f0f0f0f0f0full;
00086 u64 kf = 0x0101010101010101ull;
00087 x = x - ((x >> 1) & k1);
00088 x = (x & k2) + ((x >> 2) & k2);
00089 x = (x + (x >> 4)) & k4 ;
00090 x = (x * kf) >> 56;
00091 return x;
00092 }
00093
00094 size_t block_bits::_popc(bitmap bm) {
00095 #ifdef __GNUC__
00096
00097 #if defined(__x86_64) || defined(i386) || defined(__i386__)
00098
00099 return __builtin_popcountll(bm);
00100
00101 #elif defined(__sparcv9)
00102 #warning "Using gcc inline asm to access sparcv9 'popc' instruction"
00103 long rval;
00104 __asm__("popc %[in], %[out]" : [out] "=r"(rval) : [in] "r"(x));
00105 return rval;
00106 #else
00107 #warning "using home-brew popc routine"
00108 return popc64(bm);
00109 #endif
00110
00111 #else // !defined(__GNUC__)
00112 #warning "using home-brew popc routine"
00113 return popc64(bm);
00114 #endif
00115 }
00116
00117 block_bits::block_bits(size_t chip_count)
00118 : _usable_chips(create_mask(chip_count))
00119 , _zombie_chips(0)
00120 {
00121 assert(chip_count <= 8*sizeof(bitmap));
00122 }
00123
00124 size_t block_bits::acquire(size_t chip_count) {
00125 (void) chip_count;
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136 bitmap one_bit = _usable_chips &- _usable_chips;
00137 size_t index = _popc(one_bit-1);
00138 if(index < 8*sizeof(bitmap)) {
00139
00140 assert(index < chip_count);
00141 _usable_chips ^= one_bit;
00142 }
00143 else {
00144
00145 assert(index == 8*sizeof(bitmap));
00146 }
00147
00148 return index;
00149 }
00150
00151 void block_bits::release(size_t index, size_t chip_count) {
00152
00153 (void) chip_count;
00154 assert(index < chip_count);
00155 bitmap to_free = bitmap(1) << index;
00156 assert(! (to_free & *usable_chips()));
00157 #if I_WISH
00158 bitmap was_free = __sync_fetch_and_or(&_zombie_chips, to_free);
00159 #else
00160 membar_exit();
00161 bitmap volatile* ptr = &_zombie_chips;
00162 bitmap ov = *ptr;
00163 while(1) {
00164 bitmap nv = ov | to_free;
00165 bitmap cv = atomic_cas_64(ptr, ov, nv);
00166 if(cv == ov)
00167 break;
00168 ov = cv;
00169 }
00170 bitmap was_free = ov;
00171 #endif
00172 (void) was_free;
00173 assert(! (was_free & to_free));
00174 }
00175
00176 block_bits::bitmap block_bits::create_mask(size_t bits_set) {
00177
00178 return ~bitmap(0) >> (8*sizeof(bitmap) - bits_set);
00179 }
00180
00181 void block_bits::recycle() {
00182
00183
00184
00185
00186
00187
00188
00189
00190 bitmap newly_usable = *&_zombie_chips;
00191 _usable_chips |= newly_usable;
00192 #if I_WISH
00193 __sync_xor_and_fetch(&_zombie_chips, newly_usable);
00194 #else
00195 membar_exit();
00196 bitmap volatile* ptr = &_zombie_chips;
00197 bitmap ov = *ptr;
00198 while(1) {
00199 bitmap nv = ov ^ newly_usable;
00200 bitmap cv = atomic_cas_64(ptr, ov, nv);
00201 if(cv == ov)
00202 break;
00203 ov = cv;
00204 }
00205 #endif
00206 }
00207
00208 void* block::acquire(size_t chip_size, size_t chip_count, size_t ) {
00209 size_t index = _bits.acquire(chip_count);
00210 return (index < chip_count)? _get(index, chip_size) : 0;
00211 }
00212
00213 void block::release(void* ptr, size_t chip_size, size_t chip_count, size_t block_size)
00214 {
00215
00216
00217
00218
00219
00220
00221 union { void* v; size_t n; block* b; char* c; } u = {ptr}, v=u;
00222 u.n &= -block_size;
00223 size_t offset = v.c - u.b->_data;
00224 size_t idx = offset/chip_size;
00225 assert(u.b->_data + idx*chip_size == ptr);
00226 u.b->_bits.release(idx, chip_count);
00227 }
00228
00229 char* block::_get(size_t index, size_t chip_size) {
00230 return _data + index*chip_size;
00231 }
00232
00233 block::block(size_t chip_size, size_t chip_count, size_t block_size)
00234 : _bits(chip_count)
00235 , _owner(0)
00236 , _next(0)
00237 {
00238
00239 char* end_of_block = _get(0, chip_size)-sizeof(this)+block_size;
00240 char* end_of_chips = _get(chip_count, chip_size);
00241 (void) end_of_block;
00242 (void) end_of_chips;
00243 assert(end_of_chips <= end_of_block);
00244
00245
00246
00247
00248
00249
00250 }
00251
00252 void* block_list::acquire(size_t chip_size, size_t chip_count, size_t block_size)
00253 {
00254 if(void* ptr = _tail->acquire(TEMPLATE_ARGS)) {
00255 _hit_count++;
00256 return ptr;
00257 }
00258
00259
00260 return _slow_acquire(TEMPLATE_ARGS);
00261 }
00262
00263
00264 block_list::block_list(block_pool* pool, size_t chip_size, size_t chip_count, size_t block_size)
00265 : _fake_block(TEMPLATE_ARGS)
00266 , _tail(&_fake_block)
00267 , _pool(pool)
00268 , _hit_count(0)
00269 , _avg_hit_rate(0)
00270 {
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281 _fake_block._bits._usable_chips = 0;
00282 _fake_block._bits._zombie_chips = 0;
00283 }
00284
00285
00286 void* block_list::_slow_acquire(size_t chip_size, size_t chip_count, size_t block_size)
00287 {
00288 _change_blocks(TEMPLATE_ARGS);
00289 return acquire(TEMPLATE_ARGS);
00290 }
00291
00292 block* block_list::acquire_block(size_t block_size)
00293 {
00294 union { block* b; uintptr_t n; } u = {_pool->acquire_block(this)};
00295 (void) block_size;
00296 assert((u.n & -block_size) == u.n);
00297 return u.b;
00298
00299 }
00300 void block_list::_change_blocks(size_t chip_size, size_t chip_count, size_t block_size)
00301 {
00302 (void) chip_size;
00303
00304
00305 if(_tail == &_fake_block) {
00306 _tail = acquire_block(block_size);
00307 _tail->_next = _tail;
00308 return;
00309 }
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324 static double const decay_rate = 1./5;
00325
00326 size_t const max_available = chip_count - std::max((int)(.1*chip_count), 1);
00327
00328 size_t const min_allocated = (chip_count+1)/2;
00329
00330 _avg_hit_rate = _hit_count*(1-decay_rate) + _avg_hit_rate*decay_rate;
00331 if(_hit_count < min_allocated && _avg_hit_rate < min_allocated) {
00332
00333 block* new_block = acquire_block(block_size);
00334 new_block->_next = _tail->_next;
00335 _tail = _tail->_next = new_block;
00336 }
00337 else {
00338
00339 block* prev = 0;
00340 block* cur = _tail;
00341 block* next;
00342 while(1) {
00343 next = cur->_next;
00344 next->recycle();
00345 if(next->_bits.usable_count() <= max_available)
00346 break;
00347
00348
00349 if(prev) {
00350 assert(prev != cur);
00351 assert(cur->_bits.usable_count() > max_available);
00352 assert(next->_bits.usable_count() > max_available);
00353 prev->_next = next;
00354 _pool->release_block(cur);
00355 cur = prev;
00356 }
00357
00358
00359 if(next == _tail)
00360 break;
00361
00362 prev = cur;
00363 cur = next;
00364 }
00365
00366
00367 _tail = cur;
00368 }
00369
00370 _hit_count = 0;
00371 }
00372
00373 block_list::~block_list() {
00374
00375 if(_tail == &_fake_block) return;
00376
00377
00378 block* cur = _tail->_next;
00379 _tail->_next = 0;
00380
00381
00382 while( (cur=_pool->release_block(cur)) );
00383 }
00384
00385
00386 }