00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
00039 
00040 
00041 
00042 
00043 
00044 
00045 
00046 
00047 
00048 
00049 
00050 
00051 
00052 
00053 #include "w_defines.h"
00054 
00055 
00056 
00057 #ifdef __GNUC__
00058 #pragma implementation "vec_t.h"
00059 #endif
00060 
00061 #define VEC_T_C
00062 #include <cstdlib>
00063 #include <cstring>
00064 #include <w_stream.h>
00065 #include <w_base.h>
00066 #include <w_minmax.h>
00067 #include "basics.h"
00068 #include "vec_t.h"
00069 #include "umemcmp.h"
00070 
00071 
00072 #ifdef EXPLICIT_TEMPLATE
00073 
00074 
00075 template class w_auto_delete_array_t<cvec_t>;
00076 template class w_auto_delete_array_t<vec_t>;
00077 #endif
00078 
00079 
00080 
00081 cvec_t                cvec_t::pos_inf;
00082 cvec_t                cvec_t::neg_inf;
00083 CADDR_T               cvec_t::zero_location=(CADDR_T)&(cvec_t::neg_inf);
00084 vec_t&                vec_t::pos_inf = *(vec_t*) &cvec_t::pos_inf;
00085 vec_t&                vec_t::neg_inf = *(vec_t*) &cvec_t::neg_inf;
00086 
00087 cvec_t::cvec_t(const cvec_t& )
00088 {
00089     cerr << "cvec_t: disabled member called" << endl;
00090     cerr << "failed at \"" << __FILE__ << ":" << __LINE__ 
00091          << "\"" << endl;
00092     abort();
00093 }
00094 
00095 cvec_t::~cvec_t()
00096 {
00097     if (_is_large()) {
00098         free(_base);
00099     }
00100 }
00101 
00102 void cvec_t::split(size_t l1, cvec_t& v1, cvec_t& v2) const
00103 {
00104     size_t min = 0;
00105     int i;
00106     for (i = 0; i < _cnt; i++)  {
00107         if (l1 > 0)  {
00108             min = (_base[i].len > l1) ? l1 : _base[i].len;
00109             v1.put(_base[i].ptr, min);
00110             l1 -= min; 
00111         }
00112         if (l1 <= 0) break;
00113     }
00114     
00115     for ( ; i < _cnt; i++)  {
00116         v2.put(_base[i].ptr + min, _base[i].len - min);
00117         min = 0;
00118     }
00119 }
00120 
00121 cvec_t& cvec_t::put(const cvec_t& v, size_t start, size_t num_bytes)
00122 {
00123     int i;
00124     size_t start_byte, start_elem, total;
00125 
00126     if (v.size() < start+num_bytes) {
00127         w_assert3(v.size() >= start+num_bytes );
00128     }
00129 
00130     
00131     for (i = 0, total = 0; i < v._cnt && total <= start; i++) {
00132         total += v._base[i].len;
00133     }
00134    
00135     
00136     if (_cnt+v._cnt > max_small) {
00137         _grow(_cnt+v._cnt);
00138     }
00139 
00140     start_elem = i-1;
00141     
00142     start_byte = start - (total - v._base[start_elem].len);
00143 
00144     
00145     _base[_cnt].ptr = v._base[start_elem].ptr+start_byte;
00146     _base[_cnt].len = v._base[start_elem].len-start_byte;
00147     _cnt++;
00148     w_assert3(_cnt <= _max_cnt()); 
00149     for (i = 1, total = _base[_cnt-1].len; total < num_bytes; i++) {
00150         _base[_cnt].ptr = v._base[start_elem+i].ptr;
00151         _base[_cnt].len = v._base[start_elem+i].len;
00152         total += _base[_cnt++].len;
00153         w_assert3(_cnt <= _max_cnt()); 
00154     }
00155     _base[_cnt - 1].len -= total-num_bytes;
00156 
00157     _size += num_bytes;
00158     w_assert3(check_size());
00159     return *this;
00160 }
00161 
00162 cvec_t& cvec_t::put(const void* p, size_t l)  
00163 {
00164     if (_cnt+1 > max_small) {
00165         _grow(_cnt+1);
00166     }
00167 
00168     
00169     _base[_cnt].ptr = (unsigned char*)p; 
00170     _base[_cnt].len = l;
00171     if(l>0) {
00172         _cnt++;
00173            w_assert3(_cnt <= _max_cnt()); 
00174         _size += l;
00175     }
00176     return *this;
00177 }
00178 
00179 bool cvec_t::check_size() const
00180 {
00181     w_assert1(_size == recalc_size());
00182     return true;
00183 }
00184 
00185 size_t cvec_t::recalc_size() const 
00186 {
00187     
00188     size_t l;
00189     int i;
00190     for (i = 0, l = 0; i < _cnt; l += _base[i++].len) ;
00191     return l;
00192 }
00193 
00194 
00195 
00196 
00197 
00198 
00199 
00200 
00201 const vec_t& vec_t::copy_from(
00202     const void* p,
00203     size_t limit,
00204     size_t offset)                 const
00205 {
00206     w_assert1(! is_pos_inf() && ! is_neg_inf() && !is_null() );
00207     w_assert1( _base[0].ptr != zero_location );
00208 
00209     char* s = (char*) p;
00210     for (int i = 0; (i < _cnt) && (limit>0); i++) {
00211         if ( offset < _base[i].len ) {
00212             size_t elemlen = ((_base[i].len - offset > limit)?
00213                                  limit : (_base[i].len - offset)) ;
00214             memcpy((char*)_base[i].ptr + offset, s, elemlen); 
00215             w_assert3(limit >= elemlen);
00216             limit -= elemlen;
00217             s += elemlen;
00218         } 
00219         if (_base[i].len > offset) {
00220             offset = 0;
00221         } else {
00222             offset -= _base[i].len;
00223         }
00224     }
00225     return *this;
00226 }
00227 
00228 
00229 size_t cvec_t::copy_to(void* p, size_t limit) const 
00230 {
00231     w_assert1(! is_pos_inf() && ! is_neg_inf() );
00232     char* s = (char*) p;
00233     for (int i = 0; i < _cnt && limit > 0; i++) {
00234         size_t n = limit > _base[i].len ? _base[i].len : limit;
00235         if(_base[i].ptr == zero_location) {
00236             memset(s, '\0', n);
00237         } else {
00238             memcpy(s, _base[i].ptr, n);
00239         }
00240         w_assert3(limit >= n);
00241         limit -= n;
00242         s += n;
00243     }
00244     return s - (char*)p;
00245 }
00246 
00247 
00248 vec_t& vec_t::copy_from(const cvec_t& s) 
00249 {
00250     bool        zeroing=false;
00251     int         j = 0;
00252     char*         p = (char*) _base[j].ptr;
00253     size_t         l = _base[j].len;
00254 
00255     w_assert1(size() >= s.size());
00256     w_assert1(_base[0].ptr != zero_location);
00257     
00258     for (int i = 0; i < s._cnt; i++)  {
00259         const unsigned char* pp = s._base[i].ptr;
00260         if(pp == zero_location) zeroing = true;
00261         size_t left = s._base[i].len;
00262         while (l <= left && j < _cnt)  {
00263             if( zeroing) {
00264                 memset(p, '\0', l);
00265             } else {
00266                 memcpy(p, pp, l);
00267             }
00268             pp += l;
00269             left -= l;
00270             j++;
00271             if (j >= _cnt) break;  
00272             l = _base[j].len;
00273             p = (char*) _base[j].ptr;
00274         }
00275         if (left)  {
00276             if( zeroing) {
00277                 memset(p, '\0', left);
00278             } else {
00279                 memcpy(p, pp, left);
00280             }
00281             pp += left;
00282             w_assert3(l >= left);
00283             l -= left;
00284         }
00285     }
00286     return *this;
00287 }
00288 
00289 vec_t& vec_t::copy_from(const cvec_t& ss, size_t offset, size_t limit, size_t myoffset)
00290 {
00291     bool        zeroing=false;
00292     vec_t s;
00293     s.put(ss, offset, limit);
00294 
00295     w_assert1(size() >= s.size());
00296     w_assert1(_base[0].ptr != zero_location);
00297 
00298     size_t ssz = s.size(), dsz = size();
00299     if (offset > ssz)                 offset = ssz;
00300     if (limit > ssz - offset)   limit = ssz - offset;
00301     if (myoffset > dsz)                offset = dsz;
00302 
00303     int j;
00304     for (j = 0; j < _cnt; j++)  {
00305         if (myoffset > _base[j].len)
00306             myoffset -= _base[j].len;
00307         else  
00308             break;
00309     }
00310     char* p = ((char*)_base[j].ptr) + myoffset;
00311     size_t l = _base[j].len - myoffset;
00312 
00313     w_assert1(dsz <= limit);
00314 
00315     size_t done;
00316     int i;
00317     for (i = 0, done = 0; i < s._cnt && done < limit; i++)  {
00318         const unsigned char* pp = s._base[i].ptr;
00319         if(pp == zero_location) zeroing = true;
00320         size_t left = s._base[i].len;
00321         if (limit - done < left)  left = limit - done;
00322         while (l < left)  {
00323             if(zeroing) {
00324                 memset(p, '\0', l);
00325             } else {
00326                 memcpy(p, pp, l);
00327             }
00328             done += l, pp += l;
00329             left -= l;
00330             j++;
00331             if (j >= _cnt) break;  
00332             l = _base[j].len;
00333             p = (char*) _base[j].ptr;
00334         }
00335         if (left)  {
00336             if(zeroing) {
00337                 memset(p, '\0', left);
00338             } else {
00339                 memcpy(p, pp, left);
00340             }
00341             pp += left;
00342             l -= left;
00343             done += left;
00344         }
00345     }
00346     return *this;
00347 }
00348 
00349 #ifdef UNDEF
00350 void cvec_t::common_prefix(
00351     const cvec_t& v1,
00352     const cvec_t& v2,
00353     cvec_t& ret)
00354 {
00355     size_t l1 = v1.size();
00356     size_t l2 = v2.size();
00357     int i1 = 0;
00358     int i2 = 0;
00359     int j1 = 0;
00360     int j2 = 0;
00361 
00362     while (l1 && l2) {
00363         if (v1._ptr[i1][j1] - v2._ptr[i2][j2])  break;
00364         if (++j1 >= v1._len[i1])        { j1 = 0; ++i1; }
00365         if (++j2 >= v2._len[i2])        { j2 = 0; ++i2; }
00366         --l1, --l2;
00367     }
00368 
00369     if (l1 < v1.size())  {
00370         ret.put(v1, 0, v1.size() - l1);
00371     }
00372 }
00373 #endif 
00374 
00375 cvec_t& cvec_t::put(const cvec_t& v)
00376 {
00377     w_assert1(! is_pos_inf() && ! is_neg_inf());
00378     if (_cnt+v._cnt > max_small) {
00379         _grow(_cnt+v._cnt);
00380     }
00381     for (int i = 0; i < v._cnt; i++)  {
00382         _base[_cnt + i].ptr = v._base[i].ptr;
00383         _base[_cnt + i].len = v._base[i].len;
00384     }
00385     _cnt += v._cnt;
00386     w_assert3(_cnt <= _max_cnt()); 
00387     _size += v._size;
00388 
00389     w_assert3(check_size());
00390     return *this;
00391 }
00392 
00393 int cvec_t::cmp(const void* s, size_t l) const
00394 {
00395     if (is_pos_inf()) return 1;
00396     if (is_neg_inf()) return -1;
00397     if (is_null()) {
00398         if(l==0) return 0;
00399         
00400         return -1;
00401     }
00402 
00403     size_t acc = 0;
00404     for (int i = 0; i < _cnt && acc < l; i++)  {
00405         int d = umemcmp(_base[i].ptr, ((char*)s) + acc,
00406                        _base[i].len < l - acc ? _base[i].len : l - acc);
00407         if (d) return d;
00408         acc += _base[i].len;
00409     }
00410     return acc - l;        
00411 }
00412 
00413 int cvec_t::cmp(const cvec_t& v, size_t* common_size) const
00414 {
00415     
00416     
00417 
00418     
00419     if (&v == this) {        
00420         if (common_size) *common_size = v.size();
00421         return 0; 
00422     }
00423 
00424     
00425     if (is_null() && !v.is_null())  return -1;
00426     if (is_neg_inf() || v.is_pos_inf())  return -1;
00427     if (is_pos_inf() || v.is_neg_inf())  return 1;
00428 
00429         
00430     
00431     
00432     
00433     
00434     
00435     
00436     
00437 
00438     int result = 0;
00439 
00440     
00441     
00442 
00443     size_t l1 = size();
00444     size_t l2 = v.size();
00445     int i1 = 0;
00446     int i2 = 0;
00447     size_t j1 = 0, j2 = 0;
00448     
00449     while (l1 && l2)  {
00450         w_assert3(i1 < _cnt);
00451         w_assert3(i2 < v._cnt);
00452         size_t min = _base[i1].len - j1;
00453         if (v._base[i2].len - j2 < min)  min = v._base[i2].len - j2;
00454 
00455         w_assert3(min > 0);
00456         result = umemcmp(&_base[i1].ptr[j1], &v._base[i2].ptr[j2], min);
00457         if (result) break;
00458         
00459         if ((j1 += min) >= _base[i1].len)        { j1 = 0; ++i1; }
00460         if ((j2 += min) >= v._base[i2].len)        { j2 = 0; ++i2; }
00461 
00462         l1 -= min, l2 -= min;
00463     }
00464 
00465     if (result)  {
00466         if (common_size)  {
00467             while (_base[i1].ptr[j1] == v._base[i2].ptr[j2])  {
00468                 ++j1, ++j2;
00469                 --l1;
00470             }
00471             *common_size = (l1 < size() ? size() - l1 : 0);
00472         }
00473     } else {
00474         result = l1 - l2; 
00475         if (result && common_size)  {
00476             
00477             if (l1 == 0) {
00478                 *common_size = size();
00479             } else {
00480                 w_assert3(l2 == 0);
00481                 *common_size = v.size();
00482             }
00483             w_assert3(*common_size == MIN(size(), v.size()));
00484         }
00485     }
00486     return result;
00487 }
00488 
00489 int cvec_t::checksum() const 
00490 {
00491     int sum = 0;
00492     w_assert1(! is_pos_inf() && ! is_neg_inf());
00493     for (int i = 0; i < _cnt; i++) {
00494         for(size_t j=0; j<_base[i].len; j++) sum += ((char*)_base[i].ptr)[j];
00495     }
00496     return sum;
00497 }
00498 
00499 void cvec_t::calc_kvl(w_base_t::uint4_t& rh) const
00500 {
00501     if (size() <= sizeof(w_base_t::uint4_t))  {
00502         rh = 0;
00503         copy_to(&rh, size());
00504     } else {
00505         w_base_t::uint4_t h = 0;
00506         for (int i = 0; i < _cnt; i++)  {
00507             const unsigned char* s = _base[i].ptr;
00508             for (size_t j = 0; j < _base[i].len; j++)  {
00509                 if (h & 0xf8000000)  
00510                 {
00511                     h = (h & ~0xf8000000) + (h >> 27);
00512                 }
00513                 h = (h << 5) + *s++;
00514             }
00515         }
00516         rh = h;
00517     }
00518 }
00519 
00520 void cvec_t::_grow(int total_cnt)
00521 {
00522     w_assert3(total_cnt > max_small);
00523     w_assert3(_cnt <= _max_cnt()); 
00524 
00525     int prev_max = _max_cnt();
00526   
00527     if (total_cnt > prev_max) {
00528         
00529 
00530         int grow_to = MAX(prev_max*2, total_cnt);
00531         vec_pair_t* tmp = NULL;
00532 
00533         if (_is_large()) {
00534             tmp = (vec_pair_t*) realloc((char*)_base, grow_to * sizeof(*_base));
00535             if (!tmp) W_FATAL(fcOUTOFMEMORY);
00536         } else {
00537             tmp = (vec_pair_t*) malloc(grow_to * sizeof(*_base));
00538             if (!tmp) W_FATAL(fcOUTOFMEMORY);
00539             for (int i = 0; i < prev_max; i++) {
00540                 tmp[i] = _base[i];
00541             }
00542         }
00543         _pair[0].len = grow_to;
00544         _base = tmp;
00545     }
00546 }
00547 
00548 #include <cctype>
00549 
00550 ostream& operator<<(ostream& o, const cvec_t& v)
00551 {
00552     char        *p;
00553     u_char         c, oldc;
00554     int                repeated;
00555     int                nparts = v.count();
00556     int                i = 0;
00557     size_t        j = 0;
00558     size_t        l = 0;
00559 
00560     o << "{ ";
00561     for(i=0; i< nparts; i++) {
00562         
00563         l = (i < v._cnt) ? v._base[i].len : 0;
00564 
00565         
00566         p = (i < v._cnt) ? (char *)v._base[i].ptr : (char *)NULL; 
00567 
00568         o << "{" << l << " " << "\"" ;
00569 
00570         repeated=0;
00571         oldc = '\0';
00572         for(j=0; j<l; j++,p++) {
00573             c = *p;
00574             if(c == oldc) {
00575                 repeated++;
00576             } else {
00577                 if(repeated>0) {
00578                     o << "<" <<repeated << " times>";
00579                     repeated = 0;
00580                 }
00581                 if( c == '"' ) {
00582                     o << "\\\"" ;
00583                 } else if( isprint(c) ) {
00584                     if( isascii(c) ) {
00585                         o << c ;
00586                     } else {
00587                         
00588                         o << "\\0" << oct << c << dec ;
00589                     }
00590                 } else if(c=='\0') {
00591                     o << "\\0" ;
00592                 } else {
00593                     o << "\\0" << oct << (unsigned int)c << dec ;
00594                 }
00595             }
00596             oldc = c;
00597         }
00598         if(repeated>0) {
00599             o << "<" <<repeated << " times>";
00600             repeated = 0;
00601         }
00602         o <<"\" }";
00603         w_assert3(j==l);
00604         w_assert3(j==v._base[i].len);
00605     }
00606     o <<"}";
00607     return o;
00608 }
00609 
00610 istream& operator>>(istream& is, cvec_t& v)
00611 {
00612     char        c=' ';
00613     size_t      len=0;
00614     int         err = 0;
00615     char        *temp = 0;
00616     const char leftbracket='{';
00617     const char rightbracket='}';
00618 
00619     is.clear();
00620     v.reset();
00621 
00622     enum        {
00623         starting=0,
00624         getting_nparts, got_nparts,
00625         getting_pair, got_len,got_string,
00626         done
00627     } state;
00628 
00629     state = starting;
00630     while(state != done) {
00631         is >> ws; 
00632         c = is.peek();
00633         
00634 
00635 
00636 
00637 
00638 
00639 
00640 
00641 
00642 
00643         if(is.eof()) {
00644             err ++;
00645         } else {
00646             switch(state) {
00647             case done:
00648                 break;
00649 
00650             case starting:
00651                 if(c==leftbracket) {
00652                     is >> c;
00653                     state = getting_nparts;
00654                 } else err ++;
00655                 break;
00656 
00657             case getting_nparts:
00658                 is >> ws; 
00659                 if(is.bad()) { err ++; }
00660                 else state = got_nparts;
00661                 break;
00662 
00663             case got_nparts:
00664                 is >> ws; 
00665                 if(is.peek() == leftbracket) {
00666                     state = getting_pair;
00667                 } else {
00668                     err ++;
00669                 }
00670                 break;
00671 
00672             case getting_pair:
00673                 is >> ws; 
00674                 is >> c;
00675                 if( c == leftbracket ) {
00676                     is >> ws; 
00677                     is >> len; 
00678                     if(is.bad()) { 
00679                         err ++;
00680                     } else state = got_len;
00681                 } else if( c== rightbracket ) {
00682                     state = done;
00683                 } else {
00684                     err ++;
00685                 } 
00686                 break;
00687 
00688             case got_len:
00689                 if(c == '"') {
00690                     (void) is.get();
00691 
00692                     char *t;
00693                     
00694                     
00695                     temp = new char[len];
00696                     size_t j = 0;
00697                     for(t=temp; j<len; j++, t++) {
00698                         is >> *t;
00699                     }
00700                     state = got_string;
00701                     c = is.peek();
00702                 }
00703                 if(c != '"') {
00704                     err++;
00705                 } else {
00706                     (void) is.get();
00707                 }
00708                 break;
00709 
00710             case got_string:
00711                 is >> c; 
00712                 if(c != rightbracket ) {
00713                     err ++;
00714                 } else {
00715                     v.put(temp, len);
00716                     
00717                     
00718                     temp = 0;
00719                     len = 0;
00720                     state = getting_pair;
00721                 }
00722                 break;
00723 
00724             }
00725             
00726 
00727 
00728 
00729 
00730 
00731 
00732 
00733 
00734         }
00735         if(err >0) {
00736 #if defined(__SUNPRO_CC)
00737             
00738             is.clear(is.rdstate() | ios::badbit);
00739 #elif defined(__GNUC__)
00740 #if W_GCC_THIS_VER >= W_GCC_VER(3,0)
00741             
00742             is.setstate(ios::badbit);
00743 #else
00744             is.set(ios::badbit);
00745 #endif
00746 #else
00747             is.set(ios::badbit);
00748 #endif
00749 
00750             state = done;
00751             err = is.tellg();
00752             
00753         }
00754     }
00755     return is;
00756 }
00757 
00758