00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00043 #include "fastlib/file/textfile.h"
00044
00045
00046 #include <ctype.h>
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096 void TextLineReader::Error(const char *format, ...) {
00097 va_list vl;
00098
00099
00100 fprintf(stderr, ".| %d: %s\nX| `-> ", line_num_, line_.c_str());
00101
00102 va_start(vl, format);
00103 vfprintf(stderr, format, vl);
00104 va_end(vl);
00105
00106 fprintf(stderr, "\n");
00107 }
00108
00109 success_t TextLineReader::Open(const char *fname) {
00110 f_ = fopen(fname, "r");
00111 line_num_ = 0;
00112 has_line_ = false;
00113 line_.Init();
00114
00115 if (unlikely(f_ == NULL)) {
00116 return SUCCESS_FAIL;
00117 } else {
00118 Gobble();
00119 return SUCCESS_PASS;
00120 }
00121 }
00122
00123 bool TextLineReader::Gobble() {
00124 char *ptr = ReadLine_();
00125
00126 line_.Destruct();
00127
00128 if (likely(ptr != NULL)) {
00129 line_.Steal(ptr);
00130 has_line_ = true;
00131 line_num_++;
00132 return true;
00133 } else {
00134 line_.Init();
00135 has_line_ = false;
00136 return false;
00137 }
00138 }
00139
00140 char *TextLineReader::ReadLine_() {
00141 char *buf = NULL;
00142 size_t size = 1;
00143 size_t len = 0;
00144 #ifdef DEBUG
00145 const size_t extra = 10;
00146 #else
00147 const size_t extra = 80;
00148 #endif
00149
00150 for (;;) {
00151 size = size * 2 + extra;
00152 buf = mem::Realloc(buf, size);
00154 char *result = ::fgets(buf + len, size - len, f_);
00155 if (len == 0 && result == NULL) {
00156 mem::Free(buf);
00157 return NULL;
00158 }
00159 len += strlen(buf + len);
00160
00161 if (len < size - 1 || buf[len - 1] == '\r' || buf[len - 1] == '\n') {
00162 while (len && (buf[len-1] == '\r' || buf[len-1] == '\n')) {
00163 len--;
00164 }
00165 buf[len] = '\0';
00166 return buf;
00167 }
00168 }
00169 }
00170
00171 success_t TextTokenizer::Open(const char *fname,
00172 const char *comment_chars_in, const char *ident_extra_in,
00173 int features_in) {
00174 next_.Copy("");
00175 cur_.Copy("");
00176 next_type_ = END;
00177 cur_type_ = END;
00178 comment_start_ = comment_chars_in;
00179 features_ = features_in;
00180 ident_extra_ = ident_extra_in;
00181 line_ = 1;
00182
00183 f_ = fopen(fname, "r");
00184
00185 if (unlikely(f_ == NULL)) {
00186 return SUCCESS_FAIL;
00187 } else {
00188 Gobble();
00189 return SUCCESS_PASS;
00190 }
00191 }
00192
00193 char TextTokenizer::NextChar_() {
00194 int c = GetChar_();
00195
00196 if (c != EOF && unlikely(strchr(comment_start_, c) != NULL)) {
00197 do {
00198 c = GetChar_();
00199 } while (likely(c != EOF) && likely(c != '\r') && likely(c != '\n'));
00200 }
00201
00202 if (unlikely(c == EOF)) {
00203 c = 0;
00204 }
00205
00206 return c;
00207 }
00208
00209 char TextTokenizer::NextChar_(ArrayList<char> *token) {
00210 char c = NextChar_();
00211
00212 token->PushBackCopy(c);
00213
00214 return c;
00215 }
00216
00217 char TextTokenizer::Skip_(ArrayList<char> *token) {
00218 int c;
00219
00220 while (1) {
00221 c = NextChar_();
00222 if (!isspace(c)) {
00223 break;
00224 }
00225
00226 if (c == '\r' || c == '\n') {
00227 if (c == '\r') {
00228 c = NextChar_();
00229 if (c != '\n') {
00230 Unget_(c);
00231 }
00232 }
00233 line_++;
00234 if ((features_ & WANT_NEWLINE)) {
00235 c = '\n';
00236 break;
00237 }
00238 }
00239 }
00240
00241 token->PushBackCopy(char(c));
00242
00243 return char(c);
00244 }
00245
00246 void TextTokenizer::UndoNextChar_(ArrayList<char> *token) {
00247 char c;
00248 token->PopBackInit(&c);
00249 if (c != 0) {
00250 Unget_(c);
00251 }
00252 }
00253
00254 void Sanitize(const String& src, String* dest) {
00255 dest->Init();
00256
00257 for (index_t i = 0; i < src.length(); i++) {
00258 char c = src[i];
00259
00260 if (isgraph(c) || c == ' ' || c == '\t') {
00261 *dest += c;
00262 } else if (isspace(c)) {
00263 *dest += "<whitespace>";
00264 } else {
00265 *dest += "<nonprint>";
00266 }
00267 }
00268 }
00269
00270 void TextTokenizer::Error(const char *format, ...) {
00271 va_list vl;
00272 String cur_sanitized;
00273 String next_sanitized;
00274
00275 Sanitize(cur_, &cur_sanitized);
00276 Sanitize(next_, &next_sanitized);
00277
00278
00279 fprintf(stderr, ".| %d: %s <-HERE-> %s\nX| `-> ", line_,
00280 cur_sanitized.c_str(), next_sanitized.c_str());
00281
00282 va_start(vl, format);
00283 vfprintf(stderr, format, vl);
00284 va_end(vl);
00285
00286 fprintf(stderr, "\n");
00287 }
00288
00289 void TextTokenizer::Error_(const char *msg, const ArrayList<char>& token) {
00290 next_type_ = INVALID;
00291
00292 printf("size is %"LI"d, token[0] = %d\n", token.size(), token[0]);
00293 next_.Copy(token.begin(), token.size());
00294 Error("%s", msg);
00295 next_.Destruct();
00296 }
00297
00298 void TextTokenizer::ScanNumber_(char c, ArrayList<char> *token) {
00299 bool dot = false;
00300 bool floating = false;
00301
00302 while (1) {
00303 if (unlikely(c == '.')) {
00304
00305 if (unlikely(dot)) {
00306 Error_("Multiple decimal points in a float", *token);
00307 return;
00308 }
00309 dot = true;
00310 floating = true;
00311 } else if (likely(isdigit(c))) {
00312
00313 } else if (unlikely(c == 'e' || c == 'E')) {
00314
00315 c = NextChar_(token);
00316 if (c == '+' || c == '-') {
00317 c = NextChar_(token);
00318 }
00319 while (isdigit(c)) {
00320 c = NextChar_(token);
00321 }
00322 floating = true;
00323 break;
00324 } else {
00325
00326 break;
00327 }
00328
00329 c = NextChar_(token);
00330 }
00331
00332 if (c == 'f' || c == 'F') {
00333
00334 floating = true;
00335 } else if (isspace(c) || ispunct(c)) {
00336 UndoNextChar_(token);
00337 } else {
00338 Error_("Invalid character while parsing number", *token);
00339 }
00340
00341 if (floating) {
00342 next_type_ = DOUBLE;
00343 } else {
00344 next_type_ = INTEGER;
00345 }
00346 }
00347
00348 void TextTokenizer::ScanString_(char ending, ArrayList<char> *token) {
00349 int c;
00350
00351 while (1) {
00352 c = NextChar_(token);
00353
00354 if (c == 0) {
00355 Error_("Unterminated String", *token);
00356 UndoNextChar_(token);
00357 return;
00358 }
00359
00360 if (c == ending) {
00361 next_type_ = STRING;
00362 return;
00363 }
00364 }
00365 }
00366
00367 void TextTokenizer::Scan_(ArrayList<char> *token) {
00368 char c = Skip_(token);
00369
00370 if (c == 0) {
00371 token->Clear();
00372 next_type_ = END;
00373 return;
00374 } else if (c == '.' || isdigit(c)) {
00375 ScanNumber_(c, token);
00376 } else if (isident_begin_(c)) {
00377 while (isident_rest_(NextChar_(token))) {}
00378 UndoNextChar_(token);
00379 next_type_ = IDENTIFIER;
00380 } else if (ispunct(c) || isspace(c)) {
00381 if (c == '"' || c == '\'') {
00382 ScanString_(c, token);
00383 } else if (c == '+' || c == '-') {
00384 c = NextChar_(token);
00385 if (c == '.' || isdigit(c)) {
00386 ScanNumber_(c, token);
00387 } else {
00388 UndoNextChar_(token);
00389 }
00390 } else {
00391 next_type_ = PUNCT;
00392 }
00393 } else {
00394 Error_("Unknown Character", *token);
00395 }
00396 }
00397
00398 void TextTokenizer::Gobble() {
00399 cur_.Destruct();
00400 cur_.StealDestruct(&next_);
00401 cur_type_ = next_type_;
00402
00403 ArrayList<char> token;
00404 token.Init();
00405 Scan_(&token);
00406 token.PushBackCopy('\0');
00407 next_.Steal(&token);
00408 DEBUG_ASSERT(next_.length() == index_t(strlen(next_.c_str())));
00409 }
00410
00411 success_t TextWriter::Printf(const char *format, ...) {
00412 int rv;
00413
00414 va_list vl;
00415
00416 va_start(vl, format);
00417 rv = vfprintf(f_, format, vl);
00418 va_end(vl);
00419
00420 return SUCCESS_FROM_C(rv);
00421 }
00422