root/gc/cord/cordxtra.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. CORD_cat_char
  2. CORD_catn
  3. CORD_fill_data
  4. CORD_fill_proc
  5. CORD_batched_fill_proc
  6. CORD_fill_buf
  7. CORD_cmp
  8. CORD_ncmp
  9. CORD_to_char_star
  10. CORD_from_char_star
  11. CORD_to_const_char_star
  12. CORD_fetch
  13. CORD_put_proc
  14. CORD_batched_put_proc
  15. CORD_put
  16. chr_data
  17. CORD_chr_proc
  18. CORD_rchr_proc
  19. CORD_batched_chr_proc
  20. CORD_chr
  21. CORD_rchr
  22. CORD_str
  23. CORD_ec_flush_buf
  24. CORD_ec_append_cord
  25. CORD_nul_func
  26. CORD_chars
  27. CORD_from_file_eager
  28. cache_line
  29. lf_state
  30. refill_data
  31. refill_cache
  32. CORD_lf_func
  33. CORD_lf_close_proc
  34. CORD_from_file_lazy_inner
  35. CORD_from_file_lazy
  36. CORD_from_file

   1 /*
   2  * Copyright (c) 1993-1994 by Xerox Corporation.  All rights reserved.
   3  *
   4  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
   5  * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
   6  *
   7  * Permission is hereby granted to use or copy this program
   8  * for any purpose,  provided the above notices are retained on all copies.
   9  * Permission to modify the code and to distribute modified code is granted,
  10  * provided the above notices are retained, and a notice that the code was
  11  * modified is included with the above copyright notice.
  12  *
  13  * Author: Hans-J. Boehm (boehm@parc.xerox.com)
  14  */
  15 /*
  16  * These are functions on cords that do not need to understand their
  17  * implementation.  They serve also serve as example client code for
  18  * cord_basics.
  19  */
  20 /* Boehm, December 8, 1995 1:53 pm PST */
  21 # include <stdio.h>
  22 # include <string.h>
  23 # include <stdlib.h>
  24 # include <stdarg.h>
  25 # include "cord.h"
  26 # include "ec.h"
  27 # define I_HIDE_POINTERS        /* So we get access to allocation lock. */
  28                                 /* We use this for lazy file reading,   */
  29                                 /* so that we remain independent        */
  30                                 /* of the threads primitives.           */
  31 # include "gc.h"
  32 
  33 /* For now we assume that pointer reads and writes are atomic,  */
  34 /* i.e. another thread always sees the state before or after    */
  35 /* a write.  This might be false on a Motorola M68K with        */
  36 /* pointers that are not 32-bit aligned.  But there probably    */
  37 /* aren't too many threads packages running on those.           */
  38 # define ATOMIC_WRITE(x,y) (x) = (y)
  39 # define ATOMIC_READ(x) (*(x))
  40 
  41 /* The standard says these are in stdio.h, but they aren't always: */
  42 # ifndef SEEK_SET
  43 #   define SEEK_SET 0
  44 # endif
  45 # ifndef SEEK_END
  46 #   define SEEK_END 2
  47 # endif
  48 
  49 # define BUFSZ 2048     /* Size of stack allocated buffers when         */
  50                         /* we want large buffers.                       */
  51 
  52 typedef void (* oom_fn)(void);
  53 
  54 # define OUT_OF_MEMORY {  if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \
  55                           ABORT("Out of memory\n"); }
  56 # define ABORT(msg) { fprintf(stderr, "%s\n", msg); abort(); }
  57 
  58 CORD CORD_cat_char(CORD x, char c)
  59 {
  60     register char * string;
  61     
  62     if (c == '\0') return(CORD_cat(x, CORD_nul(1)));
  63     string = GC_MALLOC_ATOMIC(2);
  64     if (string == 0) OUT_OF_MEMORY;
  65     string[0] = c;
  66     string[1] = '\0';
  67     return(CORD_cat_char_star(x, string, 1));
  68 }
  69 
  70 CORD CORD_catn(int nargs, ...)
  71 {
  72     register CORD result = CORD_EMPTY;
  73     va_list args;
  74     register int i;
  75 
  76     va_start(args, nargs);
  77     for (i = 0; i < nargs; i++) {
  78         register CORD next = va_arg(args, CORD);
  79         result = CORD_cat(result, next);
  80     }
  81     va_end(args);
  82     return(result);
  83 }
  84 
  85 typedef struct {
  86         size_t len;
  87         size_t count;
  88         char * buf;
  89 } CORD_fill_data;
  90 
  91 int CORD_fill_proc(char c, void * client_data)
  92 {
  93     register CORD_fill_data * d = (CORD_fill_data *)client_data;
  94     register size_t count = d -> count;
  95     
  96     (d -> buf)[count] = c;
  97     d -> count = ++count;
  98     if (count >= d -> len) {
  99         return(1);
 100     } else {
 101         return(0);
 102     }
 103 }
 104 
 105 int CORD_batched_fill_proc(const char * s, void * client_data)
 106 {
 107     register CORD_fill_data * d = (CORD_fill_data *)client_data;
 108     register size_t count = d -> count;
 109     register size_t max = d -> len;
 110     register char * buf = d -> buf;
 111     register const char * t = s;
 112     
 113     while((buf[count] = *t++) != '\0') {
 114         count++;
 115         if (count >= max) {
 116             d -> count = count;
 117             return(1);
 118         }
 119     }
 120     d -> count = count;
 121     return(0);
 122 }
 123 
 124 /* Fill buf with len characters starting at i.                          */
 125 /* Assumes len characters are available.                                */ 
 126 void CORD_fill_buf(CORD x, size_t i, size_t len, char * buf)
 127 {
 128     CORD_fill_data fd;
 129     
 130     fd.len = len;
 131     fd.buf = buf;
 132     fd.count = 0;
 133     (void)CORD_iter5(x, i, CORD_fill_proc, CORD_batched_fill_proc, &fd);
 134 }
 135 
 136 int CORD_cmp(CORD x, CORD y)
 137 {
 138     CORD_pos xpos;
 139     CORD_pos ypos;
 140     register size_t avail, yavail;
 141     
 142     if (y == CORD_EMPTY) return(x != CORD_EMPTY);
 143     if (x == CORD_EMPTY) return(-1);
 144     if (CORD_IS_STRING(y) && CORD_IS_STRING(x)) return(strcmp(x,y));
 145     CORD_set_pos(xpos, x, 0);
 146     CORD_set_pos(ypos, y, 0);
 147     for(;;) {
 148         if (!CORD_pos_valid(xpos)) {
 149             if (CORD_pos_valid(ypos)) {
 150                 return(-1);
 151             } else {
 152                 return(0);
 153             }
 154         }
 155         if (!CORD_pos_valid(ypos)) {
 156             return(1);
 157         }
 158         if ((avail = CORD_pos_chars_left(xpos)) <= 0
 159             || (yavail = CORD_pos_chars_left(ypos)) <= 0) {
 160             register char xcurrent = CORD_pos_fetch(xpos);
 161             register char ycurrent = CORD_pos_fetch(ypos);
 162             if (xcurrent != ycurrent) return(xcurrent - ycurrent);
 163             CORD_next(xpos);
 164             CORD_next(ypos);
 165         } else {
 166             /* process as many characters as we can     */
 167             register int result;
 168             
 169             if (avail > yavail) avail = yavail;
 170             result = strncmp(CORD_pos_cur_char_addr(xpos),
 171                              CORD_pos_cur_char_addr(ypos), avail);
 172             if (result != 0) return(result);
 173             CORD_pos_advance(xpos, avail);
 174             CORD_pos_advance(ypos, avail);
 175         }
 176     }
 177 }
 178 
 179 int CORD_ncmp(CORD x, size_t x_start, CORD y, size_t y_start, size_t len)
 180 {
 181     CORD_pos xpos;
 182     CORD_pos ypos;
 183     register size_t count;
 184     register long avail, yavail;
 185     
 186     CORD_set_pos(xpos, x, x_start);
 187     CORD_set_pos(ypos, y, y_start);
 188     for(count = 0; count < len;) {
 189         if (!CORD_pos_valid(xpos)) {
 190             if (CORD_pos_valid(ypos)) {
 191                 return(-1);
 192             } else {
 193                 return(0);
 194             }
 195         }
 196         if (!CORD_pos_valid(ypos)) {
 197             return(1);
 198         }
 199         if ((avail = CORD_pos_chars_left(xpos)) <= 0
 200             || (yavail = CORD_pos_chars_left(ypos)) <= 0) {
 201             register char xcurrent = CORD_pos_fetch(xpos);
 202             register char ycurrent = CORD_pos_fetch(ypos);
 203             if (xcurrent != ycurrent) return(xcurrent - ycurrent);
 204             CORD_next(xpos);
 205             CORD_next(ypos);
 206             count++;
 207         } else {
 208             /* process as many characters as we can     */
 209             register int result;
 210             
 211             if (avail > yavail) avail = yavail;
 212             count += avail;
 213             if (count > len) avail -= (count - len);
 214             result = strncmp(CORD_pos_cur_char_addr(xpos),
 215                              CORD_pos_cur_char_addr(ypos), (size_t)avail);
 216             if (result != 0) return(result);
 217             CORD_pos_advance(xpos, (size_t)avail);
 218             CORD_pos_advance(ypos, (size_t)avail);
 219         }
 220     }
 221     return(0);
 222 }
 223 
 224 char * CORD_to_char_star(CORD x)
 225 {
 226     register size_t len = CORD_len(x);
 227     char * result = GC_MALLOC_ATOMIC(len + 1);
 228     
 229     if (result == 0) OUT_OF_MEMORY;
 230     CORD_fill_buf(x, 0, len, result);
 231     result[len] = '\0';
 232     return(result);
 233 }
 234 
 235 CORD CORD_from_char_star(const char *s)
 236 {
 237     char * result;
 238     size_t len = strlen(s);
 239 
 240     if (0 == len) return(CORD_EMPTY);
 241     result = GC_MALLOC_ATOMIC(len + 1);
 242     if (result == 0) OUT_OF_MEMORY;
 243     memcpy(result, s, len+1);
 244     return(result);
 245 }
 246 
 247 const char * CORD_to_const_char_star(CORD x)
 248 {
 249     if (x == 0) return("");
 250     if (CORD_IS_STRING(x)) return((const char *)x);
 251     return(CORD_to_char_star(x));
 252 }
 253 
 254 char CORD_fetch(CORD x, size_t i)
 255 {
 256     CORD_pos xpos;
 257     
 258     CORD_set_pos(xpos, x, i);
 259     if (!CORD_pos_valid(xpos)) ABORT("bad index?");
 260     return(CORD_pos_fetch(xpos));
 261 }
 262 
 263 
 264 int CORD_put_proc(char c, void * client_data)
 265 {
 266     register FILE * f = (FILE *)client_data;
 267     
 268     return(putc(c, f) == EOF);
 269 }
 270 
 271 int CORD_batched_put_proc(const char * s, void * client_data)
 272 {
 273     register FILE * f = (FILE *)client_data;
 274     
 275     return(fputs(s, f) == EOF);
 276 }
 277     
 278 
 279 int CORD_put(CORD x, FILE * f)
 280 {
 281     if (CORD_iter5(x, 0, CORD_put_proc, CORD_batched_put_proc, f)) {
 282         return(EOF);
 283     } else {
 284         return(1);
 285     }
 286 }
 287 
 288 typedef struct {
 289     size_t pos;         /* Current position in the cord */
 290     char target;        /* Character we're looking for  */
 291 } chr_data;
 292 
 293 int CORD_chr_proc(char c, void * client_data)
 294 {
 295     register chr_data * d = (chr_data *)client_data;
 296     
 297     if (c == d -> target) return(1);
 298     (d -> pos) ++;
 299     return(0);
 300 }
 301 
 302 int CORD_rchr_proc(char c, void * client_data)
 303 {
 304     register chr_data * d = (chr_data *)client_data;
 305     
 306     if (c == d -> target) return(1);
 307     (d -> pos) --;
 308     return(0);
 309 }
 310 
 311 int CORD_batched_chr_proc(const char *s, void * client_data)
 312 {
 313     register chr_data * d = (chr_data *)client_data;
 314     register char * occ = strchr(s, d -> target);
 315     
 316     if (occ == 0) {
 317         d -> pos += strlen(s);
 318         return(0);
 319     } else {
 320         d -> pos += occ - s;
 321         return(1);
 322     }
 323 }
 324 
 325 size_t CORD_chr(CORD x, size_t i, int c)
 326 {
 327     chr_data d;
 328     
 329     d.pos = i;
 330     d.target = c;
 331     if (CORD_iter5(x, i, CORD_chr_proc, CORD_batched_chr_proc, &d)) {
 332         return(d.pos);
 333     } else {
 334         return(CORD_NOT_FOUND);
 335     }
 336 }
 337 
 338 size_t CORD_rchr(CORD x, size_t i, int c)
 339 {
 340     chr_data d;
 341     
 342     d.pos = i;
 343     d.target = c;
 344     if (CORD_riter4(x, i, CORD_rchr_proc, &d)) {
 345         return(d.pos);
 346     } else {
 347         return(CORD_NOT_FOUND);
 348     }
 349 }
 350 
 351 /* Find the first occurrence of s in x at position start or later.      */
 352 /* This uses an asymptotically poor algorithm, which should typically   */
 353 /* perform acceptably.  We compare the first few characters directly,   */
 354 /* and call CORD_ncmp whenever there is a partial match.                */
 355 /* This has the advantage that we allocate very little, or not at all.  */
 356 /* It's very fast if there are few close misses.                        */
 357 size_t CORD_str(CORD x, size_t start, CORD s)
 358 {
 359     CORD_pos xpos;
 360     size_t xlen = CORD_len(x);
 361     size_t slen;
 362     register size_t start_len;
 363     const char * s_start;
 364     unsigned long s_buf = 0;    /* The first few characters of s        */
 365     unsigned long x_buf = 0;    /* Start of candidate substring.        */
 366                                 /* Initialized only to make compilers   */
 367                                 /* happy.                               */
 368     unsigned long mask = 0;
 369     register size_t i;
 370     register size_t match_pos;
 371     
 372     if (s == CORD_EMPTY) return(start);
 373     if (CORD_IS_STRING(s)) {
 374         s_start = s;
 375         slen = strlen(s);
 376     } else {
 377         s_start = CORD_to_char_star(CORD_substr(s, 0, sizeof(unsigned long)));
 378         slen = CORD_len(s);
 379     }
 380     if (xlen < start || xlen - start < slen) return(CORD_NOT_FOUND);
 381     start_len = slen;
 382     if (start_len > sizeof(unsigned long)) start_len = sizeof(unsigned long);
 383     CORD_set_pos(xpos, x, start);
 384     for (i = 0; i < start_len; i++) {
 385         mask <<= 8;
 386         mask |= 0xff;
 387         s_buf <<= 8;
 388         s_buf |= (unsigned char)s_start[i];
 389         x_buf <<= 8;
 390         x_buf |= (unsigned char)CORD_pos_fetch(xpos);
 391         CORD_next(xpos);
 392     }
 393     for (match_pos = start; ; match_pos++) {
 394         if ((x_buf & mask) == s_buf) {
 395             if (slen == start_len ||
 396                 CORD_ncmp(x, match_pos + start_len,
 397                           s, start_len, slen - start_len) == 0) {
 398                 return(match_pos);
 399             }
 400         }
 401         if ( match_pos == xlen - slen ) {
 402             return(CORD_NOT_FOUND);
 403         }
 404         x_buf <<= 8;
 405         x_buf |= (unsigned char)CORD_pos_fetch(xpos);
 406         CORD_next(xpos);
 407     }
 408 }
 409 
 410 void CORD_ec_flush_buf(CORD_ec x)
 411 {
 412     register size_t len = x[0].ec_bufptr - x[0].ec_buf;
 413     char * s;
 414 
 415     if (len == 0) return;
 416     s = GC_MALLOC_ATOMIC(len+1);
 417     memcpy(s, x[0].ec_buf, len);
 418     s[len] = '\0';
 419     x[0].ec_cord = CORD_cat_char_star(x[0].ec_cord, s, len);
 420     x[0].ec_bufptr = x[0].ec_buf;
 421 }
 422 
 423 void CORD_ec_append_cord(CORD_ec x, CORD s)
 424 {
 425     CORD_ec_flush_buf(x);
 426     x[0].ec_cord = CORD_cat(x[0].ec_cord, s);
 427 }
 428 
 429 /*ARGSUSED*/
 430 char CORD_nul_func(size_t i, void * client_data)
 431 {
 432     return((char)(unsigned long)client_data);
 433 }
 434 
 435 
 436 CORD CORD_chars(char c, size_t i)
 437 {
 438     return(CORD_from_fn(CORD_nul_func, (void *)(unsigned long)c, i));
 439 }
 440 
 441 CORD CORD_from_file_eager(FILE * f)
 442 {
 443     register int c;
 444     CORD_ec ecord;
 445     
 446     CORD_ec_init(ecord);
 447     for(;;) {
 448         c = getc(f);
 449         if (c == 0) {
 450           /* Append the right number of NULs    */
 451           /* Note that any string of NULs is rpresented in 4 words,     */
 452           /* independent of its length.                                 */
 453             register size_t count = 1;
 454             
 455             CORD_ec_flush_buf(ecord);
 456             while ((c = getc(f)) == 0) count++;
 457             ecord[0].ec_cord = CORD_cat(ecord[0].ec_cord, CORD_nul(count));
 458         }
 459         if (c == EOF) break;
 460         CORD_ec_append(ecord, c);
 461     }
 462     (void) fclose(f);
 463     return(CORD_balance(CORD_ec_to_cord(ecord)));
 464 }
 465 
 466 /* The state maintained for a lazily read file consists primarily       */
 467 /* of a large direct-mapped cache of previously read values.            */
 468 /* We could rely more on stdio buffering.  That would have 2            */
 469 /* disadvantages:                                                       */
 470 /*      1) Empirically, not all fseek implementations preserve the      */
 471 /*         buffer whenever they could.                                  */
 472 /*      2) It would fail if 2 different sections of a long cord         */
 473 /*         were being read alternately.                                 */
 474 /* We do use the stdio buffer for read ahead.                           */
 475 /* To guarantee thread safety in the presence of atomic pointer         */
 476 /* writes, cache lines are always replaced, and never modified in       */
 477 /* place.                                                               */
 478 
 479 # define LOG_CACHE_SZ 14
 480 # define CACHE_SZ (1 << LOG_CACHE_SZ)
 481 # define LOG_LINE_SZ 9
 482 # define LINE_SZ (1 << LOG_LINE_SZ)
 483 
 484 typedef struct {
 485     size_t tag;
 486     char data[LINE_SZ];
 487         /* data[i%LINE_SZ] = ith char in file if tag = i/LINE_SZ        */
 488 } cache_line;
 489 
 490 typedef struct {
 491     FILE * lf_file;
 492     size_t lf_current;  /* Current file pointer value */
 493     cache_line * volatile lf_cache[CACHE_SZ/LINE_SZ];
 494 } lf_state;
 495 
 496 # define MOD_CACHE_SZ(n) ((n) & (CACHE_SZ - 1))
 497 # define DIV_CACHE_SZ(n) ((n) >> LOG_CACHE_SZ)
 498 # define MOD_LINE_SZ(n) ((n) & (LINE_SZ - 1))
 499 # define DIV_LINE_SZ(n) ((n) >> LOG_LINE_SZ)
 500 # define LINE_START(n) ((n) & ~(LINE_SZ - 1))
 501 
 502 typedef struct {
 503     lf_state * state;
 504     size_t file_pos;    /* Position of needed character. */
 505     cache_line * new_cache;
 506 } refill_data;
 507 
 508 /* Executed with allocation lock. */
 509 static char refill_cache(client_data)
 510 refill_data * client_data;
 511 {
 512     register lf_state * state = client_data -> state;
 513     register size_t file_pos = client_data -> file_pos;
 514     FILE *f = state -> lf_file;
 515     size_t line_start = LINE_START(file_pos);
 516     size_t line_no = DIV_LINE_SZ(MOD_CACHE_SZ(file_pos));
 517     cache_line * new_cache = client_data -> new_cache;
 518     
 519     if (line_start != state -> lf_current
 520         && fseek(f, line_start, SEEK_SET) != 0) {
 521             ABORT("fseek failed");
 522     }
 523     if (fread(new_cache -> data, sizeof(char), LINE_SZ, f)
 524         <= file_pos - line_start) {
 525         ABORT("fread failed");
 526     }
 527     new_cache -> tag = DIV_LINE_SZ(file_pos);
 528     /* Store barrier goes here. */
 529     ATOMIC_WRITE(state -> lf_cache[line_no], new_cache);
 530     state -> lf_current = line_start + LINE_SZ;
 531     return(new_cache->data[MOD_LINE_SZ(file_pos)]);
 532 }
 533 
 534 char CORD_lf_func(size_t i, void * client_data)
 535 {
 536     register lf_state * state = (lf_state *)client_data;
 537     register cache_line * volatile * cl_addr =
 538                 &(state -> lf_cache[DIV_LINE_SZ(MOD_CACHE_SZ(i))]);
 539     register cache_line * cl = (cache_line *)ATOMIC_READ(cl_addr);
 540     
 541     if (cl == 0 || cl -> tag != DIV_LINE_SZ(i)) {
 542         /* Cache miss */
 543         refill_data rd;
 544         
 545         rd.state = state;
 546         rd.file_pos =  i;
 547         rd.new_cache = GC_NEW_ATOMIC(cache_line);
 548         if (rd.new_cache == 0) OUT_OF_MEMORY;
 549         return((char)(GC_word)
 550                   GC_call_with_alloc_lock((GC_fn_type) refill_cache, &rd));
 551     }
 552     return(cl -> data[MOD_LINE_SZ(i)]);
 553 }    
 554 
 555 /*ARGSUSED*/
 556 void CORD_lf_close_proc(void * obj, void * client_data)  
 557 {
 558     if (fclose(((lf_state *)obj) -> lf_file) != 0) {
 559         ABORT("CORD_lf_close_proc: fclose failed");
 560     }
 561 }                       
 562 
 563 CORD CORD_from_file_lazy_inner(FILE * f, size_t len)
 564 {
 565     register lf_state * state = GC_NEW(lf_state);
 566     register int i;
 567     
 568     if (state == 0) OUT_OF_MEMORY;
 569     if (len != 0) {
 570         /* Dummy read to force buffer allocation.       */
 571         /* This greatly increases the probability       */
 572         /* of avoiding deadlock if buffer allocation    */
 573         /* is redirected to GC_malloc and the           */
 574         /* world is multithreaded.                      */
 575         char buf[1];
 576 
 577         (void) fread(buf, 1, 1, f); 
 578         rewind(f);
 579     }
 580     state -> lf_file = f;
 581     for (i = 0; i < CACHE_SZ/LINE_SZ; i++) {
 582         state -> lf_cache[i] = 0;
 583     }
 584     state -> lf_current = 0;
 585     GC_REGISTER_FINALIZER(state, CORD_lf_close_proc, 0, 0, 0);
 586     return(CORD_from_fn(CORD_lf_func, state, len));
 587 }
 588 
 589 CORD CORD_from_file_lazy(FILE * f)
 590 {
 591     register long len;
 592     
 593     if (fseek(f, 0l, SEEK_END) != 0) {
 594         ABORT("Bad fd argument - fseek failed");
 595     }
 596     if ((len = ftell(f)) < 0) {
 597         ABORT("Bad fd argument - ftell failed");
 598     }
 599     rewind(f);
 600     return(CORD_from_file_lazy_inner(f, (size_t)len));
 601 }
 602 
 603 # define LAZY_THRESHOLD (128*1024 + 1)
 604 
 605 CORD CORD_from_file(FILE * f)
 606 {
 607     register long len;
 608     
 609     if (fseek(f, 0l, SEEK_END) != 0) {
 610         ABORT("Bad fd argument - fseek failed");
 611     }
 612     if ((len = ftell(f)) < 0) {
 613         ABORT("Bad fd argument - ftell failed");
 614     }
 615     rewind(f);
 616     if (len < LAZY_THRESHOLD) {
 617         return(CORD_from_file_eager(f));
 618     } else {
 619         return(CORD_from_file_lazy_inner(f, (size_t)len));
 620     }
 621 }

/* [<][>][^][v][top][bottom][index][help] */