root/gc/cord/cordbscs.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. CordRep
  2. CORD_dump_inner
  3. CORD_dump
  4. CORD_cat_char_star
  5. CORD_cat
  6. CORD_from_fn
  7. CORD_len
  8. CORD_index_access_fn
  9. CORD_apply_access_fn
  10. CORD_substr_closure
  11. CORD_substr_checked
  12. CORD_substr
  13. CORD_iter5
  14. CORD_iter
  15. CORD_riter4
  16. CORD_riter
  17. ForestElement
  18. CORD_init_min_len
  19. CORD_init_forest
  20. CORD_add_forest
  21. CORD_concat_forest
  22. CORD_balance_insert
  23. CORD_balance
  24. CORD__extend_path
  25. CORD__pos_fetch
  26. CORD__next
  27. CORD__prev
  28. CORD_pos_fetch
  29. CORD_next
  30. CORD_prev
  31. CORD_pos_to_index
  32. CORD_pos_to_cord
  33. CORD_pos_valid
  34. CORD_set_pos

   1 /*
   2  * Copyright (c) 1993-1994 by Xerox Corporation.  All rights reserved.
   3  *
   4  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
   5  * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
   6  *
   7  * Permission is hereby granted to use or copy this program
   8  * for any purpose,  provided the above notices are retained on all copies.
   9  * Permission to modify the code and to distribute modified code is granted,
  10  * provided the above notices are retained, and a notice that the code was
  11  * modified is included with the above copyright notice.
  12  *
  13  * Author: Hans-J. Boehm (boehm@parc.xerox.com)
  14  */
  15 /* Boehm, October 3, 1994 5:19 pm PDT */
  16 # include "gc.h"
  17 # include "cord.h"
  18 # include <stdlib.h>
  19 # include <stdio.h>
  20 # include <string.h>
  21 
  22 /* An implementation of the cord primitives.  These are the only        */
  23 /* Functions that understand the representation.  We perform only       */
  24 /* minimal checks on arguments to these functions.  Out of bounds       */
  25 /* arguments to the iteration functions may result in client functions  */
  26 /* invoked on garbage data.  In most cases, client functions should be  */
  27 /* programmed defensively enough that this does not result in memory    */
  28 /* smashes.                                                             */ 
  29 
  30 typedef void (* oom_fn)(void);
  31 
  32 oom_fn CORD_oom_fn = (oom_fn) 0;
  33 
  34 # define OUT_OF_MEMORY {  if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \
  35                           ABORT("Out of memory\n"); }
  36 # define ABORT(msg) { fprintf(stderr, "%s\n", msg); abort(); }
  37 
  38 typedef unsigned long word;
  39 
  40 typedef union {
  41     struct Concatenation {
  42         char null;
  43         char header;
  44         char depth;     /* concatenation nesting depth. */
  45         unsigned char left_len;
  46                         /* Length of left child if it is sufficiently   */
  47                         /* short; 0 otherwise.                          */
  48 #           define MAX_LEFT_LEN 255
  49         word len;
  50         CORD left;      /* length(left) > 0     */
  51         CORD right;     /* length(right) > 0    */
  52     } concatenation;
  53     struct Function {
  54         char null;
  55         char header;
  56         char depth;     /* always 0     */
  57         char left_len;  /* always 0     */
  58         word len;
  59         CORD_fn fn;
  60         void * client_data;
  61     } function;
  62     struct Generic {
  63         char null;
  64         char header;
  65         char depth;
  66         char left_len;
  67         word len;
  68     } generic;
  69     char string[1];
  70 } CordRep;
  71 
  72 # define CONCAT_HDR 1
  73         
  74 # define FN_HDR 4
  75 # define SUBSTR_HDR 6
  76         /* Substring nodes are a special case of function nodes.        */
  77         /* The client_data field is known to point to a substr_args     */
  78         /* structure, and the function is either CORD_apply_access_fn   */
  79         /* or CORD_index_access_fn.                                     */
  80 
  81 /* The following may be applied only to function and concatenation nodes: */
  82 #define IS_CONCATENATION(s)  (((CordRep *)s)->generic.header == CONCAT_HDR)
  83 
  84 #define IS_FUNCTION(s)  ((((CordRep *)s)->generic.header & FN_HDR) != 0)
  85 
  86 #define IS_SUBSTR(s) (((CordRep *)s)->generic.header == SUBSTR_HDR)
  87 
  88 #define LEN(s) (((CordRep *)s) -> generic.len)
  89 #define DEPTH(s) (((CordRep *)s) -> generic.depth)
  90 #define GEN_LEN(s) (CORD_IS_STRING(s) ? strlen(s) : LEN(s))
  91 
  92 #define LEFT_LEN(c) ((c) -> left_len != 0? \
  93                                 (c) -> left_len \
  94                                 : (CORD_IS_STRING((c) -> left) ? \
  95                                         (c) -> len - GEN_LEN((c) -> right) \
  96                                         : LEN((c) -> left)))
  97 
  98 #define SHORT_LIMIT (sizeof(CordRep) - 1)
  99         /* Cords shorter than this are C strings */
 100 
 101 
 102 /* Dump the internal representation of x to stdout, with initial        */
 103 /* indentation level n.                                                 */
 104 void CORD_dump_inner(CORD x, unsigned n)
 105 {
 106     register size_t i;
 107     
 108     for (i = 0; i < (size_t)n; i++) {
 109         fputs("  ", stdout);
 110     }
 111     if (x == 0) {
 112         fputs("NIL\n", stdout);
 113     } else if (CORD_IS_STRING(x)) {
 114         for (i = 0; i <= SHORT_LIMIT; i++) {
 115             if (x[i] == '\0') break;
 116             putchar(x[i]);
 117         }
 118         if (x[i] != '\0') fputs("...", stdout);
 119         putchar('\n');
 120     } else if (IS_CONCATENATION(x)) {
 121         register struct Concatenation * conc =
 122                                 &(((CordRep *)x) -> concatenation);
 123         printf("Concatenation: %p (len: %d, depth: %d)\n",
 124                x, (int)(conc -> len), (int)(conc -> depth));
 125         CORD_dump_inner(conc -> left, n+1);
 126         CORD_dump_inner(conc -> right, n+1);
 127     } else /* function */{
 128         register struct Function * func =
 129                                 &(((CordRep *)x) -> function);
 130         if (IS_SUBSTR(x)) printf("(Substring) ");
 131         printf("Function: %p (len: %d): ", x, (int)(func -> len));
 132         for (i = 0; i < 20 && i < func -> len; i++) {
 133             putchar((*(func -> fn))(i, func -> client_data));
 134         }
 135         if (i < func -> len) fputs("...", stdout);
 136         putchar('\n');
 137     }
 138 }
 139 
 140 /* Dump the internal representation of x to stdout      */
 141 void CORD_dump(CORD x)
 142 {
 143     CORD_dump_inner(x, 0);
 144     fflush(stdout);
 145 }
 146 
 147 CORD CORD_cat_char_star(CORD x, const char * y, size_t leny)
 148 {
 149     register size_t result_len;
 150     register size_t lenx;
 151     register int depth;
 152     
 153     if (x == CORD_EMPTY) return(y);
 154     if (leny == 0) return(x);
 155     if (CORD_IS_STRING(x)) {
 156         lenx = strlen(x);
 157         result_len = lenx + leny;
 158         if (result_len <= SHORT_LIMIT) {
 159             register char * result = GC_MALLOC_ATOMIC(result_len+1);
 160         
 161             if (result == 0) OUT_OF_MEMORY;
 162             memcpy(result, x, lenx);
 163             memcpy(result + lenx, y, leny);
 164             result[result_len] = '\0';
 165             return((CORD) result);
 166         } else {
 167             depth = 1;
 168         }
 169     } else {
 170         register CORD right;
 171         register CORD left;
 172         register char * new_right;
 173         register size_t right_len;
 174         
 175         lenx = LEN(x);
 176         
 177         if (leny <= SHORT_LIMIT/2
 178             && IS_CONCATENATION(x)
 179             && CORD_IS_STRING(right = ((CordRep *)x) -> concatenation.right)) {
 180             /* Merge y into right part of x. */
 181             if (!CORD_IS_STRING(left = ((CordRep *)x) -> concatenation.left)) {
 182                 right_len = lenx - LEN(left);
 183             } else if (((CordRep *)x) -> concatenation.left_len != 0) {
 184                 right_len = lenx - ((CordRep *)x) -> concatenation.left_len;
 185             } else {
 186                 right_len = strlen(right);
 187             }
 188             result_len = right_len + leny;  /* length of new_right */
 189             if (result_len <= SHORT_LIMIT) {
 190                 new_right = GC_MALLOC_ATOMIC(result_len + 1);
 191                 memcpy(new_right, right, right_len);
 192                 memcpy(new_right + right_len, y, leny);
 193                 new_right[result_len] = '\0';
 194                 y = new_right;
 195                 leny = result_len;
 196                 x = left;
 197                 lenx -= right_len;
 198                 /* Now fall through to concatenate the two pieces: */
 199             }
 200             if (CORD_IS_STRING(x)) {
 201                 depth = 1;
 202             } else {
 203                 depth = DEPTH(x) + 1;
 204             }
 205         } else {
 206             depth = DEPTH(x) + 1;
 207         }
 208         result_len = lenx + leny;
 209     }
 210     {
 211       /* The general case; lenx, result_len is known: */
 212         register struct Concatenation * result;
 213         
 214         result = GC_NEW(struct Concatenation);
 215         if (result == 0) OUT_OF_MEMORY;
 216         result->header = CONCAT_HDR;
 217         result->depth = depth;
 218         if (lenx <= MAX_LEFT_LEN) result->left_len = lenx;
 219         result->len = result_len;
 220         result->left = x;
 221         result->right = y;
 222         if (depth >= MAX_DEPTH) {
 223             return(CORD_balance((CORD)result));
 224         } else {
 225             return((CORD) result);
 226         }
 227     }
 228 }
 229 
 230 
 231 CORD CORD_cat(CORD x, CORD y)
 232 {
 233     register size_t result_len;
 234     register int depth;
 235     register size_t lenx;
 236     
 237     if (x == CORD_EMPTY) return(y);
 238     if (y == CORD_EMPTY) return(x);
 239     if (CORD_IS_STRING(y)) {
 240         return(CORD_cat_char_star(x, y, strlen(y)));
 241     } else if (CORD_IS_STRING(x)) {
 242         lenx = strlen(x);
 243         depth = DEPTH(y) + 1;
 244     } else {
 245         register int depthy = DEPTH(y);
 246         
 247         lenx = LEN(x);
 248         depth = DEPTH(x) + 1;
 249         if (depthy >= depth) depth = depthy + 1;
 250     }
 251     result_len = lenx + LEN(y);
 252     {
 253         register struct Concatenation * result;
 254         
 255         result = GC_NEW(struct Concatenation);
 256         if (result == 0) OUT_OF_MEMORY;
 257         result->header = CONCAT_HDR;
 258         result->depth = depth;
 259         if (lenx <= MAX_LEFT_LEN) result->left_len = lenx;
 260         result->len = result_len;
 261         result->left = x;
 262         result->right = y;
 263         if (depth >= MAX_DEPTH) {
 264             return(CORD_balance((CORD)result));
 265         } else {
 266             return((CORD) result);
 267         }
 268     }
 269 }
 270 
 271 
 272 
 273 CORD CORD_from_fn(CORD_fn fn, void * client_data, size_t len)
 274 {
 275     if (len <= 0) return(0);
 276     if (len <= SHORT_LIMIT) {
 277         register char * result;
 278         register size_t i;
 279         char buf[SHORT_LIMIT+1];
 280         register char c;
 281         
 282         for (i = 0; i < len; i++) {
 283             c = (*fn)(i, client_data);
 284             if (c == '\0') goto gen_case;
 285             buf[i] = c;
 286         }
 287         buf[i] = '\0';
 288         result = GC_MALLOC_ATOMIC(len+1);
 289         if (result == 0) OUT_OF_MEMORY;
 290         strcpy(result, buf);
 291         result[len] = '\0';
 292         return((CORD) result);
 293     }
 294   gen_case:
 295     {
 296         register struct Function * result;
 297         
 298         result = GC_NEW(struct Function);
 299         if (result == 0) OUT_OF_MEMORY;
 300         result->header = FN_HDR;
 301         /* depth is already 0 */
 302         result->len = len;
 303         result->fn = fn;
 304         result->client_data = client_data;
 305         return((CORD) result);
 306     }
 307 }
 308 
 309 size_t CORD_len(CORD x)
 310 {
 311     if (x == 0) {
 312         return(0);
 313     } else {
 314         return(GEN_LEN(x));
 315     }
 316 }
 317 
 318 struct substr_args {
 319     CordRep * sa_cord;
 320     size_t sa_index;
 321 };
 322 
 323 char CORD_index_access_fn(size_t i, void * client_data)
 324 {
 325     register struct substr_args *descr = (struct substr_args *)client_data;
 326     
 327     return(((char *)(descr->sa_cord))[i + descr->sa_index]);
 328 }
 329 
 330 char CORD_apply_access_fn(size_t i, void * client_data)
 331 {
 332     register struct substr_args *descr = (struct substr_args *)client_data;
 333     register struct Function * fn_cord = &(descr->sa_cord->function);
 334     
 335     return((*(fn_cord->fn))(i + descr->sa_index, fn_cord->client_data));
 336 }
 337 
 338 /* A version of CORD_substr that simply returns a function node, thus   */
 339 /* postponing its work. The fourth argument is a function that may      */
 340 /* be used for efficient access to the ith character.                   */
 341 /* Assumes i >= 0 and i + n < length(x).                                */
 342 CORD CORD_substr_closure(CORD x, size_t i, size_t n, CORD_fn f)
 343 {
 344     register struct substr_args * sa = GC_NEW(struct substr_args);
 345     CORD result;
 346     
 347     if (sa == 0) OUT_OF_MEMORY;
 348     sa->sa_cord = (CordRep *)x;
 349     sa->sa_index = i;
 350     result = CORD_from_fn(f, (void *)sa, n);
 351     ((CordRep *)result) -> function.header = SUBSTR_HDR;
 352     return (result);
 353 }
 354 
 355 # define SUBSTR_LIMIT (10 * SHORT_LIMIT)
 356         /* Substrings of function nodes and flat strings shorter than   */
 357         /* this are flat strings.  Othewise we use a functional         */
 358         /* representation, which is significantly slower to access.     */
 359 
 360 /* A version of CORD_substr that assumes i >= 0, n > 0, and i + n < length(x).*/
 361 CORD CORD_substr_checked(CORD x, size_t i, size_t n)
 362 {
 363     if (CORD_IS_STRING(x)) {
 364         if (n > SUBSTR_LIMIT) {
 365             return(CORD_substr_closure(x, i, n, CORD_index_access_fn));
 366         } else {
 367             register char * result = GC_MALLOC_ATOMIC(n+1);
 368             
 369             if (result == 0) OUT_OF_MEMORY;
 370             strncpy(result, x+i, n);
 371             result[n] = '\0';
 372             return(result);
 373         }
 374     } else if (IS_CONCATENATION(x)) {
 375         register struct Concatenation * conc
 376                         = &(((CordRep *)x) -> concatenation);
 377         register size_t left_len;
 378         register size_t right_len;
 379         
 380         left_len = LEFT_LEN(conc);
 381         right_len = conc -> len - left_len;
 382         if (i >= left_len) {
 383             if (n == right_len) return(conc -> right);
 384             return(CORD_substr_checked(conc -> right, i - left_len, n));
 385         } else if (i+n <= left_len) {
 386             if (n == left_len) return(conc -> left);
 387             return(CORD_substr_checked(conc -> left, i, n));
 388         } else {
 389             /* Need at least one character from each side. */
 390             register CORD left_part;
 391             register CORD right_part;
 392             register size_t left_part_len = left_len - i;
 393         
 394             if (i == 0) {
 395                 left_part = conc -> left;
 396             } else {
 397                 left_part = CORD_substr_checked(conc -> left, i, left_part_len);
 398             }
 399             if (i + n == right_len + left_len) {
 400                  right_part = conc -> right;
 401             } else {
 402                  right_part = CORD_substr_checked(conc -> right, 0,
 403                                                   n - left_part_len);
 404             }
 405             return(CORD_cat(left_part, right_part));
 406         }
 407     } else /* function */ {
 408         if (n > SUBSTR_LIMIT) {
 409             if (IS_SUBSTR(x)) {
 410                 /* Avoid nesting substring nodes.       */
 411                 register struct Function * f = &(((CordRep *)x) -> function);
 412                 register struct substr_args *descr =
 413                                 (struct substr_args *)(f -> client_data);
 414                 
 415                 return(CORD_substr_closure((CORD)descr->sa_cord,
 416                                            i + descr->sa_index,
 417                                            n, f -> fn));
 418             } else {
 419                 return(CORD_substr_closure(x, i, n, CORD_apply_access_fn));
 420             }
 421         } else {
 422             char * result;
 423             register struct Function * f = &(((CordRep *)x) -> function);
 424             char buf[SUBSTR_LIMIT+1];
 425             register char * p = buf;
 426             register char c;
 427             register int j;
 428             register int lim = i + n;
 429             
 430             for (j = i; j < lim; j++) {
 431                 c = (*(f -> fn))(j, f -> client_data);
 432                 if (c == '\0') {
 433                     return(CORD_substr_closure(x, i, n, CORD_apply_access_fn));
 434                 }
 435                 *p++ = c;
 436             }
 437             *p = '\0';
 438             result = GC_MALLOC_ATOMIC(n+1);
 439             if (result == 0) OUT_OF_MEMORY;
 440             strcpy(result, buf);
 441             return(result);
 442         }
 443     }
 444 }
 445 
 446 CORD CORD_substr(CORD x, size_t i, size_t n)
 447 {
 448     register size_t len = CORD_len(x);
 449     
 450     if (i >= len || n <= 0) return(0);
 451         /* n < 0 is impossible in a correct C implementation, but       */
 452         /* quite possible  under SunOS 4.X.                             */
 453     if (i + n > len) n = len - i;
 454 #   ifndef __STDC__
 455       if (i < 0) ABORT("CORD_substr: second arg. negative");
 456         /* Possible only if both client and C implementation are buggy. */
 457         /* But empirically this happens frequently.                     */
 458 #   endif
 459     return(CORD_substr_checked(x, i, n));
 460 }
 461 
 462 /* See cord.h for definition.  We assume i is in range. */
 463 int CORD_iter5(CORD x, size_t i, CORD_iter_fn f1,
 464                          CORD_batched_iter_fn f2, void * client_data)
 465 {
 466     if (x == 0) return(0);
 467     if (CORD_IS_STRING(x)) {
 468         register const char *p = x+i;
 469         
 470         if (*p == '\0') ABORT("2nd arg to CORD_iter5 too big");
 471         if (f2 != CORD_NO_FN) {
 472             return((*f2)(p, client_data));
 473         } else {
 474             while (*p) {
 475                 if ((*f1)(*p, client_data)) return(1);
 476                 p++;
 477             }
 478             return(0);
 479         }
 480     } else if (IS_CONCATENATION(x)) {
 481         register struct Concatenation * conc
 482                         = &(((CordRep *)x) -> concatenation);
 483         
 484         
 485         if (i > 0) {
 486             register size_t left_len = LEFT_LEN(conc);
 487             
 488             if (i >= left_len) {
 489                 return(CORD_iter5(conc -> right, i - left_len, f1, f2,
 490                                   client_data));
 491             }
 492         }
 493         if (CORD_iter5(conc -> left, i, f1, f2, client_data)) {
 494             return(1);
 495         }
 496         return(CORD_iter5(conc -> right, 0, f1, f2, client_data));
 497     } else /* function */ {
 498         register struct Function * f = &(((CordRep *)x) -> function);
 499         register size_t j;
 500         register size_t lim = f -> len;
 501         
 502         for (j = i; j < lim; j++) {
 503             if ((*f1)((*(f -> fn))(j, f -> client_data), client_data)) {
 504                 return(1);
 505             }
 506         }
 507         return(0);
 508     }
 509 }
 510                         
 511 #undef CORD_iter
 512 int CORD_iter(CORD x, CORD_iter_fn f1, void * client_data)
 513 {
 514     return(CORD_iter5(x, 0, f1, CORD_NO_FN, client_data));
 515 }
 516 
 517 int CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void * client_data)
 518 {
 519     if (x == 0) return(0);
 520     if (CORD_IS_STRING(x)) {
 521         register const char *p = x + i;
 522         register char c;
 523                
 524         for(;;) {
 525             c = *p;
 526             if (c == '\0') ABORT("2nd arg to CORD_riter4 too big");
 527             if ((*f1)(c, client_data)) return(1);
 528             if (p == x) break;
 529             p--;
 530         }
 531         return(0);
 532     } else if (IS_CONCATENATION(x)) {
 533         register struct Concatenation * conc
 534                         = &(((CordRep *)x) -> concatenation);
 535         register CORD left_part = conc -> left;
 536         register size_t left_len;
 537         
 538         left_len = LEFT_LEN(conc);
 539         if (i >= left_len) {
 540             if (CORD_riter4(conc -> right, i - left_len, f1, client_data)) {
 541                 return(1);
 542             }
 543             return(CORD_riter4(left_part, left_len - 1, f1, client_data));
 544         } else {
 545             return(CORD_riter4(left_part, i, f1, client_data));
 546         }
 547     } else /* function */ {
 548         register struct Function * f = &(((CordRep *)x) -> function);
 549         register size_t j;
 550         
 551         for (j = i; ; j--) {
 552             if ((*f1)((*(f -> fn))(j, f -> client_data), client_data)) {
 553                 return(1);
 554             }
 555             if (j == 0) return(0);
 556         }
 557     }
 558 }
 559 
 560 int CORD_riter(CORD x, CORD_iter_fn f1, void * client_data)
 561 {
 562     return(CORD_riter4(x, CORD_len(x) - 1, f1, client_data));
 563 }
 564 
 565 /*
 566  * The following functions are concerned with balancing cords.
 567  * Strategy:
 568  * Scan the cord from left to right, keeping the cord scanned so far
 569  * as a forest of balanced trees of exponentialy decreasing length.
 570  * When a new subtree needs to be added to the forest, we concatenate all
 571  * shorter ones to the new tree in the appropriate order, and then insert
 572  * the result into the forest.
 573  * Crucial invariants:
 574  * 1. The concatenation of the forest (in decreasing order) with the
 575  *     unscanned part of the rope is equal to the rope being balanced.
 576  * 2. All trees in the forest are balanced.
 577  * 3. forest[i] has depth at most i.
 578  */
 579 
 580 typedef struct {
 581     CORD c;
 582     size_t len;         /* Actual length of c   */
 583 } ForestElement;
 584 
 585 static size_t min_len [ MAX_DEPTH ];
 586 
 587 static int min_len_init = 0;
 588 
 589 int CORD_max_len;
 590 
 591 typedef ForestElement Forest [ MAX_DEPTH ];
 592                         /* forest[i].len >= fib(i+1)            */
 593                         /* The string is the concatenation      */
 594                         /* of the forest in order of DECREASING */
 595                         /* indices.                             */
 596 
 597 void CORD_init_min_len()
 598 {
 599     register int i;
 600     register size_t last, previous, current;
 601         
 602     min_len[0] = previous = 1;
 603     min_len[1] = last = 2;
 604     for (i = 2; i < MAX_DEPTH; i++) {
 605         current = last + previous;
 606         if (current < last) /* overflow */ current = last;
 607         min_len[i] = current;
 608         previous = last;
 609         last = current;
 610     }
 611     CORD_max_len = last - 1;
 612     min_len_init = 1;
 613 }
 614 
 615 
 616 void CORD_init_forest(ForestElement * forest, size_t max_len)
 617 {
 618     register int i;
 619     
 620     for (i = 0; i < MAX_DEPTH; i++) {
 621         forest[i].c = 0;
 622         if (min_len[i] > max_len) return;
 623     }
 624     ABORT("Cord too long");
 625 }
 626 
 627 /* Add a leaf to the appropriate level in the forest, cleaning          */
 628 /* out lower levels as necessary.                                       */
 629 /* Also works if x is a balanced tree of concatenations; however        */
 630 /* in this case an extra concatenation node may be inserted above x;    */
 631 /* This node should not be counted in the statement of the invariants.  */
 632 void CORD_add_forest(ForestElement * forest, CORD x, size_t len)
 633 {
 634     register int i = 0;
 635     register CORD sum = CORD_EMPTY;
 636     register size_t sum_len = 0;
 637     
 638     while (len > min_len[i + 1]) {
 639         if (forest[i].c != 0) {
 640             sum = CORD_cat(forest[i].c, sum);
 641             sum_len += forest[i].len;
 642             forest[i].c = 0;
 643         }
 644         i++;
 645     }
 646     /* Sum has depth at most 1 greter than what would be required       */
 647     /* for balance.                                                     */
 648     sum = CORD_cat(sum, x);
 649     sum_len += len;
 650     /* If x was a leaf, then sum is now balanced.  To see this          */
 651     /* consider the two cases in which forest[i-1] either is or is      */
 652     /* not empty.                                                       */
 653     while (sum_len >= min_len[i]) {
 654         if (forest[i].c != 0) {
 655             sum = CORD_cat(forest[i].c, sum);
 656             sum_len += forest[i].len;
 657             /* This is again balanced, since sum was balanced, and has  */
 658             /* allowable depth that differs from i by at most 1.        */
 659             forest[i].c = 0;
 660         }
 661         i++;
 662     }
 663     i--;
 664     forest[i].c = sum;
 665     forest[i].len = sum_len;
 666 }
 667 
 668 CORD CORD_concat_forest(ForestElement * forest, size_t expected_len)
 669 {
 670     register int i = 0;
 671     CORD sum = 0;
 672     size_t sum_len = 0;
 673     
 674     while (sum_len != expected_len) {
 675         if (forest[i].c != 0) {
 676             sum = CORD_cat(forest[i].c, sum);
 677             sum_len += forest[i].len;
 678         }
 679         i++;
 680     }
 681     return(sum);
 682 }
 683 
 684 /* Insert the frontier of x into forest.  Balanced subtrees are */
 685 /* treated as leaves.  This potentially adds one to the depth   */
 686 /* of the final tree.                                           */
 687 void CORD_balance_insert(CORD x, size_t len, ForestElement * forest)
 688 {
 689     register int depth;
 690     
 691     if (CORD_IS_STRING(x)) {
 692         CORD_add_forest(forest, x, len);
 693     } else if (IS_CONCATENATION(x)
 694                && ((depth = DEPTH(x)) >= MAX_DEPTH
 695                    || len < min_len[depth])) {
 696         register struct Concatenation * conc
 697                         = &(((CordRep *)x) -> concatenation);
 698         size_t left_len = LEFT_LEN(conc);
 699         
 700         CORD_balance_insert(conc -> left, left_len, forest);
 701         CORD_balance_insert(conc -> right, len - left_len, forest);
 702     } else /* function or balanced */ {
 703         CORD_add_forest(forest, x, len);
 704     }
 705 }
 706 
 707 
 708 CORD CORD_balance(CORD x)
 709 {
 710     Forest forest;
 711     register size_t len;
 712     
 713     if (x == 0) return(0);
 714     if (CORD_IS_STRING(x)) return(x);
 715     if (!min_len_init) CORD_init_min_len();
 716     len = LEN(x);
 717     CORD_init_forest(forest, len);
 718     CORD_balance_insert(x, len, forest);
 719     return(CORD_concat_forest(forest, len));
 720 }
 721 
 722 
 723 /* Position primitives  */
 724 
 725 /* Private routines to deal with the hard cases only: */
 726 
 727 /* P contains a prefix of the  path to cur_pos. Extend it to a full     */
 728 /* path and set up leaf info.                                           */
 729 /* Return 0 if past the end of cord, 1 o.w.                             */
 730 void CORD__extend_path(register CORD_pos p)
 731 {
 732      register struct CORD_pe * current_pe = &(p[0].path[p[0].path_len]);
 733      register CORD top = current_pe -> pe_cord;
 734      register size_t pos = p[0].cur_pos;
 735      register size_t top_pos = current_pe -> pe_start_pos;
 736      register size_t top_len = GEN_LEN(top);
 737      
 738      /* Fill in the rest of the path. */
 739        while(!CORD_IS_STRING(top) && IS_CONCATENATION(top)) {
 740          register struct Concatenation * conc =
 741                         &(((CordRep *)top) -> concatenation);
 742          register size_t left_len;
 743          
 744          left_len = LEFT_LEN(conc);
 745          current_pe++;
 746          if (pos >= top_pos + left_len) {
 747              current_pe -> pe_cord = top = conc -> right;
 748              current_pe -> pe_start_pos = top_pos = top_pos + left_len;
 749              top_len -= left_len;
 750          } else {
 751              current_pe -> pe_cord = top = conc -> left;
 752              current_pe -> pe_start_pos = top_pos;
 753              top_len = left_len;
 754          }
 755          p[0].path_len++;
 756        }
 757      /* Fill in leaf description for fast access. */
 758        if (CORD_IS_STRING(top)) {
 759          p[0].cur_leaf = top;
 760          p[0].cur_start = top_pos;
 761          p[0].cur_end = top_pos + top_len;
 762        } else {
 763          p[0].cur_end = 0;
 764        }
 765        if (pos >= top_pos + top_len) p[0].path_len = CORD_POS_INVALID;
 766 }
 767 
 768 char CORD__pos_fetch(register CORD_pos p)
 769 {
 770     /* Leaf is a function node */
 771     struct CORD_pe * pe = &((p)[0].path[(p)[0].path_len]);
 772     CORD leaf = pe -> pe_cord;
 773     register struct Function * f = &(((CordRep *)leaf) -> function);
 774     
 775     if (!IS_FUNCTION(leaf)) ABORT("CORD_pos_fetch: bad leaf");
 776     return ((*(f -> fn))(p[0].cur_pos - pe -> pe_start_pos, f -> client_data));
 777 }
 778 
 779 void CORD__next(register CORD_pos p)
 780 {
 781     register size_t cur_pos = p[0].cur_pos + 1;
 782     register struct CORD_pe * current_pe = &((p)[0].path[(p)[0].path_len]);
 783     register CORD leaf = current_pe -> pe_cord;
 784     
 785     /* Leaf is not a string or we're at end of leaf */
 786     p[0].cur_pos = cur_pos;
 787     if (!CORD_IS_STRING(leaf)) {
 788         /* Function leaf        */
 789         register struct Function * f = &(((CordRep *)leaf) -> function);
 790         register size_t start_pos = current_pe -> pe_start_pos;
 791         register size_t end_pos = start_pos + f -> len;
 792         
 793         if (cur_pos < end_pos) {
 794           /* Fill cache and return. */
 795             register size_t i;
 796             register size_t limit = cur_pos + FUNCTION_BUF_SZ;
 797             register CORD_fn fn = f -> fn;
 798             register void * client_data = f -> client_data;
 799             
 800             if (limit > end_pos) {
 801                 limit = end_pos;
 802             }
 803             for (i = cur_pos; i < limit; i++) {
 804                 p[0].function_buf[i - cur_pos] =
 805                         (*fn)(i - start_pos, client_data);
 806             }
 807             p[0].cur_start = cur_pos;
 808             p[0].cur_leaf = p[0].function_buf;
 809             p[0].cur_end = limit;
 810             return;
 811         }
 812     }
 813     /* End of leaf      */
 814     /* Pop the stack until we find two concatenation nodes with the     */
 815     /* same start position: this implies we were in left part.          */
 816     {
 817         while (p[0].path_len > 0
 818                && current_pe[0].pe_start_pos != current_pe[-1].pe_start_pos) {
 819             p[0].path_len--;
 820             current_pe--;
 821         }
 822         if (p[0].path_len == 0) {
 823             p[0].path_len = CORD_POS_INVALID;
 824             return;
 825         }
 826     }
 827     p[0].path_len--;
 828     CORD__extend_path(p);
 829 }
 830 
 831 void CORD__prev(register CORD_pos p)
 832 {
 833     register struct CORD_pe * pe = &(p[0].path[p[0].path_len]);
 834     
 835     if (p[0].cur_pos == 0) {
 836         p[0].path_len = CORD_POS_INVALID;
 837         return;
 838     }
 839     p[0].cur_pos--;
 840     if (p[0].cur_pos >= pe -> pe_start_pos) return;
 841     
 842     /* Beginning of leaf        */
 843     
 844     /* Pop the stack until we find two concatenation nodes with the     */
 845     /* different start position: this implies we were in right part.    */
 846     {
 847         register struct CORD_pe * current_pe = &((p)[0].path[(p)[0].path_len]);
 848         
 849         while (p[0].path_len > 0
 850                && current_pe[0].pe_start_pos == current_pe[-1].pe_start_pos) {
 851             p[0].path_len--;
 852             current_pe--;
 853         }
 854     }
 855     p[0].path_len--;
 856     CORD__extend_path(p);
 857 }
 858 
 859 #undef CORD_pos_fetch
 860 #undef CORD_next
 861 #undef CORD_prev
 862 #undef CORD_pos_to_index
 863 #undef CORD_pos_to_cord
 864 #undef CORD_pos_valid
 865 
 866 char CORD_pos_fetch(register CORD_pos p)
 867 {
 868     if (p[0].cur_start <= p[0].cur_pos && p[0].cur_pos < p[0].cur_end) {
 869         return(p[0].cur_leaf[p[0].cur_pos - p[0].cur_start]);
 870     } else {
 871         return(CORD__pos_fetch(p));
 872     }
 873 }
 874 
 875 void CORD_next(CORD_pos p)
 876 {
 877     if (p[0].cur_pos < p[0].cur_end - 1) {
 878         p[0].cur_pos++;
 879     } else {
 880         CORD__next(p);
 881     }
 882 }
 883 
 884 void CORD_prev(CORD_pos p)
 885 {
 886     if (p[0].cur_end != 0 && p[0].cur_pos > p[0].cur_start) {
 887         p[0].cur_pos--;
 888     } else {
 889         CORD__prev(p);
 890     }
 891 }
 892 
 893 size_t CORD_pos_to_index(CORD_pos p)
 894 {
 895     return(p[0].cur_pos);
 896 }
 897 
 898 CORD CORD_pos_to_cord(CORD_pos p)
 899 {
 900     return(p[0].path[0].pe_cord);
 901 }
 902 
 903 int CORD_pos_valid(CORD_pos p)
 904 {
 905     return(p[0].path_len != CORD_POS_INVALID);
 906 }
 907 
 908 void CORD_set_pos(CORD_pos p, CORD x, size_t i)
 909 {
 910     if (x == CORD_EMPTY) {
 911         p[0].path_len = CORD_POS_INVALID;
 912         return;
 913     }
 914     p[0].path[0].pe_cord = x;
 915     p[0].path[0].pe_start_pos = 0;
 916     p[0].path_len = 0;
 917     p[0].cur_pos = i;
 918     CORD__extend_path(p);
 919 }

/* [<][>][^][v][top][bottom][index][help] */