root/gc/include/cord.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


   1 /* 
   2  * Copyright (c) 1993-1994 by Xerox Corporation.  All rights reserved.
   3  *
   4  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
   5  * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
   6  *
   7  * Permission is hereby granted to use or copy this program
   8  * for any purpose,  provided the above notices are retained on all copies.
   9  * Permission to modify the code and to distribute modified code is granted,
  10  * provided the above notices are retained, and a notice that the code was
  11  * modified is included with the above copyright notice.
  12  *
  13  * Author: Hans-J. Boehm (boehm@parc.xerox.com)
  14  */
  15 /* Boehm, October 5, 1995 4:20 pm PDT */
  16  
  17 /*
  18  * Cords are immutable character strings.  A number of operations
  19  * on long cords are much more efficient than their strings.h counterpart.
  20  * In particular, concatenation takes constant time independent of the length
  21  * of the arguments.  (Cords are represented as trees, with internal
  22  * nodes representing concatenation and leaves consisting of either C
  23  * strings or a functional description of the string.)
  24  *
  25  * The following are reasonable applications of cords.  They would perform
  26  * unacceptably if C strings were used:
  27  * - A compiler that produces assembly language output by repeatedly
  28  *   concatenating instructions onto a cord representing the output file.
  29  * - A text editor that converts the input file to a cord, and then
  30  *   performs editing operations by producing a new cord representing
  31  *   the file after each character change (and keeping the old ones in an
  32  *   edit history)
  33  *
  34  * For optimal performance, cords should be built by
  35  * concatenating short sections.
  36  * This interface is designed for maximum compatibility with C strings.
  37  * ASCII NUL characters may be embedded in cords using CORD_from_fn.
  38  * This is handled correctly, but CORD_to_char_star will produce a string
  39  * with embedded NULs when given such a cord. 
  40  *
  41  * This interface is fairly big, largely for performance reasons.
  42  * The most basic constants and functions:
  43  *
  44  * CORD - the type of a cord;
  45  * CORD_EMPTY - empty cord;
  46  * CORD_len(cord) - length of a cord;
  47  * CORD_cat(cord1,cord2) - concatenation of two cords;
  48  * CORD_substr(cord, start, len) - substring (or subcord);
  49  * CORD_pos i;  CORD_FOR(i, cord) {  ... CORD_pos_fetch(i) ... } -
  50  *    examine each character in a cord.  CORD_pos_fetch(i) is the char.
  51  * CORD_fetch(cord, i) - Retrieve i'th character (slowly).
  52  * CORD_cmp(cord1, cord2) - compare two cords.
  53  * CORD_from_file(FILE * f) - turn a read-only file into a cord.
  54  * CORD_to_char_star(cord) - convert to C string.
  55  *   (Non-NULL C constant strings are cords.)
  56  * CORD_printf (etc.) - cord version of printf. Use %r for cords.
  57  */
  58 # ifndef CORD_H
  59 
  60 # define CORD_H
  61 # include <stddef.h>
  62 # include <stdio.h>
  63 /* Cords have type const char *.  This is cheating quite a bit, and not */
  64 /* 100% portable.  But it means that nonempty character string          */
  65 /* constants may be used as cords directly, provided the string is      */
  66 /* never modified in place.  The empty cord is represented by, and      */
  67 /* can be written as, 0.                                                */
  68 
  69 typedef const char * CORD;
  70 
  71 /* An empty cord is always represented as nil   */
  72 # define CORD_EMPTY 0
  73 
  74 /* True iff nonempty cord s is a C string (first char is not NUL).      */
  75 #define CORD_IS_STRING(s) (*(s) != '\0')
  76 
  77 /* Concatenate two cords.  If the arguments are C strings, they may     */
  78 /* not be subsequently altered.                                         */
  79 CORD CORD_cat(CORD x, CORD y);
  80 
  81 /* Concatenate a cord and a C string with known length.  Except for the */
  82 /* empty string case, this is a special case of CORD_cat.  Since the    */
  83 /* length is known, it can be faster.                                   */
  84 /* The string y is shared with the resulting CORD.  Hence it should     */
  85 /* not be altered by the caller.                                        */
  86 CORD CORD_cat_char_star(CORD x, const char * y, size_t leny);
  87 
  88 /* Compute the length of a cord */
  89 size_t CORD_len(CORD x);
  90 
  91 /* Cords may be represented by functions defining the ith character */
  92 typedef char (* CORD_fn)(size_t i, void * client_data);
  93 
  94 /* Turn a functional description into a cord.   */
  95 CORD CORD_from_fn(CORD_fn fn, void * client_data, size_t len);
  96 
  97 /* Return the substring (subcord really) of x with length at most n,    */
  98 /* starting at position i.  (The initial character has position 0.)     */
  99 CORD CORD_substr(CORD x, size_t i, size_t n);
 100 
 101 /* Return the argument, but rebalanced to allow more efficient          */
 102 /* character retrieval, substring operations, and comparisons.          */
 103 /* This is useful only for cords that were built using repeated         */
 104 /* concatenation.  Guarantees log time access to the result, unless     */
 105 /* x was obtained through a large number of repeated substring ops      */
 106 /* or the embedded functional descriptions take longer to evaluate.     */
 107 /* May reallocate significant parts of the cord.  The argument is not   */
 108 /* modified; only the result is balanced.                               */
 109 CORD CORD_balance(CORD x);
 110 
 111 /* The following traverse a cord by applying a function to each         */
 112 /* character.  This is occasionally appropriate, especially where       */
 113 /* speed is crucial.  But, since C doesn't have nested functions,       */
 114 /* clients of this sort of traversal are clumsy to write.  Consider     */
 115 /* the functions that operate on cord positions instead.                */
 116 
 117 /* Function to iteratively apply to individual characters in cord.      */
 118 typedef int (* CORD_iter_fn)(char c, void * client_data);
 119 
 120 /* Function to apply to substrings of a cord.  Each substring is a      */
 121 /* C character string, not a general cord.                              */
 122 typedef int (* CORD_batched_iter_fn)(const char * s, void * client_data);
 123 # define CORD_NO_FN ((CORD_batched_iter_fn)0)
 124 
 125 /* Apply f1 to each character in the cord, in ascending order,          */
 126 /* starting at position i. If                                           */
 127 /* f2 is not CORD_NO_FN, then multiple calls to f1 may be replaced by   */
 128 /* a single call to f2.  The parameter f2 is provided only to allow     */
 129 /* some optimization by the client.  This terminates when the right     */
 130 /* end of this string is reached, or when f1 or f2 return != 0.  In the */
 131 /* latter case CORD_iter returns != 0.  Otherwise it returns 0.         */
 132 /* The specified value of i must be < CORD_len(x).                      */
 133 int CORD_iter5(CORD x, size_t i, CORD_iter_fn f1,
 134                CORD_batched_iter_fn f2, void * client_data);
 135 
 136 /* Simpler form: start at 0, no f2.  The macro forwards to CORD_iter5.  */
 137 int CORD_iter(CORD x, CORD_iter_fn f1, void * client_data);
 138 # define CORD_iter(x, f1, cd) CORD_iter5(x, 0, f1, CORD_NO_FN, cd)
 139 
 140 /* Similar to CORD_iter5, but end-to-beginning. No provisions for       */
 141 /* CORD_batched_iter_fn.                                                */
 142 int CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void * client_data);
 143 
 144 /* A simpler version that starts at the end:    */
 145 int CORD_riter(CORD x, CORD_iter_fn f1, void * client_data);
 146 
 147 /* Functions that operate on cord positions.  The easy way to traverse  */
 148 /* cords.  A cord position is logically a pair consisting of a cord     */
 149 /* and an index into that cord.  But it is much faster to retrieve a    */
 150 /* character based on a position than on an index.  Unfortunately,      */
 151 /* positions are big (order of a few 100 bytes), so allocate them with  */
 152 /* caution.                                                             */
 153 /* Things in cord_pos.h should be treated as opaque, except as          */
 154 /* described below.  Also note that                                     */
 155 /* CORD_pos_fetch, CORD_next and CORD_prev have both macro and function */
 156 /* definitions.  The former may evaluate their argument more than once. */
 157 # include "private/cord_pos.h"
 158 
 159 /*
 160         Visible definitions from above:
 161         
 162         typedef <OPAQUE but fairly big> CORD_pos[1];
 163         
 164         * Extract the cord from a position:
 165         CORD CORD_pos_to_cord(CORD_pos p);
 166         
 167         * Extract the current index from a position:
 168         size_t CORD_pos_to_index(CORD_pos p);
 169         
 170         * Fetch the character located at the given position:
 171         char CORD_pos_fetch(CORD_pos p);
 172         
 173         * Initialize the position to refer to the given cord and index.
 174         * Note that this is the most expensive function on positions:
 175         void CORD_set_pos(CORD_pos p, CORD x, size_t i);
 176         
 177         * Advance the position to the next character.
 178         * P must be initialized and valid.
 179         * Invalidates p if past end:
 180         void CORD_next(CORD_pos p);
 181         
 182         * Move the position to the preceding character.
 183         * P must be initialized and valid.
 184         * Invalidates p if past beginning:
 185         void CORD_prev(CORD_pos p);
 186         
 187         * Is the position valid, i.e. inside the cord?
 188         int CORD_pos_valid(CORD_pos p);
 189 */
 190 # define CORD_FOR(pos, cord) /* from index 0; evaluates pos repeatedly */ \
 191     for (CORD_set_pos(pos, cord, 0); CORD_pos_valid(pos); CORD_next(pos))
 192 
 193                         
 194 /* An out of memory handler to call.  May be supplied by client.        */
 195 /* Must not return.                                                     */
 196 extern void (* CORD_oom_fn)(void);
 197 
 198 /* Dump the representation of x to stdout in an implementation defined  */
 199 /* manner.  Intended for debugging only.                                */
 200 void CORD_dump(CORD x);
 201 
 202 /* The following could easily be implemented by the client.  They are   */
 203 /* provided in cordxtra.c for convenience.                              */
 204 
 205 /* Concatenate a character to the end of a cord.        */
 206 CORD CORD_cat_char(CORD x, char c);
 207 
 208 /* Concatenate n cords. */
 209 CORD CORD_catn(int n, /* CORD */ ...);
 210 
 211 /* Return the character in CORD_substr(x, i, 1)         */
 212 char CORD_fetch(CORD x, size_t i);
 213 
 214 /* Return < 0, 0, or > 0, depending on whether x < y, x = y, x > y      */
 215 int CORD_cmp(CORD x, CORD y);
 216 
 217 /* A generalization that takes both starting positions for the          */
 218 /* comparison, and a limit on the number of characters to be compared.  */
 219 int CORD_ncmp(CORD x, size_t x_start, CORD y, size_t y_start, size_t len);
 220 
 221 /* Find the first occurrence of s in x at position start or later.      */
 222 /* Return the position of the first character of s in x, or             */
 223 /* CORD_NOT_FOUND if there is none.                                     */
 224 size_t CORD_str(CORD x, size_t start, CORD s);
 225 
 226 /* Return a cord consisting of i copies of (possibly NUL) c.  Dangerous */
 227 /* in conjunction with CORD_to_char_star.                               */
 228 /* The resulting representation takes constant space, independent of i. */
 229 CORD CORD_chars(char c, size_t i);
 230 # define CORD_nul(i) CORD_chars('\0', (i))
 231 
 232 /* Turn a file into cord.  The file must be seekable.  Its contents     */
 233 /* must remain constant.  The file may be accessed as an immediate      */
 234 /* result of this call and/or as a result of subsequent accesses to     */
 235 /* the cord.  Short files are likely to be immediately read, but        */
 236 /* long files are likely to be read on demand, possibly relying on      */
 237 /* stdio for buffering.                                                 */
 238 /* We must have exclusive access to the descriptor f, i.e. we may       */
 239 /* read it at any time, and expect the file pointer to be               */
 240 /* where we left it.  Normally this should be invoked as                */
 241 /* CORD_from_file(fopen(...))                                           */
 242 /* CORD_from_file arranges to close the file descriptor when it is no   */
 243 /* longer needed (e.g. when the result becomes inaccessible).           */ 
 244 /* The file f must be such that ftell reflects the actual character     */
 245 /* position in the file, i.e. the number of characters that can be      */
 246 /* or were read with fread.  On UNIX systems this is always true.  On   */
 247 /* MS Windows systems, f must be opened in binary mode.                 */
 248 CORD CORD_from_file(FILE * f);
 249 
 250 /* Equivalent to the above, except that the entire file will be read    */
 251 /* and the file pointer will be closed immediately.                     */
 252 /* The binary mode restriction from above does not apply.               */
 253 CORD CORD_from_file_eager(FILE * f);
 254 
 255 /* Equivalent to the above, except that the file will be read on demand.*/
 256 /* The binary mode restriction applies.                                 */
 257 CORD CORD_from_file_lazy(FILE * f);
 258 
 259 /* Turn a cord into a C string. The result shares no structure with     */
 260 /* x, and is thus modifiable.                                           */
 261 char * CORD_to_char_star(CORD x);
 262 
 263 /* Turn a C string into a CORD.  The C string is copied, and so may     */
 264 /* subsequently be modified.                                            */
 265 CORD CORD_from_char_star(const char *s);
 266 
 267 /* Identical to CORD_to_char_star, but the result may share structure  */
 268 /* with the argument and is thus not modifiable.                        */
 269 const char * CORD_to_const_char_star(CORD x); 
 270 
 271 /* Write a cord to a file, starting at the current position.  No        */
 272 /* trailing NULs or newlines are added.                                 */
 273 /* Returns EOF if a write error occurs, 1 otherwise.                    */
 274 int CORD_put(CORD x, FILE * f);
 275 
 276 /* "Not found" result for CORD_str, CORD_chr and CORD_rchr.             */
 277 # define CORD_NOT_FOUND ((size_t)(-1))
 278 
 279 /* A vague analog of strchr.  Returns the position (an integer, not     */
 280 /* a pointer) of the first occurrence of (char) c inside x at position  */
 281 /* i or later. The value i must be < CORD_len(x).                       */
 282 size_t CORD_chr(CORD x, size_t i, int c);
 283 
 284 /* A vague analog of strrchr.  Returns index of the last occurrence     */
 285 /* of (char) c inside x at position i or earlier. The value i           */
 286 /* must be < CORD_len(x).                                               */
 287 size_t CORD_rchr(CORD x, size_t i, int c);
 288 
 289 
 290 /* The following are also not primitive, but are implemented in         */
 291 /* cordprnt.c.  They provide functionality similar to the ANSI C        */
 292 /* functions with corresponding names, but with the following           */
 293 /* additions and changes:                                               */
 294 /* 1. A %r conversion specification specifies a CORD argument.  Field   */
 295 /*    width, precision, etc. have the same semantics as for %s.         */
 296 /*    (Note that %c,%C, and %S were already taken.)                     */
 297 /* 2. The format string is represented as a CORD.                       */
 298 /* 3. CORD_sprintf and CORD_vsprintf assign the result through the 1st  */      /*    argument. Unlike their ANSI C versions, there is no need to guess */
 299 /*    the correct buffer size.                                          */
 300 /* 4. Most of the conversions are implemented through the native        */
 301 /*    vsprintf.  Hence they are usually no faster, and                  */
 302 /*    idiosyncrasies of the native printf are preserved.  However,      */
 303 /*    CORD arguments to CORD_sprintf and CORD_vsprintf are NOT copied;  */
 304 /*    the result shares the original structure.  This may make them     */
 305 /*    very efficient in some unusual applications.                      */
 306 /*    The format string is copied.                                      */
 307 /* All functions return the number of characters generated or -1 on     */
 308 /* error.  This complies with the ANSI standard, but is inconsistent    */
 309 /* with some older implementations of sprintf.                          */
 310 
 311 /* The implementation of these is probably less portable than the rest  */
 312 /* of this package.                                                     */
 313 
 314 #ifndef CORD_NO_IO
 315 
 316 #include <stdarg.h>
 317 
 318 int CORD_sprintf(CORD * out, CORD format, ...);
 319 int CORD_vsprintf(CORD * out, CORD format, va_list args);
 320 int CORD_fprintf(FILE * f, CORD format, ...);
 321 int CORD_vfprintf(FILE * f, CORD format, va_list args);
 322 int CORD_printf(CORD format, ...);
 323 int CORD_vprintf(CORD format, va_list args);
 324 
 325 #endif /* CORD_NO_IO */
 326 
 327 # endif /* CORD_H */

/* [<][>][^][v][top][bottom][index][help] */