root/src/string.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. make_str
  2. make_str_body
  3. Scm_StringDump
  4. count_size_and_length
  5. count_length
  6. Scm_MBLen
  7. Scm_MakeString
  8. Scm_MakeFillString
  9. Scm_ListToString
  10. Scm_GetString
  11. get_string_from_body
  12. Scm_GetStringConst
  13. Scm_GetStringContent
  14. Scm_CopyStringWithFlags
  15. Scm_StringCompleteToIncompleteX
  16. Scm_StringCompleteToIncomplete
  17. Scm_StringIncompleteToCompleteX
  18. Scm_StringIncompleteToComplete
  19. Scm_StringEqual
  20. Scm_StringCmp
  21. sb_strcasecmp
  22. mb_strcasecmp
  23. Scm_StringCiCmp
  24. forward_pos
  25. Scm_StringRef
  26. Scm_StringByteRef
  27. Scm_StringPosition
  28. Scm_StringAppend2
  29. Scm_StringAppendC
  30. Scm_StringAppend
  31. Scm_StringJoin
  32. string_substitute
  33. Scm_StringSubstitute
  34. Scm_StringSet
  35. Scm_StringByteSet
  36. substring
  37. Scm_Substring
  38. Scm_MaybeSubstring
  39. Scm_StringSplitByChar
  40. boyer_moore
  41. string_scan
  42. Scm_StringScan
  43. Scm_StringScanChar
  44. Scm_StringToList
  45. Scm_StringFill
  46. Scm_ConstCStringArrayToList
  47. Scm_CStringArrayToList
  48. string_putc
  49. string_print
  50. Scm_MakeStringPointer
  51. Scm_StringPointerRef
  52. Scm_StringPointerNext
  53. Scm_StringPointerPrev
  54. Scm_StringPointerSet
  55. Scm_StringPointerSubstring
  56. Scm_StringPointerCopy
  57. Scm_StringPointerDump
  58. Scm_DStringInit
  59. Scm_DStringSize
  60. Scm__DStringRealloc
  61. dstring_getz
  62. Scm_DStringGet
  63. Scm_DStringGetz
  64. Scm_DStringPutz
  65. Scm_DStringAdd
  66. Scm_DStringPutb
  67. Scm_DStringPutc
  68. Scm_DStringDump

   1 /*
   2  * string.c - string implementation
   3  *
   4  *   Copyright (c) 2000-2005 Shiro Kawai, All rights reserved.
   5  * 
   6  *   Redistribution and use in source and binary forms, with or without
   7  *   modification, are permitted provided that the following conditions
   8  *   are met:
   9  * 
  10  *   1. Redistributions of source code must retain the above copyright
  11  *      notice, this list of conditions and the following disclaimer.
  12  *
  13  *   2. Redistributions in binary form must reproduce the above copyright
  14  *      notice, this list of conditions and the following disclaimer in the
  15  *      documentation and/or other materials provided with the distribution.
  16  *
  17  *   3. Neither the name of the authors nor the names of its contributors
  18  *      may be used to endorse or promote products derived from this
  19  *      software without specific prior written permission.
  20  *
  21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
  27  *   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  28  *   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  29  *   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  30  *   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  31  *   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32  *
  33  *  $Id: string.c,v 1.75 2005/10/13 08:14:13 shirok Exp $
  34  */
  35 
  36 #include <stdio.h>
  37 #include <ctype.h>
  38 #include <sys/types.h>
  39 #include <string.h>
  40 #define LIBGAUCHE_BODY
  41 #include "gauche.h"
  42 
  43 void Scm_DStringDump(FILE *out, ScmDString *dstr);
  44 
  45 static void string_print(ScmObj obj, ScmPort *port, ScmWriteContext *ctx);
  46 SCM_DEFINE_BUILTIN_CLASS(Scm_StringClass, string_print, NULL, NULL, NULL,
  47                          SCM_CLASS_SEQUENCE_CPL);
  48 
  49 #define CHECK_MUTABLE(str)                                              \
  50     do {                                                                \
  51         if (SCM_STRING_IMMUTABLE_P(str))                                \
  52             Scm_Error("attempted to modify immutable string: %S", str); \
  53     } while (0)
  54 
  55 /* Internal primitive constructor.   LEN can be negative if the string
  56    is incomplete. */
  57 static ScmString *make_str(int len, int siz, const char *p, int flags)
  58 {
  59     ScmString *s = SCM_NEW(ScmString);
  60     SCM_SET_CLASS(s, SCM_CLASS_STRING);
  61 
  62     if (len < 0) flags |= SCM_STRING_INCOMPLETE;
  63     if (flags & SCM_STRING_INCOMPLETE) len = siz;
  64 
  65     s->body = NULL;
  66     s->initialBody.flags = flags & SCM_STRING_FLAG_MASK;
  67     s->initialBody.length = len;
  68     s->initialBody.size = siz;
  69     s->initialBody.start = p;
  70     return s;
  71 }
  72 
  73 static ScmStringBody *make_str_body(int len, int siz, const char *p, int flags)
  74 {
  75     ScmStringBody *b = SCM_NEW(ScmStringBody);
  76     b->length = (len < 0)? siz : len;
  77     b->size = siz;
  78     b->start = p;
  79     b->flags = flags;
  80     return b;
  81 }
  82 
  83 #define DUMP_LENGTH   50
  84 
  85 /* for debug */
  86 #if SCM_DEBUG_HELPER
  87 void Scm_StringDump(FILE *out, ScmObj str)
  88 {
  89     int i;
  90     const ScmStringBody *b = SCM_STRING_BODY(str);
  91     int s = SCM_STRING_BODY_SIZE(b);
  92     const char *p = SCM_STRING_BODY_START(b);
  93 
  94     fprintf(out, "STR(len=%d,siz=%d) \"", SCM_STRING_BODY_LENGTH(b), s);
  95     for (i=0; i < DUMP_LENGTH && s > 0;) {
  96         int n = SCM_CHAR_NFOLLOWS(*p) + 1;
  97         for (; n > 0 && s > 0; p++, n--, s--, i++) {
  98             putc(*p, out);
  99         }
 100     }
 101     if (s > 0) {
 102         fputs("...\"\n", out);
 103     } else {
 104         fputs("\"\n", out);
 105     }       
 106 }
 107 #endif /*SCM_DEBUG_HELPER*/
 108 
 109 /*
 110  * Multibyte length calculation
 111  */
 112 
 113 /* We have multiple similar functions, due to performance reasons. */
 114 
 115 /* Calculate both length and size of C-string str.
 116    If str is incomplete, *plen gets -1. */
 117 static inline int count_size_and_length(const char *str, int *psize, int *plen)
 118 {
 119     char c;
 120     const char *p = str;
 121     int size = 0, len = 0;
 122     while ((c = *p++) != 0) {
 123         int i = SCM_CHAR_NFOLLOWS(c);
 124         len++;
 125         size++;
 126         while (i-- > 0) {
 127             if (!*p++) { len = -1; goto eos; }
 128             size++;
 129         }
 130     }
 131   eos:
 132     *psize = size;
 133     *plen = len;
 134     return len;
 135 }
 136 
 137 /* Calculate length of known size string.  str can contain NUL character. */
 138 static inline int count_length(const char *str, int size)
 139 {
 140     int count = 0;
 141 
 142     while (size-- > 0) {
 143         ScmChar ch;
 144         unsigned char c = (unsigned char)*str;
 145         int i = SCM_CHAR_NFOLLOWS(c);
 146         if (i < 0 || i > size) return -1;
 147         SCM_CHAR_GET(str, ch);
 148         if (ch == SCM_CHAR_INVALID) return -1;
 149         count++;
 150         str += i+1;
 151         size -= i;
 152     }
 153     return count;
 154 }
 155 
 156 /* Returns length of string, starts from str and end at stop.
 157    If stop is NULL, str is regarded as C-string (NUL terminated).
 158    If the string is incomplete, returns -1. */
 159 int Scm_MBLen(const char *str, const char *stop)
 160 {
 161     int size = (stop == NULL)? strlen(str) : (stop - str);
 162     return count_length(str, size);
 163 }
 164 
 165 /*----------------------------------------------------------------
 166  * Constructors
 167  */
 168 
 169 /* General constructor. */
 170 ScmObj Scm_MakeString(const char *str, int size, int len, int flags)
 171 {
 172     ScmString *s;
 173     
 174     if (size < 0) count_size_and_length(str, &size, &len);
 175     else if (len < 0) len = count_length(str, size);
 176     if (flags & SCM_MAKSTR_COPYING) {
 177         char *nstr = SCM_NEW_ATOMIC2(char *, size + 1);
 178         memcpy(nstr, str, size);
 179         nstr[size] = '\0';          /* be kind to C */
 180         s = make_str(len, size, nstr, flags);
 181     } else {
 182         s = make_str(len, size, str, flags);
 183     }
 184     return SCM_OBJ(s);
 185 }
 186 
 187 ScmObj Scm_MakeFillString(int len, ScmChar fill)
 188 {
 189     int size = SCM_CHAR_NBYTES(fill), i;
 190     char *ptr = SCM_NEW_ATOMIC2(char *, size*len+1);
 191     char *p;
 192 
 193     if (len < 0) Scm_Error("length out of range: %d", len);
 194     for (i=0, p=ptr; i<len; i++, p+=size) {
 195         SCM_CHAR_PUT(p, fill);
 196     }
 197     ptr[size*len] = '\0';
 198     return SCM_OBJ(make_str(len, size*len, ptr, 0));
 199 }
 200 
 201 ScmObj Scm_ListToString(ScmObj chars)
 202 {
 203     ScmObj cp;
 204     int size = 0, len = 0;
 205     ScmChar ch;
 206     char *buf, *bufp;
 207 
 208     SCM_FOR_EACH(cp, chars) {
 209         if (!SCM_CHARP(SCM_CAR(cp))) 
 210             Scm_Error("character required, but got %S", SCM_CAR(cp));
 211         ch = SCM_CHAR_VALUE(SCM_CAR(cp));
 212         size += SCM_CHAR_NBYTES(ch);
 213         len++;
 214     }
 215     bufp = buf = SCM_NEW_ATOMIC2(char *, size+1);
 216     SCM_FOR_EACH(cp, chars) {
 217         ch = SCM_CHAR_VALUE(SCM_CAR(cp));
 218         SCM_CHAR_PUT(bufp, ch);
 219         bufp += SCM_CHAR_NBYTES(ch);
 220     }
 221     *bufp = '\0';
 222     return Scm_MakeString(buf, size, len, 0);
 223 }
 224 
 225 /* Extract string as C-string.  This one guarantees to return
 226    mutable string (we always copy) */
 227 char *Scm_GetString(ScmString *str)
 228 {
 229     int size;
 230     char *p;
 231     const ScmStringBody *b = SCM_STRING_BODY(str);
 232 
 233     size = SCM_STRING_BODY_SIZE(b);
 234     p = SCM_NEW_ATOMIC2(char *, size+1);
 235     memcpy(p, SCM_STRING_BODY_START(b), size);
 236     p[size] = '\0';
 237     return p;
 238 }
 239 
 240 /* Common routine for Scm_GetStringConst and Scm_GetStringContent */
 241 static const char *get_string_from_body(const ScmStringBody *b)
 242 {
 243     int size = SCM_STRING_BODY_SIZE(b);
 244     if (SCM_STRING_BODY_START(b)[size] == '\0') {
 245         /* we can use string data as C-string */
 246         return SCM_STRING_BODY_START(b);
 247     } else {
 248         char *p = SCM_NEW_ATOMIC2(char *, size+1);
 249         memcpy(p, SCM_STRING_BODY_START(b), size);
 250         p[size] = '\0';
 251         /* kludge! This breaks 'const' qualification, but we know
 252            this is an idempotent operation from the outside */
 253         ((ScmStringBody*)b)->start = p; /* discard const qualifier */
 254         return p;
 255     }
 256 }
 257 
 258 
 259 /* Extract string as C-string.  Returned string is immutable,
 260    so we can directly return the body of the string. */
 261 const char *Scm_GetStringConst(ScmString *str)
 262 {
 263     return get_string_from_body(SCM_STRING_BODY(str));
 264 }
 265 
 266 /* Atomically extracts C-string, length, size, and incomplete flag.
 267    MT-safe. */
 268 const char *Scm_GetStringContent(ScmString *str,
 269                                  unsigned int *psize,   /* out */
 270                                  unsigned int *plength, /* out */
 271                                  unsigned int *pflags)  /* out */
 272 {
 273     const ScmStringBody *b = SCM_STRING_BODY(str);
 274     if (psize)   *psize = SCM_STRING_BODY_SIZE(b);
 275     if (plength) *plength = SCM_STRING_BODY_LENGTH(b);
 276     if (pflags) *pflags = SCM_STRING_BODY_FLAGS(b);
 277     return get_string_from_body(b);
 278 }
 279 
 280 
 281 /* Copy string.  You can modify the flags of the newly created string
 282    by FLAGS and MASK arguments; for the bits set in MASK, corresponding
 283    bits in FLAGS are copied to the new string, and for other bits, the  
 284    original flags are copied.
 285 
 286    The typical semantics of copy-string is achieved by passing 0 to
 287    FLAGS and SCM_STRING_IMMUTABLE to MASK (i.e. reset IMMUTABLE flag,
 288    and keep other flags intact.
 289 
 290    NB: This routine doesn't check whether specified flag is valid
 291    with the string content, i.e. you can drop INCOMPLETE flag with
 292    copying, while the string content won't be checked if it consists
 293    valid complete string. */
 294 ScmObj Scm_CopyStringWithFlags(ScmString *x, int flags, int mask)
 295 {
 296     const ScmStringBody *b = SCM_STRING_BODY(x);
 297     int size = SCM_STRING_BODY_SIZE(b);
 298     int len  = SCM_STRING_BODY_LENGTH(b);
 299     const char *start = SCM_STRING_BODY_START(b);
 300     int newflags = ((SCM_STRING_BODY_FLAGS(b) & ~mask)
 301                     | (flags & mask));
 302         
 303     return SCM_OBJ(make_str(len, size, start, newflags));
 304 }
 305 
 306 ScmObj Scm_StringCompleteToIncompleteX(ScmString *x)
 307 {
 308     const ScmStringBody *b;
 309     CHECK_MUTABLE(x);
 310     b = SCM_STRING_BODY(x);
 311     x->body = make_str_body(SCM_STRING_BODY_SIZE(b),
 312                             SCM_STRING_BODY_SIZE(b),
 313                             SCM_STRING_BODY_START(b),
 314                             SCM_STRING_BODY_FLAGS(b) | SCM_STRING_INCOMPLETE);
 315     return SCM_OBJ(x);
 316 }
 317 
 318 ScmObj Scm_StringCompleteToIncomplete(ScmString *x)
 319 {
 320     return Scm_CopyStringWithFlags(x, SCM_STRING_INCOMPLETE,
 321                                    SCM_STRING_INCOMPLETE);
 322 }
 323 
 324 /* DEPRECATED.  MT-UNSAFE */
 325 ScmObj Scm_StringIncompleteToCompleteX(ScmString *x)
 326 {
 327     ScmStringBody *b;
 328     CHECK_MUTABLE(x);
 329     b = (ScmStringBody*)SCM_STRING_BODY(x);
 330     if (SCM_STRING_BODY_INCOMPLETE_P(b)) {
 331         int len = count_length(SCM_STRING_BODY_START(b),
 332                                SCM_STRING_BODY_SIZE(b));
 333         if (len < 0) return SCM_FALSE;
 334         b->flags &= ~SCM_STRING_INCOMPLETE;
 335         b->length = len;
 336     }
 337     return SCM_OBJ(x);
 338 }
 339 
 340 ScmObj Scm_StringIncompleteToComplete(ScmString *x)
 341 {
 342     return Scm_StringIncompleteToCompleteX(SCM_STRING(Scm_CopyString(x)));
 343 }
 344 
 345 /*----------------------------------------------------------------
 346  * Comparison
 347  */
 348 
 349 /* TODO: merge Equal and Cmp API; required generic comparison protocol */
 350 int Scm_StringEqual(ScmString *x, ScmString *y)
 351 {
 352     const ScmStringBody *xb = SCM_STRING_BODY(x);
 353     const ScmStringBody *yb = SCM_STRING_BODY(y);
 354     if ((SCM_STRING_BODY_FLAGS(xb)^SCM_STRING_BODY_FLAGS(yb))&SCM_STRING_INCOMPLETE) {
 355         return FALSE;
 356     }
 357     if (SCM_STRING_BODY_SIZE(xb) != SCM_STRING_BODY_SIZE(yb)) {
 358         return FALSE;
 359     }
 360     return (memcmp(SCM_STRING_BODY_START(xb),
 361                    SCM_STRING_BODY_START(yb),
 362                    SCM_STRING_BODY_SIZE(xb)) == 0? TRUE : FALSE);
 363 }
 364 
 365 int Scm_StringCmp(ScmString *x, ScmString *y)
 366 {
 367     int sizx, sizy, siz, r;
 368     const ScmStringBody *xb = SCM_STRING_BODY(x);
 369     const ScmStringBody *yb = SCM_STRING_BODY(y);
 370     if ((SCM_STRING_BODY_FLAGS(xb)^SCM_STRING_BODY_FLAGS(yb))&SCM_STRING_INCOMPLETE) {
 371         Scm_Error("cannot compare incomplete vs complete string: %S, %S",
 372                   SCM_OBJ(x), SCM_OBJ(y));
 373     }
 374     sizx = SCM_STRING_BODY_SIZE(xb);
 375     sizy = SCM_STRING_BODY_SIZE(yb);
 376     siz = (sizx < sizy)? sizx : sizy;
 377     r = memcmp(SCM_STRING_BODY_START(xb), SCM_STRING_BODY_START(yb), siz);
 378     if (r == 0) return (sizx - sizy);
 379     else return r;
 380 }
 381 
 382 /* single-byte case insensitive comparison */
 383 static int sb_strcasecmp(const char *px, int sizx,
 384                          const char *py, int sizy)
 385 {
 386     char cx, cy;
 387     for (; sizx > 0 && sizy > 0; sizx--, sizy--, px++, py++) {
 388         cx = tolower(*px);
 389         cy = tolower(*py);
 390         if (cx == cy) continue;
 391         return (cx - cy);
 392     }
 393     if (sizx > 0) return 1;
 394     if (sizy > 0) return -1;
 395     return 0;
 396 }
 397 
 398 /* multi-byte case insensitive comparison */
 399 static int mb_strcasecmp(const char *px, int lenx,
 400                          const char *py, int leny)
 401 {
 402     int cx, cy, ccx, ccy, ix, iy;
 403     for (; lenx > 0 && leny > 0; lenx--, leny--, px+=ix, py+=iy) {
 404         SCM_CHAR_GET(px, cx);
 405         SCM_CHAR_GET(py, cy);
 406         ccx = SCM_CHAR_UPCASE(cx);
 407         ccy = SCM_CHAR_UPCASE(cy);
 408         if (ccx != ccy) return (ccx - ccy);
 409         ix = SCM_CHAR_NBYTES(cx);
 410         iy = SCM_CHAR_NBYTES(cy);
 411     }
 412     if (lenx > 0) return 1;
 413     if (leny > 0) return -1;
 414     return 0;
 415 }
 416 
 417 int Scm_StringCiCmp(ScmString *x, ScmString *y)
 418 {
 419     int sizx, lenx, sizy, leny;
 420     const char *px, *py;
 421     const ScmStringBody *xb = SCM_STRING_BODY(x);
 422     const ScmStringBody *yb = SCM_STRING_BODY(y);
 423     
 424     if ((SCM_STRING_BODY_FLAGS(xb)^SCM_STRING_BODY_FLAGS(yb))&SCM_STRING_INCOMPLETE) {
 425         Scm_Error("cannot compare incomplete strings in case-insensitive way: %S, %S",
 426                   SCM_OBJ(x), SCM_OBJ(y));
 427     }
 428     sizx = SCM_STRING_BODY_SIZE(xb); lenx = SCM_STRING_BODY_SIZE(xb);
 429     sizy = SCM_STRING_BODY_SIZE(yb); leny = SCM_STRING_BODY_SIZE(yb);
 430     px = SCM_STRING_BODY_START(xb);
 431     py = SCM_STRING_BODY_START(yb);
 432     
 433     if (sizx == lenx && sizy == leny) {
 434         return sb_strcasecmp(px, sizx, py, sizy);
 435     } else {
 436         return mb_strcasecmp(px, lenx, py, leny);
 437     }
 438 }
 439 
 440 /*----------------------------------------------------------------
 441  * Reference
 442  */
 443 
 444 /* Internal fn for index -> position.  Args assumed in boundary. */
 445 static const char *forward_pos(const char *current, int offset)
 446 {
 447     int n;
 448     
 449     while (offset--) {
 450         n = SCM_CHAR_NFOLLOWS(*current);
 451         current += n + 1;
 452     }
 453     return current;
 454 }
 455 
 456 /* string-ref.
 457  * If POS is out of range,
 458  *   - returns SCM_CHAR_INVALID if range_error is FALSE
 459  *   - raise error otherwise.
 460  * This differs from Scheme version, which takes an optional 'fallback'
 461  * argument which will be returned when POS is out-of-range.  We can't
 462  * have the same semantics since the return type is limited.
 463  */
 464 ScmChar Scm_StringRef(ScmString *str, int pos, int range_error)
 465 {
 466     const ScmStringBody *b = SCM_STRING_BODY(str);
 467     int len = SCM_STRING_BODY_LENGTH(b);
 468 
 469     /* we can't allow string-ref on incomplete strings, since it may yield
 470        invalid character object. */
 471     if (SCM_STRING_BODY_INCOMPLETE_P(b)) {
 472         Scm_Error("incomplete string not allowed : %S", str);
 473     }
 474     if (pos < 0 || pos >= len) {
 475         if (range_error) {
 476             Scm_Error("argument out of range: %d", pos);
 477         } else {
 478             return SCM_CHAR_INVALID;
 479         }
 480     }
 481     if (SCM_STRING_BODY_SINGLE_BYTE_P(b)) {
 482         return (ScmChar)(((unsigned char *)SCM_STRING_BODY_START(b))[pos]);
 483     } else {
 484         const char *p = forward_pos(SCM_STRING_BODY_START(b), pos);
 485         ScmChar c;
 486         SCM_CHAR_GET(p, c);
 487         return c;
 488     }
 489 }
 490 
 491 /* The meaning and rationale of range_error is the same as Scm_StringRef.
 492  * Returns -1 if OFFSET is out-of-range and RANGE_ERROR is FALSE.
 493  * (Because of this, the return type is not ScmByte but int.
 494  */
 495 int Scm_StringByteRef(ScmString *str, int offset, int range_error)
 496 {
 497     const ScmStringBody *b = SCM_STRING_BODY(str);
 498     if (offset < 0 || offset >= SCM_STRING_BODY_SIZE(b)) {
 499         if (range_error) {
 500             Scm_Error("argument out of range: %d", offset);
 501         } else {
 502             return -1;
 503         }
 504     }
 505     return (ScmByte)SCM_STRING_BODY_START(b)[offset];
 506 }
 507 
 508 /* External interface of forward_pos.  Returns the pointer to the
 509    offset-th character in str. */
 510 /* NB: this function allows offset == length of the string; in that
 511    case, the return value points the location past the string body,
 512    but it is necessary sometimes to do a pointer arithmetic with the
 513    returned values. */
 514 const char *Scm_StringPosition(ScmString *str, int offset)
 515 {
 516     const ScmStringBody *b = SCM_STRING_BODY(str);
 517     if (offset < 0 || offset > SCM_STRING_BODY_LENGTH(b)) {
 518         Scm_Error("argument out of range: %d", offset);
 519     }
 520     if (SCM_STRING_BODY_INCOMPLETE_P(b)) {
 521         return (SCM_STRING_BODY_START(b)+offset);
 522     } else {
 523         return (forward_pos(SCM_STRING_BODY_START(b), offset));
 524     }
 525 }
 526 
 527 /*----------------------------------------------------------------
 528  * Concatenation
 529  */
 530 
 531 ScmObj Scm_StringAppend2(ScmString *x, ScmString *y)
 532 {
 533     const ScmStringBody *xb = SCM_STRING_BODY(x);
 534     const ScmStringBody *yb = SCM_STRING_BODY(y);
 535     int sizex = SCM_STRING_BODY_SIZE(xb), lenx = SCM_STRING_BODY_LENGTH(xb);
 536     int sizey = SCM_STRING_BODY_SIZE(yb), leny = SCM_STRING_BODY_LENGTH(yb);
 537     int flags = 0;
 538     char *p = SCM_NEW_ATOMIC2(char *,sizex + sizey + 1);
 539 
 540     memcpy(p, xb->start, sizex);
 541     memcpy(p+sizex, yb->start, sizey);
 542     p[sizex + sizey] = '\0';
 543 
 544     if (SCM_STRING_BODY_INCOMPLETE_P(xb) || SCM_STRING_BODY_INCOMPLETE_P(yb)) {
 545         flags |= SCM_STRING_INCOMPLETE; /* yields incomplete string */
 546     }
 547     return SCM_OBJ(make_str(lenx+leny, sizex+sizey, p, flags));
 548 }
 549 
 550 ScmObj Scm_StringAppendC(ScmString *x, const char *str, int sizey, int leny)
 551 {
 552     const ScmStringBody *xb = SCM_STRING_BODY(x);
 553     int sizex = SCM_STRING_BODY_SIZE(xb), lenx = SCM_STRING_BODY_LENGTH(xb);
 554     int flags = 0;
 555     char *p;
 556 
 557     if (sizey < 0) count_size_and_length(str, &sizey, &leny);
 558     else if (leny < 0) leny = count_length(str, sizey);
 559     
 560     p = SCM_NEW_ATOMIC2(char *, sizex + sizey + 1);
 561     memcpy(p, xb->start, sizex);
 562     memcpy(p+sizex, str, sizey);
 563     p[sizex+sizey] = '\0';
 564 
 565     if (SCM_STRING_BODY_INCOMPLETE_P(xb) || leny < 0) {
 566         flags |= SCM_STRING_INCOMPLETE;
 567     }
 568     return SCM_OBJ(make_str(lenx + leny, sizex + sizey, p, flags));
 569 }
 570 
 571 ScmObj Scm_StringAppend(ScmObj strs)
 572 {
 573 #define BODY_ARRAY_SIZE 32
 574     ScmObj cp;
 575     int size = 0, len = 0, flags = 0, numstrs, i;
 576     char *buf, *bufp;
 577     const ScmStringBody *bodies_s[BODY_ARRAY_SIZE], **bodies;
 578 
 579     /* It is trickier than it appears, since the strings may be modified
 580        by another thread during we're dealing with it.  So in the first
 581        pass to sum up the lenghts of strings, we extract the string bodies
 582        and save it.  */
 583     numstrs = Scm_Length(strs);
 584     if (numstrs < 0) Scm_Error("improper list not allowed: %S", strs);
 585     if (numstrs >= BODY_ARRAY_SIZE) {
 586         bodies = SCM_NEW_ARRAY(const ScmStringBody*, numstrs);
 587     } else {
 588         bodies = bodies_s;
 589     }
 590 
 591     i=0;
 592     SCM_FOR_EACH(cp, strs) {
 593         const ScmStringBody *b;
 594         if (!SCM_STRINGP(SCM_CAR(cp))) {
 595             Scm_Error("string required, but got %S\n", SCM_CAR(cp));
 596         }
 597         b = SCM_STRING_BODY(SCM_CAR(cp));
 598         size += SCM_STRING_BODY_SIZE(b);
 599         len += SCM_STRING_BODY_LENGTH(b);
 600         if (SCM_STRING_BODY_INCOMPLETE_P(b)) {
 601             flags |= SCM_STRING_INCOMPLETE;
 602         }
 603         bodies[i++] = b;
 604     }
 605 
 606     bufp = buf = SCM_NEW_ATOMIC2(char *, size+1);
 607     for (i=0; i<numstrs; i++) {
 608         const ScmStringBody *b = bodies[i];
 609         memcpy(bufp, SCM_STRING_BODY_START(b), SCM_STRING_BODY_SIZE(b));
 610         bufp += SCM_STRING_BODY_SIZE(b);
 611     }
 612     *bufp = '\0';
 613     bodies = NULL;              /* to help GC */
 614     return SCM_OBJ(make_str(len, size, buf, flags));
 615 #undef BODY_ARRAY_SIZE
 616 }
 617 
 618 ScmObj Scm_StringJoin(ScmObj strs, ScmString *delim, int grammer)
 619 {
 620 #define BODY_ARRAY_SIZE 32
 621     ScmObj cp;
 622     int size = 0, len = 0, nstrs, ndelim, i, flags = 0;
 623     int dsize, dlen;            /* for delimiter string */
 624     const ScmStringBody *bodies_s[BODY_ARRAY_SIZE], **bodies;
 625     const ScmStringBody *dbody;
 626     char *buf, *bufp;
 627 
 628     nstrs = Scm_Length(strs);
 629     if (nstrs < 0) Scm_Error("improper list not allowed: %S", strs);
 630     if (nstrs == 0) {
 631         if (grammer == SCM_STRING_JOIN_STRICT_INFIX) {
 632             Scm_Error("can't join empty list of strings with strict-infix grammer");
 633         }
 634         return SCM_MAKE_STR("");
 635     }
 636 
 637     if (nstrs >= BODY_ARRAY_SIZE) {
 638         bodies = SCM_NEW_ARRAY(const ScmStringBody *, nstrs);
 639     } else {
 640         bodies = bodies_s;
 641     }
 642 
 643     dbody = SCM_STRING_BODY(delim);
 644     dsize = SCM_STRING_BODY_SIZE(dbody);
 645     dlen  = SCM_STRING_BODY_LENGTH(dbody);
 646     if (SCM_STRING_BODY_INCOMPLETE_P(dbody)) {
 647         flags |= SCM_STRING_INCOMPLETE;
 648     }
 649 
 650     i = 0;
 651     SCM_FOR_EACH(cp, strs) {
 652         const ScmStringBody *b;
 653         if (!SCM_STRINGP(SCM_CAR(cp))) {
 654             Scm_Error("string required, but got %S\n", SCM_CAR(cp));
 655         }
 656         b = SCM_STRING_BODY(SCM_CAR(cp));
 657         size += SCM_STRING_BODY_SIZE(b);
 658         len  += SCM_STRING_BODY_LENGTH(b);
 659         if (SCM_STRING_BODY_INCOMPLETE_P(b)) {
 660             flags |= SCM_STRING_INCOMPLETE;
 661         }
 662         bodies[i++] = b;
 663     }
 664     if (grammer == SCM_STRING_JOIN_INFIX
 665         || grammer == SCM_STRING_JOIN_STRICT_INFIX) {
 666         ndelim = nstrs - 1;
 667     } else {
 668         ndelim = nstrs;
 669     }
 670     size += dsize * ndelim;
 671     len += dlen * ndelim;
 672 
 673     bufp = buf = SCM_NEW_ATOMIC2(char *, size+1);
 674     if (grammer == SCM_STRING_JOIN_PREFIX) {
 675         memcpy(bufp, SCM_STRING_BODY_START(dbody), dsize);
 676         bufp += dsize;
 677     }
 678     for (i=0; i<nstrs; i++) {
 679         const ScmStringBody *b = bodies[i];
 680         memcpy(bufp, SCM_STRING_BODY_START(b), SCM_STRING_BODY_SIZE(b));
 681         bufp += SCM_STRING_BODY_SIZE(b);
 682         if (i < nstrs-1) {
 683             memcpy(bufp, SCM_STRING_BODY_START(dbody), dsize);
 684             bufp += dsize;
 685         }
 686     }
 687     if (grammer == SCM_STRING_JOIN_SUFFIX) {
 688         memcpy(bufp, SCM_STRING_BODY_START(dbody), dsize);
 689         bufp += dsize;
 690     }
 691     *bufp = '\0';
 692     bodies = NULL;              /* to help GC */
 693     return SCM_OBJ(make_str(len, size, buf, flags));
 694 #undef BODY_ARRAY_SIZE
 695 }
 696 
 697 /*----------------------------------------------------------------
 698  * Substitution
 699  */
 700 
 701 static ScmObj string_substitute(ScmString *x,
 702                                 const ScmStringBody *xb, int start,
 703                                 const char *str, int sizey, int leny,
 704                                 int incompletep)
 705 {
 706     int sizex = SCM_STRING_BODY_SIZE(xb), lenx = SCM_STRING_BODY_LENGTH(xb);
 707     int end = start + leny, sizez, newlen;
 708     unsigned int newflags;
 709     char *p;
 710 
 711     if (start < 0) Scm_Error("start index out of range: %d", start);
 712     if (end > lenx) {
 713         Scm_Error("substitution string too long: %S",
 714                   make_str(leny, sizey, str, 0));
 715     }
 716 
 717     if (SCM_STRING_BODY_SINGLE_BYTE_P(xb)) {
 718         /* x is sbstring */
 719         sizez = sizex - leny + sizey;
 720         p = SCM_NEW_ATOMIC2(char *, sizez+1);
 721         if (start > 0) memcpy(p, SCM_STRING_BODY_START(xb), start);
 722         memcpy(p+start, str, sizey);
 723         memcpy(p+start+sizey, SCM_STRING_BODY_START(xb)+end, sizex-end);
 724         p[sizez] = '\0';
 725     } else {
 726         /* x is mbstring */
 727         const char *s, *e;
 728         s = forward_pos(SCM_STRING_BODY_START(xb), start);
 729         e = forward_pos(s, end - start);
 730         sizez = sizex + sizey - (e - s);
 731         p = SCM_NEW_ATOMIC2(char *, sizez+1);
 732         if (start > 0) {
 733             memcpy(p, SCM_STRING_BODY_START(xb), s - SCM_STRING_BODY_START(xb));
 734         }
 735         memcpy(p + (s - SCM_STRING_BODY_START(xb)), str, sizey);
 736         memcpy(p + (s - SCM_STRING_BODY_START(xb)) + sizey, e,
 737                SCM_STRING_BODY_START(xb) + sizex - e);
 738         p[sizez] = '\0';
 739     }
 740     /* Modify x atomically */
 741     newlen = SCM_STRING_BODY_INCOMPLETE_P(xb)? sizez : lenx;
 742     newflags = SCM_STRING_BODY_FLAGS(xb) & ~SCM_STRING_IMMUTABLE;
 743     if (incompletep) newflags |= SCM_STRING_INCOMPLETE;
 744     x->body = make_str_body(newlen,  /* len */
 745                             sizez,   /* size */
 746                             p,       /* start */
 747                             newflags);/* flags */
 748     return SCM_OBJ(x);
 749 }
 750 
 751 ScmObj Scm_StringSubstitute(ScmString *x, int start, ScmString *y)
 752 {
 753     const ScmStringBody *yb = SCM_STRING_BODY(y);
 754     CHECK_MUTABLE(x);
 755     return string_substitute(x, SCM_STRING_BODY(x),
 756                              start,
 757                              SCM_STRING_BODY_START(yb),
 758                              SCM_STRING_BODY_SIZE(yb),
 759                              SCM_STRING_BODY_LENGTH(yb),
 760                              SCM_STRING_BODY_INCOMPLETE_P(yb));
 761 }
 762 
 763 ScmObj Scm_StringSet(ScmString *x, int k, ScmChar ch)
 764 {
 765     const ScmStringBody *xb = SCM_STRING_BODY(x);
 766     CHECK_MUTABLE(x);
 767     if (SCM_STRING_BODY_INCOMPLETE_P(xb)) {
 768         char byte = (char)ch;
 769         return string_substitute(x, xb, k, &byte, 1, 1, TRUE);
 770     } else {
 771         char buf[SCM_CHAR_MAX_BYTES+1];
 772         int size = SCM_CHAR_NBYTES(ch);
 773         SCM_CHAR_PUT(buf, ch);
 774         return string_substitute(x, xb, k, buf, size, 1, FALSE);
 775     }
 776 }
 777 
 778 ScmObj Scm_StringByteSet(ScmString *x, int k, ScmByte b)
 779 {
 780     const ScmStringBody *xb = SCM_STRING_BODY(x);
 781     int size = SCM_STRING_BODY_SIZE(xb);
 782     char *p;
 783     
 784     CHECK_MUTABLE(x);
 785     if (k < 0 || k >= size) Scm_Error("argument out of range: %d", k);
 786     p = SCM_NEW_ATOMIC2(char *, size+1);
 787     memcpy(p, xb->start, size);
 788     p[size] = '\0';
 789     p[k] = (char)b;
 790 
 791     /* Modify x atomically */
 792     x->body = make_str_body(size, size, p, SCM_STRING_INCOMPLETE);
 793     return SCM_OBJ(x);
 794 }
 795 
 796 /*----------------------------------------------------------------
 797  * Substring
 798  */
 799 
 800 static ScmObj substring(const ScmStringBody *xb, int start, int end)
 801 {
 802     if (start < 0)
 803         Scm_Error("start argument needs to be positive: %d", start);
 804     if (end > SCM_STRING_BODY_LENGTH(xb))
 805         Scm_Error("end argument is out of range: %d", end);
 806     if (end < start)
 807         Scm_Error("end argument must be equal to or greater than the start argument: start=%d, end=%d", start, end);
 808     if (SCM_STRING_BODY_SINGLE_BYTE_P(xb)) {
 809         return SCM_OBJ(make_str(end-start,
 810                                 end-start,
 811                                 SCM_STRING_BODY_START(xb) + start,
 812                                 SCM_STRING_BODY_FLAGS(xb)&~SCM_STRING_IMMUTABLE));
 813     } else {
 814         const char *s, *e;
 815         if (start) s = forward_pos(SCM_STRING_BODY_START(xb), start);
 816         else s = SCM_STRING_BODY_START(xb);
 817         e = forward_pos(s, end - start);
 818         return SCM_OBJ(make_str(end - start, e - s, s, 0));
 819     }
 820 }
 821 
 822 ScmObj Scm_Substring(ScmString *x, int start, int end)
 823 {
 824     return substring(SCM_STRING_BODY(x), start, end);
 825 }
 826 
 827 /* Auxiliary procedure to support optional start/end parameter specified
 828    in lots of SRFI-13 functions.   If start and end is specified and restricts
 829    string range, call substring.  Otherwise returns x itself. */
 830 ScmObj Scm_MaybeSubstring(ScmString *x, ScmObj start, ScmObj end)
 831 {
 832     int istart, iend;
 833     const ScmStringBody *xb = SCM_STRING_BODY(x);
 834     if (SCM_UNBOUNDP(start) || SCM_UNDEFINEDP(start)) {
 835         istart = 0;
 836     } else {
 837         if (!SCM_INTP(start))
 838             Scm_Error("exact integer required for start, but got %S", start);
 839         istart = SCM_INT_VALUE(start);
 840     }
 841 
 842     if (SCM_UNBOUNDP(end) || SCM_UNDEFINEDP(end)) {
 843         if (istart == 0) return SCM_OBJ(x);
 844         iend = SCM_STRING_BODY_LENGTH(xb);
 845     } else {
 846         if (!SCM_INTP(end))
 847             Scm_Error("exact integer required for start, but got %S", end);
 848         iend = SCM_INT_VALUE(end);
 849     }
 850     return substring(xb, istart, iend);
 851 }
 852 
 853 /*----------------------------------------------------------------
 854  * Search & parse
 855  */
 856 
 857 /* Split string by char.  Char itself is not included in the result. */
 858 /* TODO: fix semantics.  What should be returned for (string-split "" #\.)? */
 859 ScmObj Scm_StringSplitByChar(ScmString *str, ScmChar ch)
 860 {
 861     const ScmStringBody *strb = SCM_STRING_BODY(str);
 862     int size = SCM_STRING_BODY_SIZE(strb), sizecnt = 0;
 863     int lencnt = 0;
 864     const char *s = SCM_STRING_BODY_START(strb), *p = s, *e = s + size;
 865     ScmObj head = SCM_NIL, tail = SCM_NIL;
 866 
 867     if (SCM_STRING_BODY_INCOMPLETE_P(strb)) {
 868         /* TODO: fix the policy of handling incomplete string */
 869         Scm_Error("incomplete string not accepted: %S", str);
 870     }
 871     
 872     while (p < e) {
 873         ScmChar cc;
 874         int ncc;
 875 
 876         SCM_CHAR_GET(p, cc);
 877         ncc = SCM_CHAR_NBYTES(cc);
 878         if (ch == cc) {
 879             SCM_APPEND1(head, tail, Scm_MakeString(s, sizecnt, lencnt, 0));
 880             sizecnt = lencnt = 0;
 881             p += ncc;
 882             s = p;
 883         } else {
 884             p += ncc;
 885             sizecnt += ncc;
 886             lencnt ++;
 887         }
 888     }
 889     SCM_APPEND1(head, tail, Scm_MakeString(s, sizecnt, lencnt, 0));
 890     return head;
 891 }
 892 
 893 /* Boyer-Moore string search.  assuming siz1 > siz2, siz2 < 256. */
 894 static inline int boyer_moore(const char *ss1, int siz1,
 895                               const char *ss2, int siz2)
 896 {
 897     unsigned char shift[256];
 898     int i, j, k;
 899     for (i=0; i<256; i++) { shift[i] = siz2; }
 900     for (j=0; j<siz2-1; j++) {
 901         shift[(unsigned char)ss2[j]] = siz2-j-1;
 902     }
 903     for (i=siz2-1; i<siz1; i+=shift[(unsigned char)ss1[i]]) {
 904         for (j=siz2-1, k = i; j>=0 && ss1[k] == ss2[j]; j--, k--)
 905             ;
 906         if (j == -1) return k+1;
 907     }
 908     return -1;
 909 }
 910 
 911 /* Scan s2 in s1.  If both strings are single-byte, and s1 is long,
 912    we use Boyer-Moore.
 913    
 914    To avoid rescanning of the string, this function can return
 915    various information, depends on retmode argument.
 916 
 917    SCM_STRING_SCAN_INDEX  : return the index of s1
 918         s1 = "abcde" and s2 = "cd" => 2
 919    SCM_STRING_SCAN_BEFORE : return substring of s1 before s2
 920         s1 = "abcde" and s2 = "cd" => "ab"
 921    SCM_STRING_SCAN_AFTER  : return substring of s1 after s2
 922         s1 = "abcde" and s2 = "cd" => "e"
 923    SCM_STRING_SCAN_BEFORE2 : return substring of s1 before s2, and rest
 924        s1 = "abcde" and s2 = "cd" => "ab" and "cde"
 925    SCM_STRING_SCAN_AFTER2 : return substring of s1 up to s2 and rest
 926        s1 = "abcde" and s2 = "cd" => "abcd" and "e"
 927    SCM_STRING_SCAN_BOTH   : return substring of s1 before and after s2
 928        s1 = "abcde" and s2 = "cd" => "ab" and "e"
 929 */
 930 static ScmObj string_scan(ScmString *s1, const char *ss2,
 931                           int siz2, int len2, int incomplete2,
 932                           int retmode)
 933 {
 934     int i, incomplete;
 935     const ScmStringBody *sb = SCM_STRING_BODY(s1);
 936     const char *ss1 = SCM_STRING_BODY_START(sb);
 937     int siz1 = SCM_STRING_BODY_SIZE(sb);
 938     int len1 = SCM_STRING_BODY_LENGTH(sb);
 939 
 940     if (retmode < 0 || retmode > SCM_STRING_SCAN_BOTH) {
 941         Scm_Error("return mode out fo range: %d", retmode);
 942     }
 943 
 944     if (siz2 == 0) {
 945         /* shortcut */
 946         switch (retmode) {
 947         case SCM_STRING_SCAN_INDEX: return SCM_MAKE_INT(0);
 948         case SCM_STRING_SCAN_BEFORE: return SCM_MAKE_STR("");
 949         case SCM_STRING_SCAN_AFTER:  return Scm_CopyString(s1);
 950         case SCM_STRING_SCAN_BEFORE2:;
 951         case SCM_STRING_SCAN_AFTER2:;
 952         case SCM_STRING_SCAN_BOTH:
 953             return Scm_Values2(SCM_MAKE_STR(""), Scm_CopyString(s1));
 954         }
 955     }
 956     
 957     if (siz1 == len1) {
 958         if (siz2 == len2) goto sbstring;
 959         goto failed;            /* sbstring can't contain mbstring. */   
 960     }
 961     if (len1 >= len2) {
 962         const char *ssp = ss1;
 963         for (i=0; i<=len1-len2; i++) {
 964             if (memcmp(ssp, ss2, siz2) == 0) {
 965                 switch (retmode) {
 966                 case SCM_STRING_SCAN_INDEX:
 967                     return Scm_MakeInteger(i);
 968                 case SCM_STRING_SCAN_BEFORE:
 969                     return Scm_MakeString(ss1, ssp-ss1, i, 0);
 970                 case SCM_STRING_SCAN_AFTER:
 971                     return Scm_MakeString(ssp+siz2, siz1-(ssp-ss1+siz2),
 972                                           len1-i-len2, 0);
 973                 case SCM_STRING_SCAN_BEFORE2:
 974                     return Scm_Values2(Scm_MakeString(ss1, ssp-ss1, i, 0),
 975                                        Scm_MakeString(ssp, siz1-(ssp-ss1),
 976                                                       len1-i, 0));
 977                 case SCM_STRING_SCAN_AFTER2:
 978                     return Scm_Values2(Scm_MakeString(ss1, ssp-ss1+siz2,
 979                                                       i+len2, 0),
 980                                        Scm_MakeString(ssp+siz2,
 981                                                       siz1-(ssp-ss1+siz2),
 982                                                       len1-i-len2, 0));
 983                 case SCM_STRING_SCAN_BOTH:
 984                     return Scm_Values2(Scm_MakeString(ss1, ssp-ss1, i, 0),
 985                                        Scm_MakeString(ssp+siz2,
 986                                                       siz1-(ssp-ss1+siz2),
 987                                                       len1-i-len2, 0));
 988                 }
 989             }
 990             ssp += SCM_CHAR_NFOLLOWS(*ssp) + 1;
 991         }
 992     }
 993     goto failed;
 994 
 995   sbstring: /* short cut for single-byte strings */
 996     if (siz1 < siz2) goto failed;
 997     if (siz1 < 256 || siz2 >= 256) {
 998         /* brute-force search */
 999         for (i=0; i<=siz1-siz2; i++) {
1000             if (memcmp(ss2, ss1+i, siz2) == 0) break;
1001         }
1002         if (i == siz1-siz2+1) goto failed;
1003     } else {
1004         i = boyer_moore(ss1, siz1, ss2, siz2);
1005         if (i < 0) goto failed;
1006     }
1007     incomplete =
1008         (SCM_STRING_BODY_INCOMPLETE_P(sb) || incomplete2)?
1009         SCM_MAKSTR_INCOMPLETE : 0;
1010     switch (retmode) {
1011     case SCM_STRING_SCAN_INDEX:
1012         return Scm_MakeInteger(i);
1013     case SCM_STRING_SCAN_BEFORE:
1014         return Scm_MakeString(ss1, i, i, incomplete);
1015     case SCM_STRING_SCAN_AFTER:
1016         return Scm_MakeString(ss1+i+siz2, siz1-(i+siz2), siz1-(i+siz2),
1017                               incomplete);
1018     case SCM_STRING_SCAN_BEFORE2:
1019         return Scm_Values2(Scm_MakeString(ss1, i, i, incomplete),
1020                            Scm_MakeString(ss1+i, siz1-i, siz1-i, incomplete));
1021     case SCM_STRING_SCAN_AFTER2:
1022         return Scm_Values2(Scm_MakeString(ss1, i+siz2, i+siz2, incomplete),
1023                            Scm_MakeString(ss1+i+siz2, siz1-(i+siz2),
1024                                           siz1-(i+siz2), incomplete));
1025     case SCM_STRING_SCAN_BOTH:
1026         return Scm_Values2(Scm_MakeString(ss1, i, i, incomplete),
1027                            Scm_MakeString(ss1+i+siz2, siz1-(i+siz2),
1028                                           siz1-(i+siz2), incomplete));
1029     }
1030   failed:
1031     if (retmode <= SCM_STRING_SCAN_AFTER) {
1032         return SCM_FALSE;
1033     } else {
1034         return Scm_Values2(SCM_FALSE, SCM_FALSE);
1035     }
1036 }
1037 
1038 
1039 ScmObj Scm_StringScan(ScmString *s1, ScmString *s2, int retmode)
1040 {
1041     const ScmStringBody *s2b = SCM_STRING_BODY(s2);
1042     return string_scan(s1,
1043                        SCM_STRING_BODY_START(s2b),
1044                        SCM_STRING_BODY_SIZE(s2b),
1045                        SCM_STRING_BODY_LENGTH(s2b),
1046                        SCM_STRING_BODY_INCOMPLETE_P(s2b),
1047                        retmode);
1048 }
1049 
1050 ScmObj Scm_StringScanChar(ScmString *s1, ScmChar ch, int retmode)
1051 {
1052     char buf[SCM_CHAR_MAX_BYTES];
1053     SCM_CHAR_PUT(buf, ch);
1054     return string_scan(s1, buf, SCM_CHAR_NBYTES(ch), 1, FALSE, retmode);
1055 }
1056 
1057 /*----------------------------------------------------------------
1058  * Miscellaneous functions
1059  */
1060 
1061 ScmObj Scm_StringToList(ScmString *str)
1062 {
1063     const ScmStringBody *b = SCM_STRING_BODY(str);
1064     ScmObj start = SCM_NIL, end = SCM_NIL;
1065     const char *bufp = SCM_STRING_BODY_START(b);
1066     int len = SCM_STRING_BODY_LENGTH(b);
1067     ScmChar ch;
1068 
1069     if (SCM_STRING_BODY_INCOMPLETE_P(b))
1070         Scm_Error("incomplete string not supported: %S", str);
1071     while (len-- > 0) {
1072         SCM_CHAR_GET(bufp, ch);
1073         bufp += SCM_CHAR_NBYTES(ch);
1074         SCM_APPEND1(start, end, SCM_MAKE_CHAR(ch));
1075     }
1076     return start;
1077 }
1078 
1079 ScmObj Scm_StringFill(ScmString *str, ScmChar ch,
1080                       ScmObj maybe_start, ScmObj maybe_end)
1081 {
1082     int len, i, start, end, prelen, midlen, postlen;
1083     int chlen = SCM_CHAR_NBYTES(ch);
1084     char *newstr, *p;
1085     const unsigned char *s, *r;
1086     const ScmStringBody *strb = SCM_STRING_BODY(str);
1087 
1088     CHECK_MUTABLE(str);
1089     if (SCM_STRING_BODY_INCOMPLETE_P(strb)) {
1090         Scm_Error("incomplete string not allowed: %S", str);
1091     }
1092     len = SCM_STRING_BODY_LENGTH(strb);
1093 
1094     if (SCM_UNBOUNDP(maybe_start) || SCM_UNDEFINEDP(maybe_start)) {
1095         start = 0;
1096     } else {
1097         if (!SCM_INTP(maybe_start))
1098             Scm_Error("exact integer required for start, but got %S",
1099                       maybe_start);
1100         start = SCM_INT_VALUE(maybe_start);
1101     }
1102     if (SCM_UNBOUNDP(maybe_end) || SCM_UNDEFINEDP(maybe_end)) {
1103         end = len;
1104     } else {
1105         if (!SCM_INTP(maybe_end))
1106             Scm_Error("exact integer required for end, but got %S",
1107                       maybe_end);
1108         end = SCM_INT_VALUE(maybe_end);
1109     }
1110     if (start < 0 || start > end || end > len) {
1111         Scm_Error("start/end pair is out of range: (%d %d)", start, end);
1112     }
1113     if (start == end) return SCM_OBJ(str);
1114     
1115     s = (unsigned char*)SCM_STRING_BODY_START(strb);
1116     for (i = 0; i < start; i++) s += SCM_CHAR_NFOLLOWS(*s)+1;
1117     prelen = s - (unsigned char*)SCM_STRING_BODY_START(strb);
1118     r = s;
1119     for (; i < end; i++)        s += SCM_CHAR_NFOLLOWS(*s)+1;
1120     midlen = s - r;
1121     postlen = SCM_STRING_BODY_SIZE(strb) - midlen - prelen;
1122 
1123     p = newstr = SCM_NEW_ATOMIC2(char *,
1124                                  prelen + (end-start)*chlen + postlen + 1);
1125     memcpy(p, SCM_STRING_BODY_START(strb), prelen);
1126     p += prelen;
1127     for (i=0; i < end-start; i++) {
1128         SCM_CHAR_PUT(p, ch);
1129         p += chlen;
1130     }
1131     memcpy(p, SCM_STRING_BODY_START(strb) + prelen + midlen, postlen);
1132     p[postlen] = '\0';          /* be friendly to C */
1133     /* modify str atomically */
1134     str->body = make_str_body(SCM_STRING_BODY_LENGTH(strb),
1135                               prelen + (end-start)*chlen + postlen,
1136                               newstr,
1137                               0);
1138     return SCM_OBJ(str);
1139 }
1140 
1141 ScmObj Scm_ConstCStringArrayToList(const char **array, int size)
1142 {
1143     int i;
1144     ScmObj h = SCM_NIL, t = SCM_NIL;
1145     if (size < 0) {
1146         for (;*array; array++) SCM_APPEND1(h, t, SCM_MAKE_STR(*array));
1147     } else {
1148         for (i=0; i<size; i++) SCM_APPEND1(h, t, SCM_MAKE_STR(*array++));
1149     }
1150     return h;
1151 }
1152 
1153 ScmObj Scm_CStringArrayToList(char **array, int size)
1154 {
1155     int i;
1156     ScmObj h = SCM_NIL, t = SCM_NIL;
1157     if (size < 0) {
1158         for (;*array; array++)
1159             SCM_APPEND1(h, t, SCM_MAKE_STR_COPYING(*array));
1160     } else {
1161         for (i=0; i<size; i++)
1162             SCM_APPEND1(h, t, SCM_MAKE_STR_COPYING(*array++));
1163     }
1164     return h;
1165 }
1166 
1167 /*----------------------------------------------------------------
1168  * printer
1169  */
1170 static inline void string_putc(ScmChar ch, ScmPort *port, int bytemode)
1171 {
1172     char buf[5];
1173     switch (ch) {
1174     case '\\': SCM_PUTZ("\\\\", -1, port); break;
1175     case '"':  SCM_PUTZ("\\\"", -1, port); break;
1176     case '\n': SCM_PUTZ("\\n", -1, port); break;
1177     case '\t': SCM_PUTZ("\\t", -1, port); break;
1178     case '\r': SCM_PUTZ("\\r", -1, port); break;
1179     case '\f': SCM_PUTZ("\\f", -1, port); break;
1180     case '\0': SCM_PUTZ("\\0", -1, port); break;
1181     default:
1182         if (ch < ' ' || ch == 0x7f || (bytemode && ch >= 0x80)) {
1183             snprintf(buf, 5, "\\x%02x", (unsigned char)ch);
1184             SCM_PUTZ(buf, -1, port);
1185         } else {
1186             SCM_PUTC(ch, port);
1187         }
1188     }
1189 }
1190 
1191 static void string_print(ScmObj obj, ScmPort *port, ScmWriteContext *ctx)
1192 {
1193     ScmString *str = SCM_STRING(obj);
1194     if (SCM_WRITE_MODE(ctx) == SCM_WRITE_DISPLAY) {
1195         SCM_PUTS(str, port);
1196     } else {
1197         const ScmStringBody *b = SCM_STRING_BODY(str);
1198         if (SCM_STRING_BODY_SINGLE_BYTE_P(b)) {
1199             const char *cp = SCM_STRING_BODY_START(b);
1200             int size = SCM_STRING_BODY_SIZE(b);
1201             if (SCM_STRING_BODY_INCOMPLETE_P(b)) {
1202                 SCM_PUTZ("#*\"", -1, port);
1203             } else {
1204                 SCM_PUTC('"', port);
1205             }
1206             while (size--) {
1207                 string_putc(*cp++, port, SCM_STRING_BODY_INCOMPLETE_P(b));
1208             }
1209         } else {
1210             ScmChar ch;
1211             const char *cp = SCM_STRING_BODY_START(b);
1212             int len = SCM_STRING_BODY_LENGTH(b);
1213 
1214             SCM_PUTC('"', port);
1215             while (len--) {
1216                 SCM_CHAR_GET(cp, ch);
1217                 string_putc(ch, port, FALSE);
1218                 cp += SCM_CHAR_NBYTES(ch);
1219             }
1220         }
1221         SCM_PUTC('"', port);
1222     }
1223 }
1224 
1225 /*==================================================================
1226  *
1227  * String pointer
1228  *
1229  */
1230 
1231 SCM_DEFINE_BUILTIN_CLASS_SIMPLE(Scm_StringPointerClass, NULL);
1232 
1233 ScmObj Scm_MakeStringPointer(ScmString *src, int index, int start, int end)
1234 {
1235     const ScmStringBody *srcb = SCM_STRING_BODY(src);
1236     int len = SCM_STRING_BODY_LENGTH(srcb);
1237     int effective_size;
1238     const char *sptr, *ptr, *eptr;
1239     ScmStringPointer *sp;
1240 
1241     SCM_CHECK_START_END(start, end, len);
1242     while (index < 0) index += (end - start) + 1;
1243     if (index > (end - start)) goto badindex;
1244     
1245     if (SCM_STRING_BODY_SINGLE_BYTE_P(srcb)) {
1246         sptr = SCM_STRING_BODY_START(srcb) + start;
1247         ptr = sptr + index;
1248         effective_size = end - start;
1249     } else {
1250         sptr = forward_pos(SCM_STRING_BODY_START(srcb), start);
1251         ptr = forward_pos(sptr, index);
1252         eptr = forward_pos(sptr, end - start);
1253         effective_size = eptr - ptr;
1254     }
1255     sp = SCM_NEW(ScmStringPointer);
1256     SCM_SET_CLASS(sp, SCM_CLASS_STRING_POINTER);
1257     sp->length = (SCM_STRING_BODY_INCOMPLETE_P(srcb)? -1 : (end-start));
1258     sp->size = effective_size;
1259     sp->start = sptr;
1260     sp->index = index;
1261     sp->current = ptr;
1262     return SCM_OBJ(sp);
1263   badindex:
1264     Scm_Error("index out of range: %d", index);
1265     return SCM_UNDEFINED;
1266 }
1267 
1268 ScmObj Scm_StringPointerRef(ScmStringPointer *sp)
1269 {
1270     ScmChar ch;
1271     if (sp->length < 0 || sp->size == sp->length) {
1272         if (sp->index >= sp->size) return SCM_EOF;
1273         ch = *sp->current;
1274     } else {
1275         if (sp->index >= sp->length) return SCM_EOF;
1276         SCM_CHAR_GET(sp->current, ch);
1277     }
1278     return SCM_MAKE_CHAR(ch);
1279 }
1280 
1281 ScmObj Scm_StringPointerNext(ScmStringPointer *sp)
1282 {
1283     ScmChar ch;
1284     if (sp->length < 0 || sp->size == sp->length) {
1285         if (sp->index >= sp->size) return SCM_EOF;
1286         sp->index++;
1287         ch = *sp->current++;
1288     } else {
1289         if (sp->index >= sp->length) return SCM_EOF;
1290         SCM_CHAR_GET(sp->current, ch);
1291         sp->index++;
1292         sp->current += SCM_CHAR_NFOLLOWS(*sp->current) + 1;
1293     }
1294     return SCM_MAKE_CHAR(ch);
1295 }
1296 
1297 ScmObj Scm_StringPointerPrev(ScmStringPointer *sp)
1298 {
1299     ScmChar ch;
1300     if (sp->index <= 0) return SCM_EOF;
1301     if (sp->length < 0 || sp->size == sp->length) {
1302         sp->index--;
1303         ch = *--sp->current;
1304     } else {
1305         const char *prev;
1306         SCM_CHAR_BACKWARD(sp->current, sp->start, prev);
1307         SCM_ASSERT(prev != NULL);
1308         SCM_CHAR_GET(prev, ch);
1309         sp->index--;
1310         sp->current = prev;
1311     }
1312     return SCM_MAKE_CHAR(ch);
1313 }
1314 
1315 ScmObj Scm_StringPointerSet(ScmStringPointer *sp, int index)
1316 {
1317     if (index < 0) goto badindex;
1318     if (sp->length < 0 || sp->size == sp->length) {
1319         if (index > sp->size) goto badindex;
1320         sp->index = index;
1321         sp->current = sp->start + index;
1322     } else {
1323         if (index > sp->length) goto badindex;
1324         sp->index = index;
1325         sp->current = forward_pos(sp->start, index);
1326     }
1327     return SCM_OBJ(sp);
1328   badindex:
1329     Scm_Error("index out of range: %d", index);
1330     return SCM_UNDEFINED;
1331 }
1332 
1333 ScmObj Scm_StringPointerSubstring(ScmStringPointer *sp, int afterp)
1334 {
1335     if (sp->length < 0) {
1336         if (afterp)
1337             return SCM_OBJ(make_str(-1, sp->size - sp->index, sp->current, 0));
1338         else
1339             return SCM_OBJ(make_str(-1, sp->index, sp->start, 0));
1340     } else {
1341         if (afterp)
1342             return SCM_OBJ(make_str(sp->length - sp->index,
1343                                     sp->start + sp->size - sp->current,
1344                                     sp->current, 0));
1345         else
1346             return SCM_OBJ(make_str(sp->index,
1347                                     sp->current - sp->start,
1348                                     sp->start, 0));
1349     }
1350 }
1351 
1352 /* Copy string pointer.
1353    Thanks to Alex Shinn (foof@synthcode.com) */
1354 ScmObj Scm_StringPointerCopy(ScmStringPointer *sp1)
1355 {
1356     ScmStringPointer *sp2 = SCM_NEW(ScmStringPointer);
1357     SCM_SET_CLASS(sp2, SCM_CLASS_STRING_POINTER);
1358     sp2->length  = sp1->length;
1359     sp2->size    = sp1->size;
1360     sp2->start   = sp1->start;
1361     sp2->index   = sp1->index;
1362     sp2->current = sp1->current;
1363     return SCM_OBJ(sp2);
1364 }
1365 
1366 /* Dump string pointer info for debugging.
1367    Thanks to Alex Shinn (foof@synthcode.com) */
1368 #if SCM_DEBUG_HELPER
1369 void Scm_StringPointerDump(ScmStringPointer *sp1)
1370 {
1371     Scm_Printf(SCM_CUROUT,
1372                "<sp addr: %p len: %d size: %d start: %p index: %d cur: %d>\n",
1373                sp1, sp1->length, sp1->size, sp1->start, sp1->index,
1374                sp1->current);
1375 }
1376 #endif /*SCM_DEBUG_HELPER*/
1377 
1378 /*==================================================================
1379  *
1380  * Dynamic strings
1381  *
1382  */
1383 
1384 /* I used to use realloc() to grow the storage; now I avoid it, for
1385    Boehm GC's realloc almost always copies the original content and
1386    we don't get any benefit.
1387    The growing string is kept in the chained chunks.  The size of
1388    chunk getting bigger as the string grows, until a certain threshold.
1389    The memory for actual chunks and the chain is allocated separately,
1390    in order to use SCM_NEW_ATOMIC.
1391  */
1392 
1393 /* NB: it is important that DString functions don't call any
1394  * time-consuming procedures except memory allocation.   Some of
1395  * mutex code in other parts relies on that fact.
1396  */
1397 
1398 /* maximum chunk size */
1399 #define DSTRING_MAX_CHUNK_SIZE  8180
1400 
1401 void Scm_DStringInit(ScmDString *dstr)
1402 {
1403     dstr->init.bytes = 0;
1404     dstr->anchor = dstr->tail = NULL;
1405     dstr->current = dstr->init.data;
1406     dstr->end = dstr->current + SCM_DSTRING_INIT_CHUNK_SIZE;
1407     dstr->lastChunkSize = SCM_DSTRING_INIT_CHUNK_SIZE;
1408     dstr->length = 0;
1409 }
1410 
1411 inline int Scm_DStringSize(ScmDString *dstr)
1412 {
1413     ScmDStringChain *chain;
1414     int size;
1415     if (dstr->tail) {
1416         size = dstr->init.bytes;
1417         dstr->tail->chunk->bytes = (int)(dstr->current - dstr->tail->chunk->data);
1418         for (chain = dstr->anchor; chain; chain = chain->next) {
1419             size += chain->chunk->bytes;
1420         }
1421     } else {
1422         size = (int)(dstr->current - dstr->init.data);
1423     }
1424     return size;
1425 }
1426 
1427 void Scm__DStringRealloc(ScmDString *dstr, int minincr)
1428 {
1429     ScmDStringChunk *newchunk;
1430     ScmDStringChain *newchain;
1431     int newsize;
1432 
1433     /* sets the byte count of the last chunk */
1434     if (dstr->tail) {
1435         dstr->tail->chunk->bytes = (int)(dstr->current - dstr->tail->chunk->data);
1436     } else {
1437         dstr->init.bytes = (int)(dstr->current - dstr->init.data);
1438     }
1439 
1440     /* determine the size of the new chunk.  the increase factor 3 is
1441        somewhat arbitrary, determined by rudimental benchmarking. */
1442     newsize = dstr->lastChunkSize * 3;
1443     if (newsize > DSTRING_MAX_CHUNK_SIZE) {
1444         newsize = DSTRING_MAX_CHUNK_SIZE;
1445     }
1446     if (newsize < minincr) {
1447         newsize = minincr;
1448     }
1449 
1450     newchunk = SCM_NEW_ATOMIC2(ScmDStringChunk*,
1451                                sizeof(ScmDStringChunk)+newsize-SCM_DSTRING_INIT_CHUNK_SIZE);
1452     newchunk->bytes = 0;
1453     
1454     newchain = SCM_NEW(ScmDStringChain);
1455     
1456     newchain->next = NULL;
1457     newchain->chunk = newchunk;
1458     if (dstr->tail) {
1459         dstr->tail->next = newchain;
1460         dstr->tail = newchain;
1461     } else {
1462         dstr->anchor = dstr->tail = newchain;
1463     }
1464     dstr->current = newchunk->data;
1465     dstr->end = newchunk->data + newsize;
1466     dstr->lastChunkSize = newsize;
1467 }
1468 
1469 /* Retrieve accumulated string. */
1470 static const char *dstring_getz(ScmDString *dstr, int *plen, int *psiz)
1471 {
1472     int size, len;
1473     char *buf;
1474     if (dstr->anchor == NULL) {
1475         /* we only have one chunk */
1476         size = (int)(dstr->current - dstr->init.data);
1477         len = dstr->length;
1478         buf = SCM_NEW_ATOMIC2(char*, size+1);
1479         memcpy(buf, dstr->init.data, size);
1480         buf[size] = '\0';
1481     } else {
1482         ScmDStringChain *chain = dstr->anchor;
1483         char *bptr;
1484         
1485         size = Scm_DStringSize(dstr);
1486         len = dstr->length;
1487         bptr = buf = SCM_NEW_ATOMIC2(char*, size+1);
1488 
1489         memcpy(bptr, dstr->init.data, dstr->init.bytes);
1490         bptr += dstr->init.bytes;
1491         for (; chain; chain = chain->next) {
1492             memcpy(bptr, chain->chunk->data, chain->chunk->bytes);
1493             bptr += chain->chunk->bytes;
1494         }
1495         *bptr = '\0';
1496     }
1497     if (len < 0) len = count_length(buf, size);
1498     *plen = len;
1499     *psiz = size;
1500     return buf;
1501 }
1502 
1503 ScmObj Scm_DStringGet(ScmDString *dstr, int flags)
1504 {
1505     int len, size;
1506     const char *str = dstring_getz(dstr, &len, &size);
1507     return SCM_OBJ(make_str(len, size, str, flags));
1508 }
1509 
1510 /* For conveninence.   Note that dstr may already contain NUL byte in it,
1511    in that case you'll get chopped string. */
1512 const char *Scm_DStringGetz(ScmDString *dstr)
1513 {
1514     int len, size;
1515     return dstring_getz(dstr, &len, &size);
1516 }
1517 
1518 void Scm_DStringPutz(ScmDString *dstr, const char *str, int size)
1519 {
1520     if (size < 0) size = strlen(str);
1521     if (dstr->current + size > dstr->end) {
1522         Scm__DStringRealloc(dstr, size);
1523     }
1524     memcpy(dstr->current, str, size);
1525     dstr->current += size;
1526     if (dstr->length >= 0) {
1527         int len = count_length(str, size);
1528         if (len >= 0) dstr->length += len;
1529         else dstr->length = -1;
1530     }
1531 }
1532 
1533 void Scm_DStringAdd(ScmDString *dstr, ScmString *str)
1534 {
1535     const ScmStringBody *b = SCM_STRING_BODY(str);
1536     int size = SCM_STRING_BODY_SIZE(b);
1537     if (size == 0) return;
1538     if (dstr->current + size > dstr->end) {
1539         Scm__DStringRealloc(dstr, size);
1540     }
1541     memcpy(dstr->current, SCM_STRING_BODY_START(b), size);
1542     dstr->current += size;
1543     if (dstr->length >= 0 && !SCM_STRING_BODY_INCOMPLETE_P(b)) {
1544         dstr->length += SCM_STRING_BODY_LENGTH(b);
1545     } else {
1546         dstr->length = -1;
1547     }
1548 }
1549 
1550 void Scm_DStringPutb(ScmDString *ds, char byte)
1551 {
1552     SCM_DSTRING_PUTB(ds, byte);
1553 }
1554 
1555 void Scm_DStringPutc(ScmDString *ds, ScmChar ch)
1556 {
1557     SCM_DSTRING_PUTC(ds, ch);
1558 }
1559 
1560 
1561 /* for debug */
1562 #if SCM_DEBUG_HELPER
1563 void Scm_DStringDump(FILE *out, ScmDString *dstr)
1564 {
1565     fprintf(out, "DString %p\n", dstr);
1566     if (dstr->anchor) {
1567         ScmDStringChain *chain; int i;
1568         fprintf(out, "  chunk0[%3d] = \"", dstr->init.bytes);
1569         fwrite(dstr->init.data, 1, dstr->init.bytes, out);
1570         fprintf(out, "\"\n");
1571         for (i=1, chain = dstr->anchor; chain; chain = chain->next, i++) {
1572             int size = (chain->next? chain->chunk->bytes : (int)(dstr->current - dstr->tail->chunk->data));
1573             fprintf(out, "  chunk%d[%3d] = \"", i, size);
1574             fwrite(chain->chunk->data, 1, size, out);
1575             fprintf(out, "\"\n");
1576         }
1577     } else {
1578         int size = (int)(dstr->current - dstr->init.data);
1579         fprintf(out, "  chunk0[%3d] = \"", size);
1580         fwrite(dstr->init.data, 1, size, out);
1581         fprintf(out, "\"\n");
1582     }
1583 }
1584 #endif /*SCM_DEBUG_HELPER*/
1585 

/* [<][>][^][v][top][bottom][index][help] */