/* [<][>][^][v][top][bottom][index][help] */
DEFINITIONS
This source file includes following definitions.
- Scm_ReadWithContext
- Scm_Read
- Scm_ReadFromString
- Scm_ReadFromCString
- Scm_ReadListWithContext
- Scm_ReadList
- read_context_init
- Scm_ReadError
- Scm_MakeReadReference
- read_reference_print
- ref_push
- ref_val
- ref_register
- read_context_flush
- char_word_constituent
- char_word_case_fold
- read_nested_comment
- skipws
- read_internal
- read_item
- read_list_int
- read_list
- read_vector
- read_quoted
- read_string_xdigits
- read_string
- read_char
- read_word
- read_symbol
- read_number
- read_symbol_or_number
- read_keyword
- read_escaped_symbol
- read_regexp
- read_charset
- read_reference
- Scm_DefineReaderCtor
- read_sharp_comma
- process_sharp_comma
- reader_ctor
- maybe_uvector
- Scm__InitRead
1 /*
2 * read.c - reader
3 *
4 * Copyright (c) 2000-2005 Shiro Kawai, All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the authors nor the names of its contributors
18 * may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * $Id: read.c,v 1.83 2005/10/13 08:14:13 shirok Exp $
34 */
35
36 #include <stdio.h>
37 #include <ctype.h>
38 #include <math.h>
39 #define LIBGAUCHE_BODY
40 #include "gauche.h"
41 #include "gauche/vm.h"
42 #include "gauche/port.h"
43 #include "gauche/builtin-syms.h"
44
45 /*
46 * READ
47 */
48
49 static void read_context_init(ScmVM *vm, ScmReadContext *ctx);
50 static void read_context_flush(ScmReadContext *ctx);
51 static ScmObj read_internal(ScmPort *port, ScmReadContext *ctx);
52 static ScmObj read_item(ScmPort *port, ScmReadContext *ctx);
53 static ScmObj read_list(ScmPort *port, ScmChar closer, ScmReadContext *ctx);
54 static ScmObj read_vector(ScmPort *port, ScmChar closer, ScmReadContext *ctx);
55 static ScmObj read_string(ScmPort *port, int incompletep, ScmReadContext *ctx);
56 static ScmObj read_quoted(ScmPort *port, ScmObj quoter, ScmReadContext *ctx);
57 static ScmObj read_char(ScmPort *port, ScmReadContext *ctx);
58 static ScmObj read_word(ScmPort *port, ScmChar initial, ScmReadContext *ctx,
59 int temp_case_fold);
60 static ScmObj read_symbol(ScmPort *port, ScmChar initial, ScmReadContext *ctx);
61 static ScmObj read_number(ScmPort *port, ScmChar initial, ScmReadContext *ctx);
62 static ScmObj read_symbol_or_number(ScmPort *port, ScmChar initial, ScmReadContext *ctx);
63 static ScmObj read_escaped_symbol(ScmPort *port, ScmChar delim);
64 static ScmObj read_keyword(ScmPort *port, ScmReadContext *ctx);
65 static ScmObj read_regexp(ScmPort *port);
66 static ScmObj read_charset(ScmPort *port);
67 static ScmObj read_sharp_comma(ScmPort *port, ScmReadContext *ctx);
68 static ScmObj process_sharp_comma(ScmPort *port, ScmObj key, ScmObj args,
69 ScmReadContext *ctx, int has_ref);
70 static ScmObj read_reference(ScmPort *port, ScmChar ch, ScmReadContext *ctx);
71 static ScmObj maybe_uvector(ScmPort *port, char c, ScmReadContext *ctx);
72
73 /* Special hook for SRFI-4 syntax */
74 ScmObj (*Scm_ReadUvectorHook)(ScmPort *port, const char *tag,
75 ScmReadContext *ctx) = NULL;
76
77 /* Table of 'read-time constructor' in SRFI-10 */
78 static struct {
79 ScmHashTable *table;
80 ScmInternalMutex mutex;
81 } readCtorData = { NULL };
82
83 /*----------------------------------------------------------------
84 * Entry points
85 * Note: Entire read operation are done while locking the input port.
86 * So we can use 'unsafe' version of port operations inside this file.
87 * The lock is removed if reader routine signals an error. It is OK
88 * to call read routine recursively.
89 */
90 ScmObj Scm_ReadWithContext(ScmObj port, ScmReadContext *ctx)
91 {
92 ScmVM *vm = Scm_VM();
93 volatile ScmObj r = SCM_NIL;
94 if (!SCM_PORTP(port) || SCM_PORT_DIR(port) != SCM_PORT_INPUT) {
95 Scm_Error("input port required: %S", port);
96 }
97 if (!(ctx->flags & SCM_READ_RECURSIVELY)) {
98 ctx->table = NULL;
99 ctx->pending = SCM_NIL;
100 }
101 if (PORT_LOCKED(SCM_PORT(port), vm)) {
102 r = read_item(SCM_PORT(port), ctx);
103 } else {
104 PORT_LOCK(SCM_PORT(port), vm);
105 PORT_SAFE_CALL(SCM_PORT(port), r = read_item(SCM_PORT(port), ctx));
106 PORT_UNLOCK(SCM_PORT(port));
107 }
108 if (!(ctx->flags & SCM_READ_RECURSIVELY)) {
109 read_context_flush(ctx);
110 }
111 return r;
112 }
113
114 ScmObj Scm_Read(ScmObj port)
115 {
116 ScmReadContext ctx;
117 read_context_init(Scm_VM(), &ctx);
118 return Scm_ReadWithContext(port, &ctx);
119 }
120
121 /* Convenience functions */
122 ScmObj Scm_ReadFromString(ScmString *str)
123 {
124 ScmObj inp = Scm_MakeInputStringPort(str, TRUE), r;
125 ScmReadContext ctx;
126 read_context_init(Scm_VM(), &ctx);
127 r = read_item(SCM_PORT(inp), &ctx);
128 read_context_flush(&ctx);
129 return r;
130 }
131
132 ScmObj Scm_ReadFromCString(const char *cstr)
133 {
134 ScmObj s = SCM_MAKE_STR_IMMUTABLE(cstr);
135 ScmObj inp = Scm_MakeInputStringPort(SCM_STRING(s), TRUE);
136 ScmObj r;
137 ScmReadContext ctx;
138 read_context_init(Scm_VM(), &ctx);
139 r = read_item(SCM_PORT(inp), &ctx);
140 read_context_flush(&ctx);
141 return r;
142 }
143
144 ScmObj Scm_ReadListWithContext(ScmObj port, ScmChar closer, ScmReadContext *ctx)
145 {
146 ScmVM *vm = Scm_VM();
147 volatile ScmObj r = SCM_NIL;
148 if (!SCM_PORTP(port) || SCM_PORT_DIR(port) != SCM_PORT_INPUT) {
149 Scm_Error("input port required: %S", port);
150 }
151 if (!(ctx->flags & SCM_READ_RECURSIVELY)) {
152 ctx->table = NULL;
153 ctx->pending = SCM_NIL;
154 }
155 if (PORT_LOCKED(SCM_PORT(port), vm)) {
156 r = read_list(SCM_PORT(port), closer, ctx);
157 } else {
158 PORT_LOCK(SCM_PORT(port), vm);
159 PORT_SAFE_CALL(SCM_PORT(port), r = read_list(SCM_PORT(port), closer, ctx));
160 PORT_UNLOCK(SCM_PORT(port));
161 }
162 if (!(ctx->flags & SCM_READ_RECURSIVELY)) {
163 read_context_flush(ctx);
164 }
165 return r;
166 }
167
168 ScmObj Scm_ReadList(ScmObj port, ScmChar closer)
169 {
170 ScmReadContext ctx;
171 read_context_init(Scm_VM(), &ctx);
172 return Scm_ReadListWithContext(port, closer, &ctx);
173 }
174
175 static void read_context_init(ScmVM *vm, ScmReadContext *ctx)
176 {
177 ctx->flags = SCM_READ_SOURCE_INFO;
178 if (SCM_VM_RUNTIME_FLAG_IS_SET(vm, SCM_CASE_FOLD)) {
179 ctx->flags |= SCM_READ_CASE_FOLD;
180 }
181 ctx->table = NULL;
182 ctx->pending = SCM_NIL;
183 }
184
185 /*----------------------------------------------------------------
186 * Error
187 */
188
189 void Scm_ReadError(ScmPort *port, const char *msg, ...)
190 {
191 ScmObj ostr = Scm_MakeOutputStringPort(TRUE);
192 ScmObj name = Scm_PortName(port);
193 ScmObj rerr;
194 int line = Scm_PortLine(port);
195 va_list ap;
196
197 Scm_Printf(SCM_PORT(ostr), "Read error at %S:",
198 SCM_STRINGP(name)? name : SCM_OBJ(SCM_MAKE_STR("??")));
199 if (line >= 0) {
200 Scm_Printf(SCM_PORT(ostr), "line %d: ", line);
201 }
202 va_start(ap, msg);
203 Scm_Vprintf(SCM_PORT(ostr), msg, ap, TRUE);
204 va_end(ap);
205
206 rerr = Scm_MakeReadError(Scm_GetOutputString(SCM_PORT(ostr)), port, line);
207 Scm_Raise(rerr);
208 }
209
210 /*----------------------------------------------------------------
211 * Read reference
212 */
213
214 /* Read reference is a proxy object to for referenced object (#N=).
215 */
216
217 static void read_reference_print(ScmObj obj, ScmPort *port,
218 ScmWriteContext *ctx);
219 SCM_DEFINE_BUILTIN_CLASS_SIMPLE(Scm_ReadReferenceClass, read_reference_print);
220
221 ScmObj Scm_MakeReadReference(void)
222 {
223 ScmReadReference *a;
224 a = SCM_NEW(ScmReadReference);
225 SCM_SET_CLASS(a, SCM_CLASS_READ_REFERENCE);
226 a->value = SCM_UNBOUND;
227 return SCM_OBJ(a);
228 }
229
230 static void read_reference_print(ScmObj obj, ScmPort *port,
231 ScmWriteContext *ctx)
232 {
233 Scm_Printf(port, "#<read-reference>");
234 }
235
236 static void ref_push(ScmReadContext *ctx, ScmObj obj, ScmObj finisher)
237 {
238 ctx->pending = Scm_Acons(obj, finisher, ctx->pending);
239 }
240
241 static ScmObj ref_val(ref)
242 {
243 if (!SCM_READ_REFERENCE_REALIZED(ref)) {
244 Scm_Error("reader encontered unresolved read reference. Implementation error?");
245 }
246 return SCM_READ_REFERENCE(ref)->value;
247 }
248
249 static ScmObj ref_register(ScmReadContext *ctx, ScmObj obj, int refnum)
250 {
251 SCM_ASSERT(ctx->table);
252 Scm_HashTablePut(ctx->table, SCM_MAKE_INT(refnum), obj);
253 return obj;
254 }
255
256 /* ctx->pending contains an assoc list of objects who contains read reference
257 which should be resolved.
258 The car of each entry is the object that needs to be fixed, and the
259 cdr of eacy entry may contain a finisher procedure (if the object is
260 created by read-time constructor.
261 */
262 static void read_context_flush(ScmReadContext *ctx)
263 {
264 ScmObj cp, ep, entry, obj, finisher;
265
266 SCM_FOR_EACH(cp, ctx->pending) {
267 entry = SCM_CAR(cp);
268 SCM_ASSERT(SCM_PAIRP(entry));
269 obj = SCM_CAR(entry);
270 finisher = SCM_CDR(entry);
271
272 if (!SCM_FALSEP(finisher)) {
273 Scm_Apply(finisher, SCM_LIST1(obj));
274 } else if (SCM_PAIRP(obj)) {
275 SCM_FOR_EACH(ep, obj) {
276 if (SCM_READ_REFERENCE_P(SCM_CAR(ep))) {
277 SCM_SET_CAR(ep, ref_val(SCM_CAR(ep)));
278 }
279 if (SCM_READ_REFERENCE_P(SCM_CDR(ep))) {
280 /* in case we have (... . #N#) */
281 SCM_SET_CDR(ep, ref_val(SCM_CDR(ep)));
282 break;
283 }
284 }
285 } else if (SCM_VECTORP(obj)) {
286 int i, len = SCM_VECTOR_SIZE(obj);
287 for (i=0; i<len; i++) {
288 ep = SCM_VECTOR_ELEMENT(obj, i);
289 if (SCM_READ_REFERENCE_P(ep)) {
290 SCM_VECTOR_ELEMENTS(obj)[i] = ref_val(ep);
291 }
292 }
293 } else {
294 Scm_Error("read_context_flush: recursive reference only supported with vector and lists");
295 }
296 }
297 }
298
299 /*----------------------------------------------------------------
300 * Miscellaneous routines
301 */
302
303 /* Table of initial 128 bytes of ASCII characters to dispatch for
304 special meanings.
305 bit 0 : a valid constituent char of words
306 bit 1 : candidate of case folding
307
308 NB: '#' is marked as a constituent char, in order to read a possible
309 number as a word in read_word. The leading '#' is recognized by
310 read_internal and will not be passed to read_word.
311 */
312 static unsigned char ctypes[] = {
313 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
314 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
315 /* ! " # $ % & ' ( ) * + , - . / */
316 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1,
317 /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
318 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
319 /* @ A B C D E F G H I J K L M N O */
320 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
321 /* P Q R S T U V W X Y Z [ \ ] ^ _ */
322 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 1, 1,
323 /* ` a b c d e f g h i j k l m n o */
324 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
325 /* p q r s t u v w x y z { | } ~ ^? */
326 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
327 };
328
329 inline static int char_word_constituent(int c)
330 {
331 return (c >= 128 || (c >= 0 && (ctypes[(unsigned char)c]&1)));
332 }
333
334 inline static int char_word_case_fold(int c)
335 {
336 return (c >= 0 && c < 128 && (ctypes[(unsigned char)c]&2));
337 }
338
339 static void read_nested_comment(ScmPort *port, ScmReadContext *ctx)
340 {
341 int nesting = 0;
342 int line = Scm_PortLine(port);
343 ScmChar c, c1;
344
345 for (c = Scm_GetcUnsafe(port);;) {
346 switch (c) {
347 case '#':
348 c1 = Scm_GetcUnsafe(port);
349 if (c1 == '|') { nesting++; break; }
350 else if (c1 == EOF) goto eof;
351 else c = c1;
352 continue;
353 case '|':
354 c1 = Scm_GetcUnsafe(port);
355 if (c1 == '#') {
356 if (nesting-- == 0) {
357 return;
358 }
359 break;
360 }
361 else if (c1 == EOF) goto eof;
362 else c = c1;
363 continue;
364 case EOF:
365 eof:
366 Scm_ReadError(port, "encountered EOF inside nested multi-line comment (comment begins at line %d)", line);
367 default:
368 break;
369 }
370 c = Scm_GetcUnsafe(port);
371 }
372 }
373
374 static int skipws(ScmPort *port, ScmReadContext *ctx)
375 {
376 for (;;) {
377 int c = Scm_GetcUnsafe(port);
378 if (c == EOF) return c;
379 if (c <= 256 && isspace(c)) continue;
380 if (c == ';') {
381 for (;;) {
382 /* NB: comment may contain unexpected character code.
383 for the safety, we read bytes here. */
384 c = Scm_GetbUnsafe(port);
385 if (c == '\n') {
386 /* oops. ugly. */
387 port->line++;
388 break;
389 }
390 if (c == EOF) return EOF;
391 }
392 continue;
393 }
394 return c;
395 }
396 }
397
398 static ScmObj read_internal(ScmPort *port, ScmReadContext *ctx)
399 {
400 int c = skipws(port, ctx);
401 switch (c) {
402 case '(':
403 return read_list(port, ')', ctx);
404 case '"':
405 return read_string(port, FALSE, ctx);
406 case '#':
407 {
408 int c1 = Scm_GetcUnsafe(port);
409 switch (c1) {
410 case EOF:
411 Scm_ReadError(port, "premature #-sequence at EOF");
412 case 't':; case 'T': return SCM_TRUE;
413 case 'f':; case 'F': return maybe_uvector(port, 'f', ctx);
414 case 's':; case 'S': return maybe_uvector(port, 's', ctx);
415 case 'u':; case 'U': return maybe_uvector(port, 'u', ctx);
416 case '(':
417 return read_vector(port, ')', ctx);
418 case '\\':
419 return read_char(port, ctx);
420 case 'x':; case 'X':; case 'o':; case 'O':;
421 case 'b':; case 'B':; case 'd':; case 'D':;
422 case 'e':; case 'E':; case 'i':; case 'I':;
423 Scm_UngetcUnsafe(c1, port);
424 return read_number(port, c, ctx);
425 case '!':
426 /* allow `#!' magic of executable */
427 for (;;) {
428 c = Scm_GetcUnsafe(port);
429 if (c == '\n') return SCM_UNDEFINED;
430 if (c == EOF) return SCM_EOF;
431 }
432 case '/':
433 /* #/.../ literal regexp */
434 return read_regexp(port);
435 case '[':
436 /* #[...] literal charset */
437 return read_charset(port);
438 case ',':
439 /* #,(form) - SRFI-10 read-time macro */
440 return read_sharp_comma(port, ctx);
441 case '|':
442 /* #| - block comment (SRFI-30)
443 it is equivalent to whitespace, so we return #<undef> */
444 read_nested_comment(port, ctx);
445 return SCM_UNDEFINED;
446 case '`':
447 /* #`"..." is a special syntax of #,(string-interpolate "...") */
448 {
449 ScmObj form = read_item(port, ctx);
450 return process_sharp_comma(port,
451 SCM_SYM_STRING_INTERPOLATE,
452 SCM_LIST1(form), ctx, FALSE);
453 }
454 case '?':
455 /* #? - debug directives */
456 {
457 int c2;
458 ScmObj form;
459
460 c2 = Scm_GetcUnsafe(port);
461 switch (c2) {
462 case '=':
463 /* #?=form - debug print */
464 form = read_item(port, ctx);
465 return SCM_LIST2(SCM_SYM_DEBUG_PRINT, form);
466 case EOF:
467 return SCM_EOF;
468 default:
469 Scm_ReadError(port, "unsupported #?-syntax: #?%C", c2);
470 }
471 }
472 case '0': case '1': case '2': case '3': case '4':
473 case '5': case '6': case '7': case '8': case '9':
474 /* #N# or #N= form */
475 return read_reference(port, c1, ctx);
476 case '*':
477 /* #*"...." byte string
478 #*01001001 for bit vector, maybe in future. */
479 {
480 int c2;
481 c2 = Scm_GetcUnsafe(port);
482 if (c2 == '"') return read_string(port, TRUE, ctx);
483 Scm_ReadError(port, "unsupported #*-syntax: #*%C", c2);
484 }
485 case ';':
486 /* #;expr - comment out sexpr */
487 read_item(port, ctx); /* read and discard */
488 return SCM_UNDEFINED; /* indicate this is a comment */
489 default:
490 Scm_ReadError(port, "unsupported #-syntax: #%C", c1);
491 }
492 }
493 case '\'': return read_quoted(port, SCM_SYM_QUOTE, ctx);
494 case '`': return read_quoted(port, SCM_SYM_QUASIQUOTE, ctx);
495 case ':':
496 return read_keyword(port, ctx);
497 case ',':
498 {
499 int c1 = Scm_GetcUnsafe(port);
500 if (c1 == EOF) {
501 Scm_ReadError(port, "unterminated unquote");
502 } else if (c1 == '@') {
503 return read_quoted(port, SCM_SYM_UNQUOTE_SPLICING, ctx);
504 } else {
505 Scm_UngetcUnsafe(c1, port);
506 return read_quoted(port, SCM_SYM_UNQUOTE, ctx);
507 }
508 }
509 case '|':
510 return read_escaped_symbol(port, '|');
511 case '[':
512 /* TODO: make it customizable */
513 return read_list(port, ']', ctx);
514 case '{':
515 return read_list(port, '}', ctx);
516 case '+':; case '-':
517 /* Note: R5RS doesn't permit identifiers beginning with '+' or '-',
518 but some Scheme programs use such identifiers. */
519 return read_symbol_or_number(port, c, ctx);
520 case '.':;
521 {
522 int c1 = Scm_GetcUnsafe(port);
523 if (!char_word_constituent(c1)) {
524 Scm_ReadError(port, "dot in wrong context");
525 }
526 Scm_UngetcUnsafe(c1, port);
527 return read_symbol_or_number(port, c, ctx);
528 }
529 case '0':; case '1':; case '2':; case '3':; case '4':;
530 case '5':; case '6':; case '7':; case '8':; case '9':;
531 /* Note: R5RS doesn't permit identifiers beginning with digits,
532 but some Scheme programs use such identifiers. */
533 return read_symbol_or_number(port, c, ctx);
534 case ')':; case ']':; case '}':;
535 Scm_ReadError(port, "extra close parenthesis");
536 case EOF:
537 return SCM_EOF;
538 default:
539 return read_symbol(port, c, ctx);
540 }
541 }
542
543 static ScmObj read_item(ScmPort *port, ScmReadContext *ctx)
544 {
545 for (;;) {
546 ScmObj obj = read_internal(port, ctx);
547 if (!SCM_UNDEFINEDP(obj)) return obj;
548 }
549 }
550
551 /*----------------------------------------------------------------
552 * List
553 */
554
555 /* Internal read_list. returns whether the list contains unresolved
556 reference or not within the flag has_ref */
557 static ScmObj read_list_int(ScmPort *port, ScmChar closer,
558 ScmReadContext *ctx, int *has_ref, int start_line)
559 {
560 ScmObj start = SCM_NIL, last = SCM_NIL, item;
561 int c, dot_seen = FALSE, ref_seen = FALSE;
562
563 for (;;) {
564 c = skipws(port, ctx);
565 if (c == EOF) goto eoferr;
566 if (c == closer) {
567 *has_ref = !!ref_seen;
568 return start;
569 }
570
571 if (dot_seen) goto baddot;
572
573 if (c == '.') {
574 int c2 = Scm_GetcUnsafe(port);
575 if (c2 == closer) {
576 goto baddot;
577 } else if (c2 == EOF) {
578 goto eoferr;
579 } else if (isspace(c2)) {
580 /* dot pair at the end */
581 if (start == SCM_NIL) goto baddot;
582 item = read_item(port, ctx);
583 if (SCM_READ_REFERENCE_P(item)) ref_seen = TRUE;
584 SCM_SET_CDR(last, item);
585 dot_seen = TRUE;
586 continue;
587 }
588 Scm_UngetcUnsafe(c2, port);
589 item = read_symbol_or_number(port, c, ctx);
590 } else {
591 Scm_UngetcUnsafe(c, port);
592 item = read_internal(port, ctx);
593 if (SCM_UNDEFINEDP(item)) continue;
594 if (SCM_READ_REFERENCE_P(item)) ref_seen = TRUE;
595 }
596 SCM_APPEND1(start, last, item);
597 }
598 eoferr:
599 if (start_line >= 0) {
600 Scm_ReadError(port, "EOF inside a list (starting from line %d)",
601 start_line);
602 } else {
603 Scm_ReadError(port, "EOF inside a list");
604 }
605 baddot:
606 Scm_ReadError(port, "bad dot syntax");
607 return SCM_NIL; /* dummy */
608 }
609
610 static ScmObj read_list(ScmPort *port, ScmChar closer, ScmReadContext *ctx)
611 {
612 int has_ref;
613 int line = -1;
614 ScmObj r;
615
616 if (ctx->flags & SCM_READ_SOURCE_INFO) line = Scm_PortLine(port);
617
618 r = read_list_int(port, closer, ctx, &has_ref, line);
619
620 if (SCM_PAIRP(r) && (ctx->flags & SCM_READ_SOURCE_INFO) && line >= 0) {
621 /* Swap the head of the list for an extended pair to record
622 source-code info.*/
623 r = Scm_ExtendedCons(SCM_CAR(r), SCM_CDR(r));
624 Scm_PairAttrSet(SCM_PAIR(r), SCM_SYM_SOURCE_INFO,
625 SCM_LIST2(Scm_PortName(port), SCM_MAKE_INT(line)));
626 }
627
628 if (has_ref) ref_push(ctx, r, SCM_FALSE);
629 return r;
630 }
631
632 static ScmObj read_vector(ScmPort *port, ScmChar closer, ScmReadContext *ctx)
633 {
634 int has_ref;
635 int line = -1;
636 ScmObj r;
637
638 if (ctx->flags & SCM_READ_SOURCE_INFO) line = Scm_PortLine(port);
639 r = read_list_int(port, closer, ctx, &has_ref, line);
640 r = Scm_ListToVector(r, 0, -1);
641 if (has_ref) ref_push(ctx, r, SCM_FALSE);
642 return r;
643 }
644
645 static ScmObj read_quoted(ScmPort *port, ScmObj quoter, ScmReadContext *ctx)
646 {
647 int line = -1;
648 ScmObj item, r;
649
650 if (ctx->flags & SCM_READ_SOURCE_INFO) line = Scm_PortLine(port);
651 item = read_item(port, ctx);
652 if (SCM_EOFP(item)) Scm_ReadError(port, "unterminated quote");
653 if (line >= 0) {
654 r = Scm_ExtendedCons(quoter, Scm_Cons(item, SCM_NIL));
655 Scm_PairAttrSet(SCM_PAIR(r), SCM_SYM_SOURCE_INFO,
656 SCM_LIST2(Scm_PortName(port), SCM_MAKE_INT(line)));
657 } else {
658 r = Scm_Cons(quoter, Scm_Cons(item, SCM_NIL));
659 }
660 if (SCM_READ_REFERENCE_P(item)) ref_push(ctx, SCM_CDR(r), SCM_FALSE);
661 return r;
662 }
663
664 /*----------------------------------------------------------------
665 * String
666 */
667
668 static ScmChar read_string_xdigits(ScmPort *port, int ndigs, int key,
669 int incompletep)
670 {
671 char buf[8];
672 int nread;
673 ScmChar r;
674 SCM_ASSERT(ndigs <= 8);
675 r = Scm_ReadXdigitsFromPort(port, ndigs, buf, &nread);
676 if (r == SCM_CHAR_INVALID) {
677 ScmDString ds;
678 int c, i;
679 /* skip chars to the end of string, so that the reader will read
680 after the erroneous string */
681 for (;;) {
682 if (incompletep) c = Scm_GetbUnsafe(port);
683 else c = Scm_GetcUnsafe(port);
684 if (c == EOF || c == '"') break;
685 if (c == '\\') {
686 if (incompletep) c = Scm_GetbUnsafe(port);
687 else c = Scm_GetcUnsafe(port);
688 }
689 }
690 /* construct an error message */
691 Scm_DStringInit(&ds);
692 Scm_DStringPutc(&ds, '\\');
693 Scm_DStringPutc(&ds, key);
694 for (i=0; i<nread; i++) Scm_DStringPutc(&ds, (unsigned char)buf[i]);
695 Scm_ReadError(port,
696 "Bad '\\%c' escape sequence in a string literal: %s",
697 key, Scm_DStringGetz(&ds));
698 }
699 return r;
700 }
701
702 static ScmObj read_string(ScmPort *port, int incompletep,
703 ScmReadContext *ctx)
704 {
705 int c = 0;
706 ScmDString ds;
707 Scm_DStringInit(&ds);
708
709 #define FETCH(var) \
710 if (incompletep) { var = Scm_GetbUnsafe(port); } \
711 else { var = Scm_GetcUnsafe(port); }
712 #define ACCUMULATE(var) \
713 if (incompletep) { SCM_DSTRING_PUTB(&ds, var); } \
714 else { SCM_DSTRING_PUTC(&ds, var); }
715
716 for (;;) {
717 FETCH(c);
718 switch (c) {
719 case EOF: goto eof_exit;
720 case '"': {
721 int flags = ((incompletep? SCM_STRING_INCOMPLETE : 0)
722 | SCM_STRING_IMMUTABLE);
723 return Scm_DStringGet(&ds, flags);
724 }
725 case '\\': {
726 int c1 = Scm_GetcUnsafe(port);
727 switch (c1) {
728 case EOF: goto eof_exit;
729 case 'n': ACCUMULATE('\n'); break;
730 case 'r': ACCUMULATE('\r'); break;
731 case 'f': ACCUMULATE('\f'); break;
732 case 't': ACCUMULATE('\t'); break;
733 case '\\': ACCUMULATE('\\'); break;
734 case '0': ACCUMULATE(0); break;
735 case 'x': {
736 int cc = read_string_xdigits(port, 2, 'x', incompletep);
737 ACCUMULATE(cc);
738 break;
739 }
740 case 'u': {
741 int cc = read_string_xdigits(port, 4, 'u', incompletep);
742 ACCUMULATE(Scm_UcsToChar(cc));
743 break;
744 }
745 case 'U': {
746 int cc = read_string_xdigits(port, 8, 'U', incompletep);
747 ACCUMULATE(Scm_UcsToChar(cc));
748 break;
749 }
750 default:
751 ACCUMULATE(c1); break;
752 }
753 break;
754 }
755 default: ACCUMULATE(c); break;
756 }
757 }
758 eof_exit:
759 Scm_ReadError(port, "EOF encountered in a string literal: %S",
760 Scm_DStringGet(&ds, 0));
761 /* NOTREACHED */
762 return SCM_FALSE;
763 }
764
765 /*----------------------------------------------------------------
766 * Character
767 */
768
769 static struct char_name {
770 const char *name;
771 ScmObj ch;
772 } char_names[] = {
773 { "space", SCM_MAKE_CHAR(' ') },
774 { "newline", SCM_MAKE_CHAR('\n') },
775 { "nl", SCM_MAKE_CHAR('\n') },
776 { "lf", SCM_MAKE_CHAR('\n') },
777 { "return", SCM_MAKE_CHAR('\r') },
778 { "cr", SCM_MAKE_CHAR('\r') },
779 { "tab", SCM_MAKE_CHAR('\t') },
780 { "ht", SCM_MAKE_CHAR('\t') },
781 { "page", SCM_MAKE_CHAR('\f') },
782 { "escape", SCM_MAKE_CHAR(0x1b) },
783 { "esc", SCM_MAKE_CHAR(0x1b) },
784 { "delete", SCM_MAKE_CHAR(0x7f) },
785 { "del", SCM_MAKE_CHAR(0x7f) },
786 { "null", SCM_MAKE_CHAR(0) },
787 { NULL, 0 }
788 };
789
790 static ScmObj read_char(ScmPort *port, ScmReadContext *ctx)
791 {
792 int c;
793 ScmString *name;
794 const char *cname;
795 u_int namelen, namesize;
796 struct char_name *cntab = char_names;
797
798 c = Scm_GetcUnsafe(port);
799 switch (c) {
800 case EOF: Scm_ReadError(port, "EOF encountered in character literal");
801 case '(':; case ')':; case '[':; case ']':; case '{':; case '}':;
802 case '"':; case ' ':; case '\\':; case '|':; case ';':;
803 case '#':;
804 return SCM_MAKE_CHAR(c);
805 default:
806 /* need to read word to see if it is a character name */
807 name = SCM_STRING(read_word(port, c, ctx, TRUE));
808 cname = Scm_GetStringContent(name, &namesize, &namelen, NULL);
809 if (namelen == 1) {
810 return SCM_MAKE_CHAR(c);
811 }
812 if (namelen != namesize) {
813 /* no character name contains multibyte chars */
814 goto unknown;
815 }
816
817 /* handle #\x1f etc. */
818 if (cname[0] == 'x' && isxdigit(cname[1])) {
819 int code = Scm_ReadXdigitsFromString(cname+1, namesize-1, NULL);
820 if (code < 0) goto unknown;
821 return SCM_MAKE_CHAR(code);
822 }
823 /* handle #\uxxxx or #\uxxxxxxxx*/
824 if ((cname[0] == 'u') && isxdigit(cname[1])) {
825 int code;
826 if (namesize == 5 || namesize == 9) {
827 code = Scm_ReadXdigitsFromString(cname+1, namesize-1, NULL);
828 if (code >= 0) return SCM_MAKE_CHAR(Scm_UcsToChar(code));
829 }
830 /* if we come here, it's an error. */
831 Scm_ReadError(port, "Bad UCS character code: #\\%s", cname);
832 }
833
834 while (cntab->name) {
835 if (strncmp(cntab->name, cname, namesize) == 0) return cntab->ch;
836 cntab++;
837 }
838 unknown:
839 Scm_ReadError(port, "Unknown character name: #\\%A", name);
840 }
841 return SCM_UNDEFINED; /* dummy */
842 }
843
844 /*----------------------------------------------------------------
845 * Symbols and Numbers
846 */
847
848 /* Reads a sequence of word-constituent characters from PORT, and returns
849 ScmString. INITIAL may be a readahead character, or SCM_CHAR_INVALID
850 if there's none. TEMP_CASE_FOLD turns on case-fold mode regardless of
851 the read context setting.
852 */
853 static ScmObj read_word(ScmPort *port, ScmChar initial, ScmReadContext *ctx,
854 int temp_case_fold)
855 {
856 int c = 0;
857 int case_fold = temp_case_fold || (ctx->flags & SCM_READ_CASE_FOLD);
858 ScmDString ds;
859 Scm_DStringInit(&ds);
860 if (initial != SCM_CHAR_INVALID) {
861 if (case_fold && char_word_case_fold(initial)) initial = tolower(initial);
862 SCM_DSTRING_PUTC(&ds, initial);
863 }
864
865 for (;;) {
866 c = Scm_GetcUnsafe(port);
867 if (c == EOF || !char_word_constituent(c)) {
868 Scm_UngetcUnsafe(c, port);
869 return Scm_DStringGet(&ds, 0);
870 }
871 if (case_fold && char_word_case_fold(c)) c = tolower(c);
872 SCM_DSTRING_PUTC(&ds, c);
873 }
874 }
875
876 static ScmObj read_symbol(ScmPort *port, ScmChar initial, ScmReadContext *ctx)
877 {
878 ScmString *s = SCM_STRING(read_word(port, initial, ctx, FALSE));
879 return Scm_Intern(s);
880 }
881
882 static ScmObj read_number(ScmPort *port, ScmChar initial, ScmReadContext *ctx)
883 {
884 ScmString *s = SCM_STRING(read_word(port, initial, ctx, FALSE));
885 ScmObj num = Scm_StringToNumber(s, 10, TRUE);
886 if (num == SCM_FALSE)
887 Scm_ReadError(port, "bad numeric format: %S", s);
888 return num;
889 }
890
891 static ScmObj read_symbol_or_number(ScmPort *port, ScmChar initial, ScmReadContext *ctx)
892 {
893 ScmString *s = SCM_STRING(read_word(port, initial, ctx, FALSE));
894 ScmObj num = Scm_StringToNumber(s, 10, TRUE);
895 if (num == SCM_FALSE)
896 return Scm_Intern(s);
897 else
898 return num;
899 }
900
901 static ScmObj read_keyword(ScmPort *port, ScmReadContext *ctx)
902 {
903 ScmString *s = SCM_STRING(read_word(port, SCM_CHAR_INVALID, ctx, FALSE));
904 return Scm_MakeKeyword(s);
905 }
906
907 static ScmObj read_escaped_symbol(ScmPort *port, ScmChar delim)
908 {
909 int c = 0;
910 ScmDString ds;
911 Scm_DStringInit(&ds);
912
913 for (;;) {
914 c = Scm_GetcUnsafe(port);
915 if (c == EOF) {
916 goto err;
917 } else if (c == delim) {
918 ScmString *s = SCM_STRING(Scm_DStringGet(&ds, 0));
919 return Scm_Intern(s);
920 } else if (c == '\\') {
921 /* CL-style single escape */
922 c = Scm_GetcUnsafe(port);
923 /* TODO: we should recognize \xNN, since the symbol writer
924 prints a symbol name in that syntax. */
925 if (c == EOF) goto err;
926 SCM_DSTRING_PUTC(&ds, c);
927 } else {
928 SCM_DSTRING_PUTC(&ds, c);
929 }
930 }
931 err:
932 Scm_ReadError(port, "unterminated escaped symbol: |%s ...",
933 Scm_DStringGetz(&ds));
934 return SCM_UNDEFINED; /* dummy */
935 }
936
937 /*----------------------------------------------------------------
938 * Regexp & charset
939 */
940
941 /* gauche extension : #/regexp/ */
942 static ScmObj read_regexp(ScmPort *port)
943 {
944 ScmChar c = 0;
945 ScmDString ds;
946 Scm_DStringInit(&ds);
947 for (;;) {
948 c = Scm_GetcUnsafe(port);
949 if (c == SCM_CHAR_INVALID) {
950 Scm_ReadError(port, "unterminated literal regexp");
951 }
952 if (c == '\\') {
953 SCM_DSTRING_PUTC(&ds, c);
954 c = Scm_GetcUnsafe(port);
955 if (c == SCM_CHAR_INVALID) {
956 Scm_ReadError(port, "unterminated literal regexp");
957 }
958 SCM_DSTRING_PUTC(&ds, c);
959 } else if (c == '/') {
960 /* Read one more char to see if we have a flag */
961 int flags = 0;
962 c = Scm_GetcUnsafe(port);
963 if (c == 'i') flags |= SCM_REGEXP_CASE_FOLD;
964 else Scm_UngetcUnsafe(c, port);
965 return Scm_RegComp(SCM_STRING(Scm_DStringGet(&ds, 0)), flags);
966 } else {
967 SCM_DSTRING_PUTC(&ds, c);
968 }
969 }
970 }
971
972 /* gauche extension : #[charset] */
973 static ScmObj read_charset(ScmPort *port)
974 {
975 return Scm_CharSetRead(port, NULL, TRUE, FALSE);
976 }
977
978 /*----------------------------------------------------------------
979 * Back reference (#N# and #N=)
980 */
981
982 static ScmObj read_reference(ScmPort *port, ScmChar ch, ScmReadContext *ctx)
983 {
984 ScmHashEntry *e = NULL;
985 int refnum = Scm_DigitToInt(ch, 10);
986
987 for (;;) {
988 ch = Scm_GetcUnsafe(port);
989 if (ch == EOF) {
990 Scm_ReadError(port, "unterminated reference form (#digits)");
991 }
992 if (SCM_CHAR_ASCII_P(ch) && isdigit(ch)) {
993 refnum = refnum*10+Scm_DigitToInt(ch, 10);
994 if (refnum < 0) Scm_ReadError(port, "reference number overflow");
995 continue;
996 }
997 if (ch != '#' && ch != '=') {
998 Scm_ReadError(port, "invalid reference form (must be either #digits# or #digits=) : #%d%A", refnum, SCM_MAKE_CHAR(ch));
999 }
1000 break;
1001 }
1002 if (ch == '#') {
1003 /* #digit# - back reference */
1004 if (ctx->table == NULL
1005 || (e = Scm_HashTableGet(ctx->table, Scm_MakeInteger(refnum))) == NULL) {
1006 Scm_ReadError(port, "invalid reference number in #%d#", refnum);
1007 }
1008 if (SCM_READ_REFERENCE_P(e->value)
1009 && SCM_READ_REFERENCE_REALIZED(e->value)) {
1010 return SCM_READ_REFERENCE(e->value)->value;
1011 } else {
1012 return e->value;
1013 }
1014 } else {
1015 /* #digit= - register */
1016 ScmObj val;
1017 ScmObj ref = Scm_MakeReadReference();
1018
1019 if (ctx->table == NULL) {
1020 ctx->table =
1021 SCM_HASH_TABLE(Scm_MakeHashTableSimple(SCM_HASH_EQV, 0));
1022 }
1023 if (Scm_HashTableGet(ctx->table, Scm_MakeInteger(refnum)) != NULL) {
1024 Scm_ReadError(port, "duplicate back-reference number in #%d=", refnum);
1025 }
1026 ref_register(ctx, ref, refnum);
1027 val = read_item(port, ctx);
1028 SCM_READ_REFERENCE(ref)->value = val;
1029 return val;
1030 }
1031 }
1032
1033 /*----------------------------------------------------------------
1034 * SRFI-10 support
1035 */
1036
1037 ScmObj Scm_DefineReaderCtor(ScmObj symbol, ScmObj proc, ScmObj finisher)
1038 {
1039 ScmObj pair;
1040 if (!SCM_PROCEDUREP(proc)) {
1041 Scm_Error("procedure required, but got %S\n", proc);
1042 }
1043 pair = Scm_Cons(proc, finisher);
1044 (void)SCM_INTERNAL_MUTEX_LOCK(readCtorData.mutex);
1045 Scm_HashTablePut(readCtorData.table, symbol, pair);
1046 (void)SCM_INTERNAL_MUTEX_UNLOCK(readCtorData.mutex);
1047 return SCM_UNDEFINED;
1048 }
1049
1050 static ScmObj read_sharp_comma(ScmPort *port, ScmReadContext *ctx)
1051 {
1052 int len, has_ref, line=-1;
1053 ScmChar next;
1054 ScmObj form, r;
1055
1056 next = Scm_GetcUnsafe(port);
1057 if (next != '(') {
1058 Scm_ReadError(port, "bad #,-form: '(' should be followed, but got %C",
1059 next);
1060 }
1061
1062 if (ctx->flags & SCM_READ_SOURCE_INFO) line = Scm_PortLine(port);
1063
1064 form = read_list_int(port, ')', ctx, &has_ref, line);
1065 len = Scm_Length(form);
1066 if (len <= 0) {
1067 Scm_ReadError(port, "bad #,-form: #,%S", form);
1068 }
1069 r = process_sharp_comma(port, SCM_CAR(form), SCM_CDR(form), ctx, has_ref);
1070 return r;
1071 }
1072
1073 static ScmObj process_sharp_comma(ScmPort *port, ScmObj key, ScmObj args,
1074 ScmReadContext *ctx, int has_ref)
1075 {
1076 ScmHashEntry *e;
1077 ScmObj r;
1078
1079 (void)SCM_INTERNAL_MUTEX_LOCK(readCtorData.mutex);
1080 e = Scm_HashTableGet(readCtorData.table, key);
1081 (void)SCM_INTERNAL_MUTEX_UNLOCK(readCtorData.mutex);
1082
1083 if (e == NULL) Scm_ReadError(port, "unknown #,-key: %S", key);
1084 SCM_ASSERT(SCM_PAIRP(e->value));
1085 r = Scm_Apply(SCM_CAR(e->value), args);
1086 if (has_ref) ref_push(ctx, r, SCM_CDR(e->value));
1087 return r;
1088 }
1089
1090 static ScmObj reader_ctor(ScmObj *args, int nargs, void *data)
1091 {
1092 ScmObj optarg = (nargs > 2? args[2] : SCM_FALSE);
1093 return Scm_DefineReaderCtor(args[0], args[1], optarg);
1094 }
1095
1096 /*----------------------------------------------------------------
1097 * Uvector
1098 */
1099
1100 /* Uvector support is implemented by extention. When the extention
1101 is loaded, it sets up the pointer Scm_ReadUvectorHook. */
1102
1103 static ScmObj maybe_uvector(ScmPort *port, char ch, ScmReadContext *ctx)
1104 {
1105 ScmChar c1, c2 = SCM_CHAR_INVALID;
1106 char *tag = NULL;
1107
1108 c1 = Scm_GetcUnsafe(port);
1109 if (ch == 'f') {
1110 if (c1 != '3' && c1 != '6') {
1111 Scm_UngetcUnsafe(c1, port);
1112 return SCM_FALSE;
1113 }
1114 c2 = Scm_GetcUnsafe(port);
1115 if (c1 == '3' && c2 == '2') tag = "f32";
1116 else if (c1 == '6' && c2 == '4') tag = "f64";
1117 } else {
1118 if (c1 == '8') tag = (ch == 's')? "s8" : "u8";
1119 else if (c1 == '1') {
1120 c2 = Scm_GetcUnsafe(port);
1121 if (c2 == '6') tag = (ch == 's')? "s16" : "u16";
1122 }
1123 else if (c1 == '3') {
1124 c2 = Scm_GetcUnsafe(port);
1125 if (c2 == '2') tag = (ch == 's')? "s32" : "u32";
1126 }
1127 else if (c1 == '6') {
1128 c2 = Scm_GetcUnsafe(port);
1129 if (c2 == '4') tag = (ch == 's')? "s64" : "u64";
1130 }
1131 }
1132 if (tag == NULL) {
1133 char buf[SCM_CHAR_MAX_BYTES*4], *bufp = buf;
1134 *bufp++ = ch;
1135 SCM_CHAR_PUT(bufp, c1);
1136 bufp += SCM_CHAR_NBYTES(c1);
1137 if (c2 != SCM_CHAR_INVALID) {
1138 SCM_CHAR_PUT(bufp, c2);
1139 bufp += SCM_CHAR_NBYTES(c2);
1140 }
1141 *bufp = '\0';
1142 Scm_ReadError(port, "invalid uniform vector tag: %s", buf);
1143 }
1144 if (Scm_ReadUvectorHook == NULL) {
1145 /* Require srfi-4 (gauche/uvector)
1146 NB: we don't need mutex here, for the loading of srfi-4 is
1147 serialized in Scm_Require. */
1148 Scm_Require(SCM_MAKE_STR("gauche/uvector"));
1149 if (Scm_ReadUvectorHook == NULL)
1150 Scm_ReadError(port, "couldn't load srfi-4 module");
1151 }
1152 return Scm_ReadUvectorHook(port, tag, ctx);
1153 }
1154
1155 /*----------------------------------------------------------------
1156 * Initialization
1157 */
1158
1159 void Scm__InitRead(void)
1160 {
1161 readCtorData.table =
1162 SCM_HASH_TABLE(Scm_MakeHashTableSimple(SCM_HASH_EQ, 0));
1163 (void)SCM_INTERNAL_MUTEX_INIT(readCtorData.mutex);
1164 Scm_DefineReaderCtor(SCM_SYM_DEFINE_READER_CTOR,
1165 Scm_MakeSubr(reader_ctor, NULL, 2, 1,
1166 SCM_SYM_DEFINE_READER_CTOR),
1167 SCM_FALSE);
1168 }
1169