https://github.com/JuliaLang/julia
Raw File
Tip revision: 1187040d027210cc466f0b6a6d54118fd692cf2d authored by Stefan Karpinski on 08 March 2013, 04:30:57 UTC
VERSION: 0.1.2
Tip revision: 1187040
julia-parser.scm
(define ops-by-prec
  '#((= := += -= *= /= //= .//= .*= ./= |\\=| |.\\=| ^= .^= %= |\|=| &= $= => <<= >>= >>>= ~ |.+=| |.-=|)
     (?)
     (|\|\||)
     (&&)
     ; note: there are some strange-looking things in here because
     ; the way the lexer works, every prefix of an operator must also
     ; be an operator.
     (<- -- -->)
     (> < >= <= == === != |.>| |.<| |.>=| |.<=| |.==| |.!=| |.=| |.!| |<:| |>:| |&>| |&<|)
     (: |..|)
     (+ - |.+| |.-| |\|| $)
     (<< >> >>> |.<<| |.>>| |&>>| |&<<|)
     (* / |./| % & |.*| |\\| |.\\|)
     (// .//)
     (^ |.^|)
     (|::|)
     (|.|)))

(define-macro (prec-ops n) `(quote ,(aref ops-by-prec n)))

; disable range colon for parsing ternary conditional operator
(define range-colon-enabled #t)
; in space-sensitive mode "x -y" is 2 expressions, not a subtraction
(define space-sensitive #f)
(define inside-vec #f)
; treat 'end' like a normal symbol instead of a reserved word
(define end-symbol #f)
; treat newline like ordinary whitespace instead of as a potential separator
(define whitespace-newline #f)

(define current-filename 'none)

(define-macro (with-normal-ops . body)
  `(with-bindings ((range-colon-enabled #t)
		   (space-sensitive #f))
		  ,@body))

(define-macro (without-range-colon . body)
  `(with-bindings ((range-colon-enabled #f))
		  ,@body))

(define-macro (with-space-sensitive . body)
  `(with-bindings ((space-sensitive #t)
		   (whitespace-newline #f))
		  ,@body))

(define-macro (with-inside-vec . body)
  `(with-bindings ((space-sensitive #t)
		   (inside-vec #t)
		   (whitespace-newline #f))
		  ,@body))

(define-macro (with-inside-ref . body)
  `(with-bindings ((space-sensitive #f)
		   (inside-vec #t)
		   (whitespace-newline #t))
		  ,@body))

(define-macro (with-end-symbol . body)
  `(with-bindings ((end-symbol #t))
		  ,@body))

(define-macro (with-whitespace-newline . body)
  `(with-bindings ((whitespace-newline #t))
		  ,@body))

(define-macro (without-whitespace-newline . body)
  `(with-bindings ((whitespace-newline #f))
		  ,@body))

(define assignment-ops (prec-ops 0))

(define (assignment? e)
  (and (pair? e) (eq? (car e) '=)))

(define unary-ops '(+ - ! ~ |<:| |>:|))

; operators that are both unary and binary
(define unary-and-binary-ops '(+ - $ & ~))

; operators that are special forms, not function names
(define syntactic-operators
  '(= := += -= *= /= //= .//= .*= ./= |\\=| |.\\=| ^= .^= %= |\|=| &= $= =>
      <<= >>= >>>= -> --> |\|\|| && |::| |.| ...))
(define syntactic-unary-operators '($ &))

(define reserved-words '(begin while if for try return break continue
			 function macro quote let local global const
			 abstract typealias type bitstype ccall do
			 module baremodule using import export importall))

(define (syntactic-op? op) (memq op syntactic-operators))
(define (syntactic-unary-op? op) (memq op syntactic-unary-operators))

(define trans-op (string->symbol ".'"))
(define ctrans-op (string->symbol "'"))
(define vararg-op (string->symbol "..."))

(define operators (list* '~ '! '-> ctrans-op trans-op vararg-op
			 (delete-duplicates
			  (apply append (vector->list ops-by-prec)))))

(define op-chars
  (list->string
   (delete-duplicates
    (apply append
	   (map string->list (map symbol->string operators))))))

(define (dict-literal? l)
  (and (length= l 3) (eq? (car l) '=>)))

; --- lexer ---

(define special-char?
  (let ((chrs (string->list "()[]{},;\"`@")))
    (lambda (c) (memv c chrs))))
(define (newline? c) (eqv? c #\newline))
(define (identifier-char? c) (or (and (char>=? c #\A)
				      (char<=? c #\Z))
				 (and (char>=? c #\a)
				      (char<=? c #\z))
				 (and (char>=? c #\0)
				      (char<=? c #\9))
				 (char>=? c #\uA1)
				 (eqv? c #\_)))
;; characters that can be in an operator
(define (opchar? c) (string.find op-chars c))
;; characters that can follow . in an operator
(define (dot-opchar? c) (and (char? c) (string.find ".*^/\\" c)))
(define (operator? c) (memq c operators))

(define (skip-to-eol port)
  (let ((c (peek-char port)))
    (cond ((eof-object? c)    c)
	  ((eqv? c #\newline) c)
	  (else               (read-char port)
			      (skip-to-eol port)))))

(define (read-operator port c)
  (read-char port)
  (if (and (eqv? c #\*) (eqv? (peek-char port) #\*))
      (error "use ^ instead of **"))
  (if (or (eof-object? (peek-char port)) (not (opchar? (peek-char port))))
      (symbol (string c)) ; 1-char operator
      (let loop ((str (string c))
		 (c   (peek-char port)))
	(if (and (not (eof-object? c)) (opchar? c))
	    (let ((newop (string str c)))
	      (if (operator? (string->symbol newop))
		  (begin (read-char port)
			 (loop newop (peek-char port)))
		  (string->symbol str)))
	    (string->symbol str)))))

(define (accum-digits c pred port lz)
  (if (and (not lz) (eqv? c #\_))
      (cons "_" #f)
      (let loop ((str '())
		 (c c))
	(if (eqv? c #\_)
	    (begin (read-char port)
		   (let ((c (peek-char port)))
		     (if (and (not (eof-object? c)) (pred c))
			 (loop str c)
			 (begin
			   (io.ungetc port #\_)
			   (cons (list->string (reverse str)) #t)))))
	    (if (and (not (eof-object? c)) (pred c))
		(begin (read-char port)
		       (loop (cons c str) (peek-char port)))
		(cons (list->string (reverse str)) #t))))))

(define (char-hex? c)
  (or (char-numeric? c)
      (and (>= c #\a) (<= c #\f))
      (and (>= c #\A) (<= c #\F))))

(define (char-oct? c)
  (and (>= c #\0) (<= c #\7)))

(define (char-bin? c)
  (or (eqv? c #\0)
      (eqv? c #\1)))

(define (string-to-number s r)
  (string->number
    (if (< r 16)
        (string.map (lambda (c) (if (eqv? c #\f) #\e c)) s)
        s)
    r))

(define (read-number port leadingdot neg)
  (let ((str  (open-output-string))
	(pred char-numeric?)
        (is-float32-literal #f)
	(leadingzero #f))
    (define (allow ch)
      (let ((c (peek-char port)))
	(and (eqv? c ch)
	     (begin (write-char (read-char port) str) #t))))
    (define (disallow-dot)
      (if (eqv? (peek-char port) #\.)
	  (begin (read-char port)
		 (if (dot-opchar? (peek-char port))
		     (io.ungetc port #\.)
		     (error (string "invalid numeric constant "
				    (get-output-string str) #\.))))))
    (define (read-digs lz)
      (let ((D (accum-digits (peek-char port) pred port lz)))
	(let ((d  (car D))
	      (ok (cdr D)))
	  (if (not ok)
	      (begin (display d str)
		     (error (string "invalid numeric constant "
				    (get-output-string str)))))
	  (and (not (equal? d ""))
	       (not (eof-object? d))
	       (display d str)
	       #t))))
    (if neg (write-char #\- str))
    (if leadingdot
	(write-char #\. str)
	(if (eqv? (peek-char port) #\0)
	    (begin (write-char (read-char port) str)
		   (set! leadingzero #t)
		   (cond ((allow #\x)
			  (begin
			     (set! leadingzero #f)
			     (set! pred char-hex?)))
			 ((allow #\o)
			  (begin
			     (set! leadingzero #f)
			     (set! pred char-oct?)))
			 ((allow #\b)
			  (begin
			     (set! leadingzero #f)
			     (set! pred char-bin?)))))
	    (allow #\.)))
    (read-digs leadingzero)
    (if (eqv? (peek-char port) #\.)
	(begin (read-char port)
	       (if (dot-opchar? (peek-char port))
		   (io.ungetc port #\.)
		   (begin (write-char #\. str)
			  (read-digs #f)
			  (disallow-dot)))))
    (let ((c (peek-char port)))
      (if (or (eqv? c #\e) (eqv? c #\E) (eqv? c #\f))
	  (begin (read-char port)
		 (let ((d (peek-char port)))
		   (if (and (not (eof-object? d))
			    (or (char-numeric? d) (eqv? d #\+) (eqv? d #\-)))
		       (begin (set! is-float32-literal (eqv? c #\f))
			      (write-char c str)
			      (write-char (read-char port) str)
			      (read-digs #f)
			      (disallow-dot))
		       (io.ungetc port c))))
	  ; disallow digits after binary or octal literals, e.g., 0b12
	  (if (and (or (eq? pred char-bin?) (eq? pred char-oct?))
		   (not (eof-object? c))
		   (char-numeric? c))
	      (error (string "invalid numeric constant "
			     (get-output-string str) c)))))
    (let* ((s (get-output-string str))
	   (r (cond ((eq? pred char-hex?) 16)
		    ((eq? pred char-oct?) 8)
		    ((eq? pred char-bin?) 2)
		    (else 10)))
	   (n (string-to-number s r)))
      (if n
	  (cond ((eq? pred char-hex?) (sized-uint-literal n s 4))
		((eq? pred char-oct?) (sized-uint-oct-literal n s))
		((eq? pred char-bin?) (sized-uint-literal n s 1))
                (is-float32-literal   (float n))
		(else (if (and (integer? n) (> n 9223372036854775807))
			  (error (string "invalid numeric constant " s))
			  n)))
	  (error (string "invalid numeric constant " s))))))

(define (sized-uint-literal n s b)
  (let ((l (* (- (length s) 2) b)))
    (cond ((<= l 8)  (uint8  n))
	  ((<= l 16) (uint16 n))
	  ((<= l 32) (uint32 n))
	  (else      (uint64 n)))))

(define (sized-uint-oct-literal n s)
  (if (eqv? (string.char s 2) #\0)
    (sized-uint-literal n s 3)
    (cond ((< n 256)        (uint8  n))
	  ((< n 65536)      (uint16 n))
	  ((< n 4294967296) (uint32 n))
	  (else             (uint64 n)))))

(define (skip-ws-and-comments port)
  (skip-ws port #t)
  (if (eqv? (peek-char port) #\#)
      (begin (skip-to-eol port)
	     (skip-ws-and-comments port)))
  #t)

(define (next-token port s)
  (aset! s 2 (eq? (skip-ws port whitespace-newline) #t))
  (let ((c (peek-char port)))
    (cond ((or (eof-object? c) (newline? c))  (read-char port))

	  ((special-char? c)    (read-char port))

	  ((char-numeric? c)    (read-number port #f #f))

	  ((eqv? c #\#)         (skip-to-eol port) (next-token port s))

	  ; . is difficult to handle; it could start a number or operator
	  ((and (eqv? c #\.)
		(let ((c (read-char port))
		      (nextc (peek-char port)))
		  (cond ((eof-object? nextc)
			 '|.|)
			((char-numeric? nextc)
			 (read-number port #t #f))
			((opchar? nextc)
			 (string->symbol
			  (string-append (string c)
					 (symbol->string
					  (read-operator port nextc)))))
			(else '|.|)))))

	  ((opchar? c)  (read-operator port c))

	  ((identifier-char? c) (accum-julia-symbol c port))

	  #;((eqv? c #\")
	   (with-exception-catcher
	    (lambda (e)
	      (error "invalid string literal"))
	    (lambda () (read port))))

	  (else (error (string "invalid character " (read-char port)))))))

; --- parser ---

(define (make-token-stream s) (vector #f s #t #f))
(define-macro (ts:port s)       `(aref ,s 1))
(define-macro (ts:last-tok s)   `(aref ,s 0))
(define-macro (ts:set-tok! s t) `(aset! ,s 0 ,t))
(define-macro (ts:space? s)     `(aref ,s 2))
(define-macro (ts:pbtok s)      `(aref ,s 3))
(define (ts:put-back! s t)
  (if (ts:pbtok s)
      (error "too many pushed-back tokens (internal error)")
      (aset! s 3 t)))

(define (peek-token s)
  (or (ts:pbtok s)
      (ts:last-tok s)
      (begin (ts:set-tok! s (next-token (ts:port s) s))
	     (ts:last-tok s))))

(define (require-token s)
  (let ((t (or (ts:pbtok s) (ts:last-tok s) (next-token (ts:port s) s))))
    (if (eof-object? t)
	(error "incomplete: premature end of input")
	(if (newline? t)
	    (begin (take-token s)
		   (require-token s))
	    (begin (if (not (ts:pbtok s)) (ts:set-tok! s t))
		   t)))))

(define (take-token s)
  (or
   (begin0 (ts:pbtok s)
	   (aset! s 3 #f))
   (begin0 (ts:last-tok s)
	   (ts:set-tok! s #f))))

; parse left-to-right binary operator
; produces structures like (+ (+ (+ 2 3) 4) 5)
(define (parse-LtoR s down ops)
  (let loop ((ex (down s)))
    (let ((t   (peek-token s))
	  #;(spc (ts:space? s)))
      (if (not (memq t ops))
	  ex
	  (begin (take-token s)
		 (cond #;((and space-sensitive spc (memq t unary-and-binary-ops)
			     (not (eqv? (peek-char (ts:port s)) #\ )))
			(ts:put-back! s t)
			ex)
		       ((syntactic-op? t)
			(loop (list t ex (down s))))
		       (else
			(loop (list 'call t ex (down s))))))))))

; parse right-to-left binary operator
; produces structures like (= a (= b (= c d)))
(define (parse-RtoL s down ops)
  (let ((ex (down s)))
    (let ((t   (peek-token s))
	  (spc (ts:space? s)))
      (if (not (memq t ops))
	  ex
	  (begin (take-token s)
		 (cond ((and space-sensitive spc (memq t unary-and-binary-ops)
			     (not (eqv? (peek-char (ts:port s)) #\ )))
			(ts:put-back! s t)
			ex)
		       ((syntactic-op? t)
			(list t ex (parse-RtoL s down ops)))
		       (else
			(list 'call t ex (parse-RtoL s down ops)))))))))

(define (parse-cond s)
  (let ((ex (parse-or s)))
    (cond ((eq? (peek-token s) '?)
	   (begin (take-token s)
		  (let ((then (without-range-colon (parse-eq* s))))
		    (if (not (eq? (take-token s) ':))
			(error "colon expected in ? expression")
			(list 'if ex then (parse-cond s))))))
	  #;((string? ex)
	   (let loop ((args (list ex)))
	     (let ((next (peek-token s)))
	       (if (or (eof-object? next) (closing-token? next)
		       (newline? next))
		   `(call (top string) ,@(reverse args))
		   (loop (cons (parse-or s) args))))))
	  (else ex))))

(define (invalid-initial-token? tok)
  (or (eof-object? tok)
      (memv tok '(#\) #\] #\} else elseif catch finally =))))

(define (line-number-node s)
  `(line ,(input-port-line (ts:port s))))

(define (line-number-filename-node s)
  `(line ,(input-port-line (ts:port s)) ,current-filename))

;; insert line/file for short-form function defs, otherwise leave alone
(define (short-form-function-loc ex lno)
  (if (and (pair? ex)
	   (eq? (car ex) '=)
	   (pair? (cadr ex))
	   (eq? (caadr ex) 'call))
      `(= ,(cadr ex) (block (line ,lno ,current-filename) ,(caddr ex)))
      ex))

; parse a@b@c@... as (@ a b c ...) for some operator @
; op: the operator to look for
; head: the expression head to yield in the result, e.g. "a;b" => (block a b)
; closers: a list of tokens that will stop the process
;          however, this doesn't consume the closing token, just looks at it
; allow-empty: if true will ignore runs of the operator, like a@@@@b
; ow, my eyes!!
(define (parse-Nary s down ops head closers allow-empty)
  (if (invalid-initial-token? (require-token s))
      (error (string "unexpected " (peek-token s))))
  (if (memv (require-token s) closers)
      (list head)  ; empty block
      (let loop ((ex
		  ;; in allow-empty mode skip leading runs of operator
		  (if (and allow-empty (memv (require-token s) ops))
		      '()
		      (if (memv #\newline ops)
			  (let ((loc (line-number-node s)))
			    ;; note: line-number must happen before (down s)
			    (list (down s) loc))
			  (list (down s)))))
		 (first? #t))
	(let ((t (peek-token s)))
	  (if (not (memv t ops))
	      (begin
		(if (not (or (eof-object? t) (eqv? t #\newline) (memv #\, ops)
			     (memv t closers)))
		    (error "extra token after end of expression"))
		(if (or (null? ex) (pair? (cdr ex)) (not first?))
		    ;; () => (head)
		    ;; (ex2 ex1) => (head ex1 ex2)
		    ;; (ex1) if operator appeared => (head ex1) (handles "x;")
		    (cons head (reverse ex))
		    ;; (ex1) => ex1
		    (car ex)))
	      (begin (take-token s)
		     ; allow input to end with the operator, as in a;b;
		     (if (or (eof-object? (peek-token s))
			     (memv (peek-token s) closers)
			     (and allow-empty
				  (memv (peek-token s) ops)))
			 (loop ex #f)
			 (if (memv #\newline ops)
			     (let ((loc (line-number-node s)))
			       (loop (list* (down s) loc ex) #f))
			     (loop (cons (down s) ex) #f)))))))))

; colon is strange; 3 arguments with 2 colons yields one call:
; 1:2   => (: 1 2)
; 1:2:3 => (: 1 2 3)
; 1:    => (: 1 :)
; 1:2:  => (: 1 2 :)
;; not enabled:
;;; :2    => (: 2)
;;; :1:2  => (: (: 1 2))
;;; :1:   => (: (: 1 :))
; a simple state machine is up to the task.
; we will leave : expressions as a syntax form, not a call to ':',
; so they can be processed by syntax passes.
(define (parse-range s)
  (let loop ((ex (parse-expr s))
	     (first? #t))
    (let* ((t   (peek-token s))
	   (spc (ts:space? s)))
      (cond ((and first? (eq? t '|..|))
	     (take-token s)
	     `(call ,t ,ex ,(parse-expr s)))
	    ((and range-colon-enabled (eq? t ':))
	     (take-token s)
	     (if (and space-sensitive spc
		      (or (peek-token s) #t) (not (ts:space? s)))
		 ;; "a :b" in space sensitive mode
		 (begin (ts:put-back! s ':)
			ex)
		 (let ((argument
			(if (closing-token? (peek-token s))
			    ':  ; missing last argument
			    (parse-expr s))))
		   (if (and (not (ts:space? s))
			    (or (eq? argument '<) (eq? argument '>)))
		       (error (string ': argument " found instead of "
				      argument ':)))
		   (if first?
		       (loop (list t ex argument) #f)
		       (loop (append ex (list argument)) #t)))))
	    (else ex)))))

; the principal non-terminals follow, in increasing precedence order

(define (parse-block s) (parse-Nary s parse-eq '(#\newline #\;) 'block
				    '(end else elseif catch finally) #t))

;; ";" at the top level produces a sequence of top level expressions
(define (parse-stmts s)
  (let ((ex (parse-Nary s parse-eq '(#\;) 'toplevel '(#\newline) #t)))
    ;; check for unparsed junk after an expression
    (let ((t (peek-token s)))
      (if (not (or (eof-object? t) (eqv? t #\newline) (eq? t #f)))
	  (error "extra token after end of expression")))
    ex))

(define (parse-eq s)
  (let ((lno (input-port-line (ts:port s))))
    (short-form-function-loc
     (parse-RtoL s parse-comma (prec-ops 0)) lno)))

; parse-eq* is used where commas are special, for example in an argument list
(define (parse-eq* s)   (parse-RtoL s parse-cond  (prec-ops 0)))
; parse-comma is needed for commas outside parens, for example a = b,c
(define (parse-comma s) (parse-Nary s parse-cond  '(#\,) 'tuple '() #f))
(define (parse-or s)    (parse-LtoR s parse-and   (prec-ops 2)))
(define (parse-and s)   (parse-LtoR s parse-arrow (prec-ops 3)))
(define (parse-arrow s) (parse-RtoL s parse-ineq  (prec-ops 4)))
(define (parse-ineq s)  (parse-comparison s (prec-ops 5)))

; parse left to right, combining chains of certain operators into 1 call
; e.g. a+b+c => (call + a b c)
(define (parse-expr s)
  (let ((ops (prec-ops 7)))
    (let loop ((ex       (parse-shift s))
	       (chain-op #f))
      (let* ((t   (peek-token s))
	     (spc (ts:space? s)))
	(if (not (memq t ops))
	    ex
	    (begin
	      (take-token s)
	      (cond ((and space-sensitive spc (memq t unary-and-binary-ops)
			  (not (eqv? (peek-char (ts:port s)) #\ )))
		     ;; here we have "x -y"
		     (ts:put-back! s t)
		     ex)
		    ((eq? t chain-op)
		     (loop (append ex (list (parse-shift s)))
			   chain-op))
		    (else
		     (loop (list 'call t ex (parse-shift s))
			   (and (eq? t '+) t))))))))))

(define (parse-shift s) (parse-LtoR s parse-term (prec-ops 8)))

(define (parse-term s)
  (let ((ops (prec-ops 9)))
    (let loop ((ex       (parse-rational s))
	       (chain-op #f))
      (let ((t   (peek-token s))
	    (spc (ts:space? s)))
	(cond ((not (memq t ops))
	       ex)
	      ;; TODO: maybe parse 2x*y as (call * 2 x y)
	      ((eq? t chain-op)
	       (begin (take-token s)
		      (loop (append ex (list (parse-rational s)))
			    chain-op)))
	      (else
	       (begin (take-token s)
		      (if (and space-sensitive spc (memq t unary-and-binary-ops)
			       (not (eqv? (peek-char (ts:port s)) #\ )))
			  (begin (ts:put-back! s t)
				 ex)
			  (loop (list 'call t ex (parse-rational s))
				(and (eq? t '*) t))))))))))

(define (parse-rational s) (parse-LtoR s parse-unary (prec-ops 10)))

(define (parse-comparison s ops)
  (let loop ((ex (parse-range s))
	     (first #t))
    (let ((t (peek-token s)))
      (if (not (memq t ops))
	  ex
	  (begin (take-token s)
		 (if first
		     (loop (list 'comparison ex t (parse-range s)) #f)
		     (loop (append ex (list t (parse-range s))) #f)))))))

; flag an error for tokens that cannot begin an expression
(define (closing-token? tok)
  (or (eof-object? tok)
      (and (eq? tok 'end) (not end-symbol))
      (memv tok '(#\, #\) #\] #\} #\; else elseif catch finally))))

(define (maybe-negate op num)
  (if (eq? op '-) (- num) num))

; given an expression and the next token, is there a juxtaposition
; operator between them?
(define (juxtapose? expr t)
  (and (not (operator? t))
       (not (operator? expr))
       (not (memq t reserved-words))
       (not (closing-token? t))
       (not (newline? t))
       (or (number? expr)
	   (not (memv t '(#\( #\[ #\{))))))

(define (parse-juxtapose ex s)
  (let ((next (peek-token s)))
    ;; numeric literal juxtaposition is a unary operator
    (cond ((and (juxtapose? ex next)
		(not (ts:space? s)))
	   (begin
	     #;(if (and (number? ex) (= ex 0))
		 (error "juxtaposition with literal 0"))
	     `(call * ,ex ,(parse-unary s))))
	  (else ex))))

(define (parse-unary s)
  (let ((t (require-token s)))
    (if (closing-token? t)
	(error (string "unexpected " t)))
    (cond ((memq t unary-ops)
	   (let* ((op  (take-token s))
		  (nch (peek-char (ts:port s))))
	     (if (and (or (eq? op '-) (eq? op '+))
		      (or (and (char? nch) (char-numeric? nch))
			  (and (eqv? nch #\.) (read-char (ts:port s)))))
		 (let ((num
			(parse-juxtapose
			 (read-number (ts:port s) (eqv? nch #\.) (eq? op '-))
			 s)))
		   (if (memq (peek-token s) '(^ .^))
		       ;; -2^x parsed as (- (^ 2 x))
		       (begin (if (= num -9223372036854775808)
				  (error (string "invalid numeric constant "
						 (- num))))
			      (ts:put-back! s (maybe-negate op num))
			      (list 'call op (parse-factor s)))
		       num))
		 (let ((next (peek-token s)))
		   (cond ((closing-token? next)
			  op)  ; return operator by itself, as in (+)
			 ((eqv? next #\{)  ;; this case is +{T}(x::T) = ...
			  (ts:put-back! s op)
			  (parse-factor s))
			 (else
			  (let ((arg (parse-unary s)))
			    (if (and (pair? arg)
				     (eq? (car arg) 'tuple))
				(list* 'call op (cdr arg))
				(list  'call op arg)))))))))
	  (else
	   (parse-juxtapose (parse-factor s) s)))))

; handle ^, .^, and postfix ...
(define (parse-factor-h s down ops)
  (let ((ex (down s)))
    (let ((t (peek-token s)))
      (cond ((eq? t '...)
	     (take-token s)
	     (list '... ex))
	    ((not (memq t ops))
	     ex)
	    (else
	     (list 'call
		   (take-token s) ex (parse-factor-h s parse-unary ops)))))))

; -2^3 is parsed as -(2^3), so call parse-decl for the first argument,
; and parse-unary from then on (to handle 2^-3)
(define (parse-factor s)
  (parse-factor-h s parse-decl (prec-ops 11)))

(define (parse-decl s)
  (let loop ((ex (if (eq? (peek-token s) '|::|)
		     (begin (take-token s)
			    `(|::| ,(parse-call s)))
		     (parse-call s))))
    (let ((t (peek-token s)))
      (case t
	((|::|) (take-token s)
	 (loop (list t ex (parse-call s))))
	((->)   (take-token s)
	 ;; -> is unusual: it binds tightly on the left and
	 ;; loosely on the right.
	 (let ((lno (line-number-filename-node s)))
	   `(-> ,ex (block ,lno ,(parse-eq* s)))))
	(else
	 ex)))))

;; convert (comparison a <: b) to (<: a b)
(define (subtype-syntax e)
  (if (and (pair? e) (eq? (car e) 'comparison)
	   (length= e 4) (eq? (caddr e) '|<:|))
      `(<: ,(cadr e) ,(cadddr e))
      e))

(define (parse-unary-prefix s)
  (let ((op (peek-token s)))
    (if (syntactic-unary-op? op)
	(begin (take-token s)
	       (cond ((closing-token? (peek-token s))  op)
		     ((eq? op '&)  (list op (parse-call s)))
		     (else         (list op (parse-atom s)))))
	(parse-atom s))))

; parse function call, indexing, dot, and transpose expressions
; also handles looking for syntactic reserved words
(define (parse-call s)
  (let ((ex (parse-unary-prefix s)))
    (if (memq ex reserved-words)
	(parse-resword s ex)
	(let loop ((ex ex))
	  (let ((t (peek-token s)))
	    (if (or (and space-sensitive (ts:space? s)
			 (memv t '(#\( #\[ #\{ |'| #\")))
		    (and (number? ex)  ;; 2(...) is multiply, not call
			 (eqv? t #\()))
		ex
		(case t
		  ((#\( )   (take-token s)
		   (let ((al (parse-arglist s #\) )))
		     (if (eq? (peek-token s) 'do)
			 (begin
			   (take-token s)
			   (loop `(call ,ex ,(parse-do s) ,@al)))
			 (loop `(call ,ex ,@al)))))
		  ((#\[ )   (take-token s)
		   ; ref is syntax, so we can distinguish
		   ; a[i] = x  from
		   ; ref(a,i) = x
		   (let ((al (with-end-symbol (parse-ref s #\] ))))
		     (if (dict-literal? ex)
		       (if (and (not(null? al)) (eq? (car al) 'comprehension))
			  (if (and (not(null? (cdr al)))
				   (dict-literal? (cadr al)))
			      (loop (list* 'typed-dict-comprehension ex (cdr al)))
			      (error "invalid dict comprehension syntax"))
			  (if (every dict-literal? al)
			    (loop (list* 'typed-dict ex al))
			    (else (error "invalid dict literal"))))
		       (if (any dict-literal? al)
			 (error "invalid dict type specification")
			 (if (and (not(null? al)) (eq? (car al) 'comprehension))
			   (loop (list* 'typed-comprehension ex (cdr al)))
			   (loop (list* 'ref ex al)))))))
		  ((|.|)
		   (take-token s)
		   (if (eqv? (peek-token s) #\()
		       (loop `(|.| ,ex ,(parse-atom s)))
		       (let ((name (parse-atom s)))
			 (if (and (pair? name) (eq? (car name) 'macrocall))
			     `(macrocall (|.| ,ex (quote ,(cadr name)))
					 ,@(cddr name))
			     (loop `(|.| ,ex (quote ,name)))))))
		  ((|.'| |'|) (take-token s)
		   (loop (list t ex)))
		  ((#\{ )   (take-token s)
		   (loop (list* 'curly ex
				(map subtype-syntax (parse-arglist s #\} )))))
		  ((#\")
		   (if (and (symbol? ex) (not (operator? ex))
			    (not (ts:space? s)))
		       ;; custom prefixed string literals, x"s" => @x_str "s"
		       (let ((str (begin (take-token s)
					 (parse-string-literal s)))
			     (macname (symbol (string #\@ ex '_str))))
			 (let ((nxt (peek-token s)))
			   (if (and (symbol? nxt) (not (operator? nxt))
				    (not (ts:space? s)))
			       ;; string literal suffix, "s"x
			       (loop `(macrocall ,macname ,(car str)
						 ,(string (take-token s))))
			       (loop `(macrocall ,macname ,(car str))))))
		       ex))
		  (else ex))))))))

;(define (parse-dot s)  (parse-LtoR s parse-atom (prec-ops 13)))

(define expect-end-current-line 0)

(define (expect-end- s word)
  (let ((t (peek-token s)))
    (cond ((eq? t 'end) (take-token s))
	  ((eof-object? t)
	   (error (string "incomplete: " word " at "
			  current-filename ":" expect-end-current-line
			  " requires end")))
	  (else
	   (error (string word " at "
			  current-filename ":" expect-end-current-line
			  " expected end, got " t))))))

(define (parse-subtype-spec s)
  (subtype-syntax (parse-ineq s)))

; parse expressions or blocks introduced by syntactic reserved words
(define (parse-resword s word)
  (set! expect-end-current-line (input-port-line (ts:port s)))
  (define (expect-end s) (expect-end- s word))
  (with-normal-ops
  (without-whitespace-newline
  (case word
    ((begin)  (begin0 (parse-block s)
		      (expect-end s)))
    ((quote)  (begin0 (list 'quote (parse-block s))
		      (expect-end s)))
    ((while)  (begin0 (list 'while (parse-cond s) (parse-block s))
		      (expect-end s)))
    ((for)
     (let* ((ranges (parse-comma-separated-iters s))
	    (body   (parse-block s)))
       (expect-end s)
       (let nest ((r ranges))
	 (if (null? r)
	     body
	     `(for ,(car r) ,(nest (cdr r)))))))
    ((if)
     (let* ((test (parse-cond s))
	    (then (if (memq (require-token s) '(else elseif))
		      '(block)
		      (parse-block s)))
	    (nxt  (require-token s)))
       (take-token s)
       (case nxt
	 ((end)     (list 'if test then))
	 ((elseif)
	  `(if ,test ,then
	       ;; line number for elseif condition
	       (block ,(line-number-node s)
		      ,(parse-resword s 'if))))
	 ((else)    (list 'if test then (parse-resword s 'begin)))
	 (else      (error (string "unexpected " nxt))))))
    ((let)
     (let* ((binds (if (memv (peek-token s) '(#\newline #\;))
		       (begin (take-token s)
			      '())
		       (parse-comma-separated-assignments s)))
	    (ex    (parse-block s)))
       (expect-end s)
       `(let ,ex ,@binds)))
    ((global local)
     (let* ((const (and (eq? (peek-token s) 'const)
			(take-token s)))
	    (expr  (cons word (parse-comma-separated-assignments s))))
       (if const
	   `(const ,expr)
	   expr)))
    ((function macro)
     (let* ((paren (eqv? (require-token s) #\())
	    (sig   (parse-call s))
	    (def   (if (or (symbol? sig)
			   (and (pair? sig) (eq? (car sig) '|::|)
				(symbol? (cadr sig))))
		       (if paren
			   ;; in "function (x)" the (x) is a tuple
			   `(tuple ,sig)
			   ;; function foo  =>  syntax error
			   (error (string "expected ( in " word " definition")))
		       (if (not (and (pair? sig)
				     (or (eq? (car sig) 'call)
					 (eq? (car sig) 'tuple))))
			   (error (string "expected ( in " word " definition"))
			   sig)))
	    (loc   (begin (skip-ws-and-comments (ts:port s))
			  (line-number-filename-node s)))
	    (body  (parse-block s)))
       (expect-end s)
       (if (and (length> body 1)
		(pair? (cadr body))
		(eq? (caadr body) 'line))
	   (set-car! (cdr body) loc))
       (list word def body)))
    ((abstract)
     (list 'abstract (parse-subtype-spec s)))
    ((type)
     (let ((sig (parse-subtype-spec s)))
       (begin0 (list word sig (parse-block s))
	       (expect-end s))))
    ((bitstype)
     (list 'bitstype (with-space-sensitive (parse-cond s))
	   (parse-subtype-spec s)))
    ((typealias)
     (let ((lhs (parse-call s)))
       (if (and (pair? lhs) (eq? (car lhs) 'call))
	   ;; typealias X (...) is tuple type alias, not call
	   (list 'typealias (cadr lhs) (cons 'tuple (cddr lhs)))
	   (list 'typealias lhs (parse-arrow s)))))
    ((try)
     (let ((try-block (if (memq (require-token s) '(catch finally))
			  '(block)
			  (parse-block s))))
       (let loop ((nxt    (require-token s))
		  (catchb #f)
		  (catchv #f)
		  (finalb #f))
	 (take-token s)
	 (cond
	  ((eq? nxt 'end)
	   (list* 'try try-block catchv catchb (if finalb
						   (list finalb)
						   '())))
	  ((and (eq? nxt 'catch)
		(not catchb))
	   (let ((nl (eqv? (peek-token s) #\newline)))
	     (if (memq (require-token s) '(end finally))
		 (loop (require-token s)
		       '(block)
		       #f
		       finalb)
		 (let* ((var (parse-eq* s))
			(var? (and (not nl) (symbol? var)))
			(catch-block (if (eq? (require-token s) 'finally)
					 '(block)
					 (parse-block s))))
		   (loop (require-token s)
			 (if var?
			     catch-block
			     `(block ,var ,@(cdr catch-block)))
			 (and var? var)
			 finalb)))))
	  ((and (eq? nxt 'finally)
		(not finalb))
	   (let ((fb (if (eq? (require-token s) 'catch)
			 '(block)
			 (parse-block s))))
	     (loop (require-token s)
		   catchb
		   catchv
		   fb)))
	  (else    (error (string "unexpected " nxt)))))))
    ((return)          (let ((t (peek-token s)))
			 (if (or (eqv? t #\newline) (closing-token? t))
			     (list 'return '(null))
			     (list 'return (parse-eq s)))))
    ((break continue)  (list word))
    ((const)
     (let ((assgn (parse-eq s)))
       (if (not (and (pair? assgn)
		     (or (eq? (car assgn) '=)
			 (eq? (car assgn) 'global)
			 (eq? (car assgn) 'local))))
	   (error "expected assignment after const")
	   `(const ,assgn))))
    ((module baremodule)
     (let* ((name (parse-atom s))
	    (body (parse-block s)))
       (expect-end s)
       (list 'module (eq? word 'module) name
	     (if (eq? word 'module)
		 (list* 'block
			;; add definitions for module-local eval
			(let ((x (gensym)))
			  `(= (call eval ,x)
			      (call (|.| (top Core) 'eval) ,name ,x)))
			`(= (call eval m x)
			    (call (|.| (top Core) 'eval) m x))
			(cdr body))
		 body))))
    ((export)
     (let ((es (map macrocall-to-atsym
		    (parse-comma-separated s parse-atom))))
       (if (not (every symbol? es))
	   (error "invalid export statement"))
       `(export ,@es)))
    ((import using importall)
     (let ((imports (parse-comma-separated s (lambda (s)
					       (parse-import s word)))))
       (if (length= imports 1)
	   (car imports)
	   (cons 'toplevel imports))))
    ((ccall)
     (if (not (eqv? (peek-token s) #\())
	 'ccall
	 (begin
	   (take-token s)
	   (let ((al (parse-arglist s #\))))
	     (if (and (length> al 1)
		      (memq (cadr al) '(cdecl stdcall fastcall thiscall)))
		 ;; place (callingconv) at end of arglist
		 `(ccall ,(car al) ,@(cddr al) (,(cadr al)))
		 `(ccall ,.al))))))
    ((do)
     (error "invalid do syntax"))
    (else (error "unhandled reserved word"))))))

(define (parse-do s)
  (set! expect-end-current-line (input-port-line (ts:port s)))
  (let ((doargs (if (eqv? (peek-token s) #\newline)
		    '()
		    (parse-comma-separated-assignments s))))
    `(-> (tuple ,@doargs)
	 ,(begin0 (parse-block s)
		  (expect-end- s 'do)))))

(define (macrocall-to-atsym e)
  (if (and (pair? e) (eq? (car e) 'macrocall))
      (cadr e)
      e))

(define (parse-import s word)
  (let ((ns (macrocall-to-atsym (parse-atom s))))
    (let loop ((path (list ns)))
      (if (not (symbol? (car path)))
	  (error (string "invalid " word " statement: expected identifier")))
      (let ((nxt (peek-token s)))
	(cond #;((eq? nxt '|.*|)
	       (take-token s)
	       `(importall ,@(reverse path)))
	      ((eq? nxt '|.|)
	       (take-token s)
	       (loop (cons (macrocall-to-atsym (parse-atom s)) path)))
	      ((or (memv nxt '(#\newline #\; #\,))
		   (eof-object? nxt))
	       `(,word ,@(reverse path)))
	      ((eqv? (string.sub (string nxt) 0 1) ".")
	       (take-token s)
	       (loop (cons (symbol (string.sub (string nxt) 1))
			   path)))
	      (else
	       (error (string "invalid " word " statement"))))))))

; parse comma-separated assignments, like "i=1:n,j=1:m,..."
(define (parse-comma-separated s what)
  (let loop ((exprs '()))
    (let ((r (what s)))
      (case (peek-token s)
	((#\,)  (take-token s) (loop (cons r exprs)))
	(else   (reverse! (cons r exprs)))))))

(define (parse-comma-separated-assignments s)
  (parse-comma-separated s parse-eq*))

; as above, but allows both "i=r" and "i in r"
(define (parse-comma-separated-iters s)
  (let loop ((ranges '()))
    (let ((r (parse-eq* s)))
      (let ((r (cond ((and (pair? r) (eq? (car r) '=))
		      r)
		     ((eq? r ':)
		      r)
		     ((eq? (peek-token s) 'in)
		      (begin (take-token s)
			     `(= ,r ,(parse-eq* s))))
		     (else
		      (error "invalid iteration specification")))))
	(case (peek-token s)
	  ((#\,)  (take-token s) (loop (cons r ranges)))
	  (else   (reverse! (cons r ranges))))))))

(define (parse-space-separated-exprs s)
  (with-space-sensitive
   (let loop ((exprs '()))
     (if (or (closing-token? (peek-token s))
	     (newline? (peek-token s))
	     (and inside-vec (eq? (peek-token s) 'for)))
	 (reverse! exprs)
	 (let ((e (parse-eq s)))
	   (case (peek-token s)
	     ((#\newline)   (reverse! (cons e exprs)))
	     (else          (loop (cons e exprs)))))))))

(define (separate-keywords argl)
  (receive
   (kws args) (separate (lambda (x)
			  (and (pair? x) (eq? (car x) '=)))
			argl)
   (if (null? kws)
       args
       `(,@args (keywords ,@kws)))))

; handle function call argument list, or any comma-delimited list.
; . an extra comma at the end is allowed
; . expressions after a ; are enclosed in (parameters ...)
; . an expression followed by ... becomes (... x)
(define (parse-arglist s closer)
  (with-normal-ops
   (with-whitespace-newline
    (parse-arglist- s closer))))
(define (parse-arglist- s closer)
  (let loop ((lst '()))
    (let ((t (require-token s)))
      (if (equal? t closer)
	  (begin (take-token s)
		 (let ((lst (reverse lst)))
		   (if (eqv? closer #\) )
		       (separate-keywords lst)
		       lst)))
	  (if (equal? t #\;)
	      (begin (take-token s)
		     (if (equal? (peek-token s) closer)
			 ;; allow f(a, b; )
			 (begin (take-token s)
				(reverse lst))
			 (reverse (cons (cons 'parameters (loop '()))
					lst))))
	      (let* ((nxt (parse-eq* s))
		     (c (require-token s)))
		(cond ((eqv? c #\,)
		       (begin (take-token s) (loop (cons nxt lst))))
		      ((eqv? c #\;)          (loop (cons nxt lst)))
		      ((equal? c closer)     (loop (cons nxt lst)))
		      ;; newline character isn't detectable here
		      #;((eqv? c #\newline)
		       (error "unexpected line break in argument list"))
		      ((memv c '(#\] #\}))
		       (error (string "unexpected " c " in argument list")))
		      (else
		       (error (string "missing comma or " closer
				      " in argument list"))))))))))

; parse [] concatenation expressions and {} cell expressions
(define (parse-vcat s first closer)
  (let loop ((lst '())
	     (nxt first))
    (let ((t (require-token s)))
      (if (eqv? t closer)
	  (begin (take-token s)
		 (cons 'vcat (reverse (cons nxt lst))))
	  (case t
	    ((#\,)
	     (take-token s)
	     (if (eqv? (require-token s) closer)
		 ;; allow ending with ,
		 (begin (take-token s)
			(cons 'vcat (reverse (cons nxt lst))))
		 (loop (cons nxt lst) (parse-eq* s))))
	    ((#\;)
	     (error "unexpected semicolon in array expression"))
	    ((#\] #\})
	     (error (string "unexpected " t)))
	    (else
	     (error "missing separator in array expression")))))))

(define (parse-comprehension s first closer)
  (let ((r (parse-comma-separated-iters s)))
    (if (not (eqv? (require-token s) closer))
	(error (string "expected " closer))
	(take-token s))
    `(comprehension ,first ,@r)))

(define (parse-matrix s first closer)
  (define (fix head v) (cons head (reverse v)))
  (define (update-outer v outer)
    (cond ((null? v)       outer)
	  ((null? (cdr v)) (cons (car v) outer))
	  (else            (cons (fix 'row v) outer))))
  (define semicolon (eqv? (peek-token s) #\;))
  (let loop ((vec   (list first))
	     (outer '()))
    (let ((t  (if (eqv? (peek-token s) #\newline)
		  #\newline
		  (require-token s))))
      (if (eqv? t closer)
	  (begin (take-token s)
		 (if (pair? outer)
		     (fix 'vcat (update-outer vec outer))
		     (if (or (null? vec) (null? (cdr vec)))
			 (fix 'vcat vec)     ; [x]   => (vcat x)
			 (fix 'hcat vec))))  ; [x y] => (hcat x y)
	  (case t
	    ((#\; #\newline)
	     (take-token s) (loop '() (update-outer vec outer)))
	    ((#\,)
	     (error "unexpected comma in matrix expression"))
	    ((#\] #\})
	     (error (string "unexpected " t)))
	    ((for)
	     (if (and (not semicolon)
		      (length= outer 1)
		      (null? vec))
		 (begin (take-token s)
			(parse-comprehension s (car outer) closer))
		 (error "invalid comprehension syntax")))
	    (else
	     (loop (cons (parse-eq* s) vec) outer)))))))

(define (parse-cat s closer)
  (with-normal-ops
   (with-inside-vec
    (if (eqv? (require-token s) closer)
	(begin (take-token s)
	       (list 'vcat))  ; [] => (vcat)
	(let ((first (parse-eq* s)))
	  (case (peek-token s)
	    ;; dispatch to array syntax, comprehension, or matrix syntax
	    ((#\,)
	     (parse-vcat s first closer))
	    ;;((|\||)
	    ;; (error "old syntax"))
	    ((for)
	     (take-token s)
	     (parse-comprehension s first closer))
	    (else
	     (parse-matrix s first closer))))))))

(define (parse-ref s closer)
  (with-normal-ops
   (with-inside-ref
    (parse-ref- s closer))))
(define (parse-ref- s closer)
  (let loop ((lst '()))
    (let ((t (require-token s)))
      (if (equal? t closer)
	  (begin (take-token s)
		 (reverse lst))
	  (let* ((nxt (parse-eq* s))
		 (c (require-token s)))
	    (cond ((eqv? c #\,)
		   (begin (take-token s) (loop (cons nxt lst))))
		  ((equal? c closer)     (loop (cons nxt lst)))
		  ((eqv? c 'for)
		   (if (null? lst)
		     (begin (take-token s) (parse-comprehension s nxt closer))
		     (error "invalid comprehension syntax")))
		  (else
		   (error "invalid ref syntax"))))))))

; for sequenced evaluation inside expressions: e.g. (a;b, c;d)
(define (parse-stmts-within-expr s)
  (parse-Nary s parse-eq* '(#\;) 'block '(#\, #\) ) #t))

(define (parse-tuple s first)
  (let loop ((lst '())
	     (nxt first))
    (if (assignment? nxt)
	(error "invalid syntax in tuple"))
    (let ((t (require-token s)))
      (case t
	((#\))
	 (take-token s)
	 (cons 'tuple (reverse (cons nxt lst))))
	((#\,)
	 (take-token s)
	 (if (eqv? (require-token s) #\))
	     ;; allow ending with ,
	     (begin (take-token s)
		    (cons 'tuple (reverse (cons nxt lst))))
	     (loop (cons nxt lst) (parse-eq* s))))
	((#\;)
	 (error "unexpected semicolon in tuple"))
	#;((#\newline)
	 (error "unexpected line break in tuple"))
	((#\] #\})
	 (error (string "unexpected " t " in tuple")))
	(else
	 (error "missing separator in tuple"))))))

(define (not-eof-2 c)
  (if (eof-object? c)
      (error "incomplete: invalid ` syntax")
      c))

(define (parse-backquote s)
  (let ((b (open-output-string))
	(p (ts:port s)))
    (let loop ((c (read-char p)))
      (if (eqv? c #\`)
	  #t
	  (begin (if (eqv? c #\\)
		     (let ((nextch (read-char p)))
		       (if (eqv? nextch #\`)
			   (write-char nextch b)
			   (begin (write-char #\\ b)
				  (write-char (not-eof-2 nextch) b))))
		     (write-char (not-eof-2 c) b))
		 (loop (read-char p)))))
    (let ((str (io.tostring! b)))
      `(macrocall @cmd ,str))))

(define (not-eof-3 c)
  (if (eof-object? c)
      (error "incomplete: invalid string syntax")
      c))

; reads a raw string literal with no processing.
; quote can be escaped with \, but the \ is left in place.
; returns ("str" . b), b is a boolean telling whether interpolation is used
(define (parse-string-literal s)
  (let ((b (open-output-string))
	(p (ts:port s))
	(interpolate #f))
    (let loop ((c (read-char p)))
      (if (eqv? c #\")
	  #t
	  (begin (if (eqv? c #\\)
		     (let ((nextch (read-char p)))
		       (begin (write-char #\\ b)
			      (write-char (not-eof-3 nextch) b)))
		     (begin
		       (if (eqv? c #\$)
			   (set! interpolate #t))
		       (write-char (not-eof-3 c) b)))
		 (loop (read-char p)))))
    (cons (io.tostring! b) interpolate)))

(define (not-eof-1 c)
  (if (eof-object? c)
      (error "incomplete: invalid character literal")
      c))

(define (unescape-string s)
  (with-exception-catcher
   (lambda (e) (error "invalid escape sequence"))
   (lambda ()
     ;; process escape sequences using lisp read
     (read (open-input-string (string #\" s #\"))))))

; parse numbers, identifiers, parenthesized expressions, lists, vectors, etc.
(define (parse-atom s)
  (let ((ex (parse-atom- s)))
    (if (and (symbol? ex)
	     (memq ex syntactic-operators))
	(error (string "invalid identifier name " ex)))
    ex))

(define (parse-atom- s)
  (let ((t (require-token s)))
    (cond ((or (string? t) (number? t)) (take-token s))

	  ;; char literal
	  ((eq? t '|'|)
	   (take-token s)
	   (let ((firstch (read-char (ts:port s))))
	     (if (eqv? firstch #\')
	      (error "invalid character literal")
	      (if (and (not (eqv? firstch #\\))
		       (not (eof-object? firstch))
		       (eqv? (peek-char (ts:port s)) #\'))
	       ;; easy case: 1 character, no \
	       (begin (read-char (ts:port s)) firstch)
	       (let ((b (open-output-string)))
		 (let loop ((c firstch))
		   (if (eqv? c #\')
		       #t
		       (begin (write-char (not-eof-1 c) b)
			      (if (eqv? c #\\)
				  (write-char
				   (not-eof-1 (read-char (ts:port s))) b))
			      (loop (read-char (ts:port s))))))
		 (let ((str (unescape-string (io.tostring! b))))
		   (if (= (length str) 1)
		       ;; one byte, e.g. '\xff'. maybe not valid UTF-8, but we
		       ;; want to use the raw value as a codepoint in this case.
		       (wchar (aref str 0))
		       (if (or (not (= (string-length str) 1))
			       (not (string.isutf8 str)))
			   (error "invalid character literal")
			   (string.char str 0)))))))))

	  ;; symbol/expression quote
	  ((eq? t ':)
	   (take-token s)
	   (if (closing-token? (peek-token s))
	       ':
	       (let ((ex (parse-atom- s)))
		 (list 'quote ex))))

	  ;; misplaced =
	  ((eq? t '=) (error "unexpected ="))

	  ;; identifier
	  ((symbol? t) (take-token s))

	  ;; parens or tuple
	  ((eqv? t #\( )
	   (take-token s)
	   (with-normal-ops
	   (with-whitespace-newline
	   (if (eqv? (require-token s) #\) )
	       ;; empty tuple ()
	       (begin (take-token s) '(tuple))
	       (if (memq (peek-token s) syntactic-operators)
		   ;; allow (=) etc.
		   (let ((tok (take-token s)))
		     (if (not (eqv? (require-token s) #\) ))
			 (error (string "invalid identifier name " tok))
			 (take-token s))
		     tok)
		   ;; here we parse the first subexpression separately, so
		   ;; we can look for a comma to see if it's a tuple.
		   ;; this lets us distinguish (x) from (x,)
		   (let* ((ex (parse-eq* s))
			  (t (require-token s)))
		     (cond ((eqv? t #\) )
			    (take-token s)
			    (if (and (pair? ex) (eq? (car ex) '...))
				;; (ex...)
				`(tuple ,ex)
				;; value in parentheses (x)
				ex))
			   ((eqv? t #\, )
			    ;; tuple (x,) (x,y) (x...) etc.
			    (parse-tuple s ex))
			   ((eqv? t #\;)
			    ;; parenthesized block (a;b;c)
			    (take-token s)
			    (let* ((blk (parse-stmts-within-expr s))
				   (tok (require-token s)))
			      (if (eqv? tok #\,)
				  (error "unexpected comma in statement block"))
			      (if (not (eqv? tok #\)))
				  (error "missing separator in statement block"))
			      (take-token s)
			      `(block ,ex ,blk)))
			   #;((eqv? t #\newline)
			   (error "unexpected line break in tuple"))
			   ((memv t '(#\] #\}))
			    (error (string "unexpected " t " in tuple")))
			   (else
			    (error "missing separator in tuple")))))))))

	  ;; cell expression
	  ((eqv? t #\{ )
	   (take-token s)
	   (if (eqv? (require-token s) #\})
	       (begin (take-token s) '(cell1d))
	       (let ((vex (parse-cat s #\})))
		 (cond ((eq? (car vex) 'comprehension)
			(if (and (not (null? (cdr vex)))
				 (dict-literal? (cadr vex)))
			  `(typed-dict-comprehension (=> (top Any) (top Any)) ,@(cdr vex))
			  `(typed-comprehension (top Any) ,@(cdr vex))))
		       ((and (eq? (car vex) 'vcat)
			     (any dict-literal? (cdr vex)))
			(if (every dict-literal? (cdr vex))
			  `(typed-dict (=> (top Any) (top Any)) ,@(cdr vex))
			  (error "invalid dict literal")))
		       ((eq? (car vex) 'hcat)
			`(cell2d 1 ,(length (cdr vex)) ,@(cdr vex)))
		       (else  ; (vcat ...)
			(if (and (pair? (cadr vex)) (eq? (caadr vex) 'row))
			    (let ((nr (length (cdr vex)))
				  (nc (length (cdadr vex))))
			      ;; make sure all rows are the same length
			      (if (not (every
					(lambda (x)
					  (and (pair? x)
					       (eq? (car x) 'row)
					       (length= (cdr x) nc)))
					(cddr vex)))
				  (error "inconsistent shape in cell expression"))
			      `(cell2d ,nr ,nc
				       ,@(apply append
						;; transpose to storage order
						(apply map list
						       (map cdr (cdr vex))))))
			    (if (any (lambda (x) (and (pair? x)
						      (eq? (car x) 'row)))
				     (cddr vex))
				(error "inconsistent shape in cell expression")
				`(cell1d ,@(cdr vex)))))))))

	  ;; cat expression
	  ((eqv? t #\[ )
	   (take-token s)
	   (let ((vex (parse-cat s #\])))
	     (if (and (eq? (car vex) 'comprehension)
		      (dict-literal? (cadr vex)))
	       `(dict-comprehension ,@(cdr vex))
	       (if (any dict-literal? (cdr vex))
		 (if (every dict-literal? (cdr vex))
		   `(dict ,@(cdr vex))
		   (error "invalid dict literal"))
		 vex))))

	  ;; string literal
	  ((eqv? t #\")
	   (take-token s)
	   (let ((ps (parse-string-literal s)))
	     (if (cdr ps)
		 `(macrocall @str ,(car ps))
		 (let ((str (unescape-string (car ps))))
		   (if (not (string.isutf8 str))
		       (error "invalid UTF-8 sequence"))
		   str))))

	  ;; macro call
	  ((eqv? t #\@)
	   (take-token s)
	   (let ((head (with-space-sensitive
			(parse-call s))))
	     (if (and (pair? head) (eq? (car head) 'call))
		 `(macrocall ,(macroify-name (cadr head))
			     ,@(cddr head))
		 `(macrocall ,(macroify-name head)
			     ,@(parse-space-separated-exprs s)))))

	  ;; command syntax
	  ((eqv? t #\`)
	   (take-token s)
	   (parse-backquote s))

	  (else (error (string "invalid syntax: " (take-token s)))))))

(define (valid-modref? e)
  (and (length= e 3) (eq? (car e) '|.|) (pair? (caddr e))
       (eq? (car (caddr e)) 'quote) (symbol? (cadr (caddr e)))
       (or (symbol? (cadr e))
	   (valid-modref? (cadr e)))))

(define (macroify-name e)
  (cond ((symbol? e)  (symbol (string #\@ e)))
	((valid-modref? e)  `(|.| ,(cadr e)
			      (quote ,(macroify-name (cadr (caddr e))))))
	(else (error (string "invalid macro use @" e)))))

; --- main entry point ---

;; can optionally specify which grammar production to parse.
;; default is parse-stmts.
(define (julia-parse s . production)
  (cond ((string? s)
	 (apply julia-parse (make-token-stream (open-input-string s))
		production))
	((port? s)
	 (apply julia-parse (make-token-stream s) production))
	((eof-object? s)
	 s)
	(else
	 ;; as a special case, allow early end of input if there is
	 ;; nothing left but whitespace
	 (skip-ws-and-comments (ts:port s))
	 (let skip-loop ((tok (peek-token s)))
	   (if (or (eqv? tok #\newline) )
	       (begin (take-token s) (skip-loop (peek-token s)))))
	 (let ((t (peek-token s)))
	   (if (eof-object? t)
	       t
	       ((if (null? production) parse-stmts (car production))
		s))))))
back to top