;; Wrapper around Johnson's YACC. Automatically figures out lex forms ;; and makes it possible to write the augments more tersely. Also ;; rewrites the parser name, allowing the possibility of multiple parsers. ;; ;; We can take productions from any of a variety of forms and turn them into ;; the form JYACC expects. Examples of permissible production forms are ;; (S -> E $) ;; (S -> E $ (%0)) ;; (S (E $)) ;; (S (E $) %0) ;; The productions have three parts: a left-hand symbol, a right-hand side, ;; and an augment. I want to allow the augment to have a reasonable default, ;; which means I need a syntax which will allow me to unambigously tell where ;; the RHS ends and the augment begins. I'd also like not to constantly ;; type #'(lambda (a b c) ...); instead, I'd like just to specify the meat ;; of the augment in terms of a function body. ;; ;; In the first form (the one that looks like Johnson's original style), ;; I will require that any non-default augment code should be in a list. ;; In the form I prefer, I can dispense with making the augment code a new ;; list, because the right hand side of the production is in a list; in ;; this case the augment body is just the cddr of the production form. ;; ;; I assume that the arguments to the augment function are labeled by ;; position, starting from %0. ;; ;; See the end of the file for an example grammar. (use-package :lalr) ;;; ------------------------------------------- ;;; General utilities ;; Create a symbol for a positional parameter (defun pos-param-symbol (i) (intern (format nil "%~A" i))) ;; Create a list of positional parameter symbols (defun pos-param-symbol-list-1 (start end) (when (< start end) (cons (pos-param-symbol start) (pos-param-symbol-list-1 (1+ start) end)))) ;; Create a zero-based list of positional parameter symbols (defun pos-param-symbol-list (n) (pos-param-symbol-list-1 0 n)) ;; Union a set into the rhs (defmacro setf-union (lhs rhs) `(setf ,lhs (union ,lhs ,rhs))) ;;; ------------------------------------------- ;;; Wrapper macro around Johnson's YACC ;; Allow some extensions to BNF: ;; [ x ]? -- for optional part ;; [ x ]* -- for zero-or-more ;; x or y -- for x | y ;; ' -- for literal symbols (e.g '[, 'or, etc) ;; ;; Examples: ;; (E (E [ and or 'or ] Ep) `(%1 %0 %2)) ;; (stmts ([ stmt #\; ]*)) ;; (class-decl (class id [ extends id ]?)) ;; ;; It would probably be nice to either extend the syntax further to ;; allow augments to be embedded in the production, or at least to ;; provide a mechanism for writing which things matter (e.g. only keep ;; the list of statements in the above example -- discard the ;; semicolons). ;; Grammar for our EBNF: ;; S -> P S' $ ;; S' -> or P S' ;; S' -> ;; P -> [ S ] P ;; P -> [ S ]* P ;; P -> [ S ]? P ;; P -> (defun make-jyacc-extended-form (lhs rhs aug) (let ((tokens rhs) (rules nil)) (labels ;; Categorize the next token ((peek () (let ((h (car tokens))) (cond ((null h) '$) ((member h '(or [ ] ]* ]?)) h) (t 'x)))) ;; Consume the next token (eat (type) (unless (eq (peek) type) (error "Invalid token ~s / ~s / ~s at ~s -> ~s" (car tokens) (peek) type lhs rhs)) (let ((h (pop tokens))) (if (listp h) (second h) h))) ;; Push another rule onto the stack (push-rule (lhs rhs aug) (push (make-jyacc-form lhs rhs aug) rules)) ;; S -> P' S' $ (S (lhs aug?) (Pp lhs aug?) (Sp lhs aug?)) ;; S' -> or P' S' ;; S' -> (Sp (lhs aug?) (when (eq (peek) 'or) (eat 'or) (Pp lhs aug?) (Sp lhs aug?))) ;; P' -> P (Pp (lhs aug?) (push-rule lhs (P) (when aug? aug))) ;; P -> sym P ;; P -> [ S ] P ;; P -> [ S ]+ P ;; P -> [ S ]* P ;; P -> (P () (case (peek) (x (cons (eat 'x) (P))) ([ (eat '[) (let ((X (gensym))) (S X nil) (case (peek) (] (eat ']) (cons X (P))) (]? (eat ']?) (cons (P? X) (P))) (]* (eat ']*) (cons (P* X) (P))) (eat 'error)))) (t nil))) ;; Produce a new zero-or-one rule (P? (X) (push-rule X nil nil) X) ;; Produce a new zero-or-more rule (P* (X) (let ((Y (gensym))) (push-rule Y `(,X ,Y) '((cons %0 %1))) (push-rule Y nil '(nil)) Y))) (S lhs aug) (eat '$) rules))) ;; Take lhs, rhs, and augment, and produce a production in Johnson's form ;; If the augment is nil, return a reasonable default. (defun make-jyacc-form (lhs rhs aug) (let ((nrhs (length rhs))) `(,lhs --> ,@rhs ,@(if aug `(#'(lambda ,(pos-param-symbol-list nrhs) ,@aug)) `(#'(lambda (&rest r) (if (null (cdr r)) (car r) r))))))) ;; Pull apart either of my favorite production formats into components (defun process-jyacc-production (prod) (if (symbolp (second prod)) (progn ;; The RHS is not in parens (let* ((aug (find-if #'listp prod)) ; Get augment (prod (remove-if #'listp prod)) ; Rest of prod (lhs (car prod)) (rhs (cddr prod))) (values lhs rhs aug))) (progn ;; The RHS is parenthesized (let* ((lhs (first prod)) (rhs (second prod)) (aug (cddr prod))) (values lhs rhs aug))))) ;; Pull apart my favorite format and reassemble into Johnson's format (defun filter-jyacc-production (prod) (multiple-value-bind (lhs rhs aug) (process-jyacc-production prod) (make-jyacc-extended-form lhs rhs aug))) ;; Postprocess all the productions in the grammar (defun filter-jyacc-grammar (grammar) (apply #'append (map 'list #'filter-jyacc-production grammar))) ;; Find the terminal symbols (anything that doesn't appear on a RHS) (defun jyacc-find-terminals (productions) (let ((all-symbols nil) (lhs-symbols nil)) (dolist (p productions) (multiple-value-bind (lhs rhs aug) (process-jyacc-production p) (setf-union all-symbols rhs) (pushnew lhs all-symbols) (pushnew lhs lhs-symbols))) (when (member t all-symbols) (error "T must not be a grammar symbol")) (set-difference all-symbols lhs-symbols))) (defmacro def-jyacc (name &rest grammar) `(let ((parse-fun (make-parser ',(filter-jyacc-grammar grammar) '(,@(jyacc-find-terminals grammar) nil) '*eof*))) (setf (second parse-fun) ',name) (eval parse-fun) (compile ',name))) ;;; ------------------------------------------- ;;; Test case: Grammar 3.10 from AWA #| (def-jyacc parser-310 (S (Exp) %0) (Exp (Exp + Term) `(+ ,%0 ,%2)) (Exp (Exp - Term) `(- ,%0 ,%2)) (Exp (Term)) (Term (Term * Fac) `(* ,%0 ,%2)) (Term (Fac)) (Fac (id)) (Fac (num)) (Fac (#\( Exp #\)) %1)) (defun parse-310 (tokens) (labels ((next () (let ((tok (pop tokens))) (print (cond ((null tok) (cons '*eof* nil)) ((numberp tok) (cons 'num tok)) ((member tok '(+ - * #\( #\))) (cons tok tok)) (t (cons 'id tok)))))) (err () (format t "~%Parse error before token ~A" (car tokens)))) (parser-310 #'next #'err))) |#