;; Wrapper around Johnson's YACC. Automatically figures out lex forms ;; and makes it possible to write the augments more tersely. Also ;; rewrites the parser name, allowing the possibility of multiple parsers. ;; ;; We can take productions from any of a variety of forms and turn them into ;; the form JYACC expects. Examples of permissible production forms are ;; (S -> E $) ;; (S -> E $ (%0)) ;; (S (E $)) ;; (S (E $) %0) ;; The productions have three parts: a left-hand symbol, a right-hand side, ;; and an augment. I want to allow the augment to have a reasonable default, ;; which means I need a syntax which will allow me to unambigously tell where ;; the RHS ends and the augment begins. I'd also like not to constantly ;; type #'(lambda (a b c) ...); instead, I'd like just to specify the meat ;; of the augment in terms of a function body. ;; ;; In the first form (the one that looks like Johnson's original style), ;; I will require that any non-default augment code should be in a list. ;; In the form I prefer, I can dispense with making the augment code a new ;; list, because the right hand side of the production is in a list; in ;; this case the augment body is just the cddr of the production form. ;; ;; I assume that the arguments to the augment function are labeled by ;; position, starting from %0. ;; ;; See the end of the file for an example grammar. (use-package :lalr) ;;; ------------------------------------------- ;;; General utilities ;; Create a symbol for a positional parameter (defun pos-param-symbol (i) (intern (format nil "%~A" i))) ;; Create a list of positional parameter symbols (defun pos-param-symbol-list-1 (start end) (when (< start end) (cons (pos-param-symbol start) (pos-param-symbol-list-1 (1+ start) end)))) ;; Create a zero-based list of positional parameter symbols (defun pos-param-symbol-list (n) (pos-param-symbol-list-1 0 n)) ;; Union a set into the rhs (defmacro setf-union (lhs rhs) `(setf ,lhs (union ,lhs ,rhs))) ;;; ------------------------------------------- ;;; Wrapper macro around Johnson's YACC ;; Take lhs, rhs, and augment, and produce a production in Johnson's form ;; If the augment is nil, return a reasonable default. (defun make-jyacc-form (lhs rhs aug) (let ((nrhs (length rhs))) `(,lhs --> ,@rhs ,@(if aug `(#'(lambda ,(pos-param-symbol-list nrhs) ,@aug)) `(#'(lambda (&rest r) (if (null (cdr r)) (car r) r))))))) ;; Pull apart either of my favorite production formats into components (defun process-jyacc-production (prod) (if (symbolp (second prod)) (progn ;; The RHS is not in parens (let* ((aug (find-if #'listp prod)) ; Get augment (prod (remove-if #'listp prod)) ; Rest of prod (lhs (car prod)) (rhs (cddr prod))) (values lhs rhs aug))) (progn ;; The RHS is parenthesized (let* ((lhs (first prod)) (rhs (second prod)) (aug (cddr prod))) (values lhs rhs aug))))) ;; Pull apart my favorite format and reassemble into Johnson's format (defun filter-jyacc-production (prod) (multiple-value-bind (lhs rhs aug) (process-jyacc-production prod) (make-jyacc-form lhs rhs aug))) ;; Find the terminal symbols (anything that doesn't appear on a RHS) (defun jyacc-find-terminals (productions) (let ((all-symbols nil) (lhs-symbols nil)) (dolist (p productions) (multiple-value-bind (lhs rhs aug) (process-jyacc-production p) (setf-union all-symbols rhs) (pushnew lhs all-symbols) (pushnew lhs lhs-symbols))) (when (member t all-symbols) (error "T must not be a grammar symbol")) (set-difference all-symbols lhs-symbols))) (defmacro def-jyacc (name &rest grammar) `(let ((parse-fun (make-parser ',(map 'list #'filter-jyacc-production grammar) '(,@(jyacc-find-terminals grammar) nil) '*eof*))) (setf (second parse-fun) ',name) (eval parse-fun) (compile ',name))) ;;; ------------------------------------------- ;;; Test case: Grammar 3.10 from AWA #| (def-jyacc parser-310 (S (Exp) %0) (Exp (Exp + Term) `(+ ,%0 ,%2)) (Exp (Exp - Term) `(- ,%0 ,%2)) (Exp (Term)) (Term (Term * Fac) `(* ,%0 ,%2)) (Term (Fac)) (Fac (id)) (Fac (num)) (Fac (#\( Exp #\)) %1)) (defun parse-310 (tokens) (labels ((next () (let ((tok (pop tokens))) (print (cond ((null tok) (cons '*eof* nil)) ((numberp tok) (cons 'num tok)) ((member tok '(+ - * #\( #\))) (cons tok tok)) (t (cons 'id tok)))))) (err () (format t "~%Parse error before token ~A" (car tokens)))) (parser-310 #'next #'err))) |#