;;; ------------------------------------------- ;;; General utilities (defun prod-lhs (p) (first p)) (defun prod-rhs (p) (second p)) (defun prod-rule (p) (third p)) ;; Create a symbol for a positional parameter (defun pos-param-symbol (i) (intern (format nil "%~A" i))) ;; Create a list of positional parameter symbols (defun pos-param-symbol-list-1 (start end) (when (< start end) (cons (pos-param-symbol start) (pos-param-symbol-list-1 (1+ start) end)))) ;; Create a zero-based list of positional parameter symbols (defun pos-param-symbol-list (n) (pos-param-symbol-list-1 0 n)) ;; Union a set into the rhs (defmacro setf-union (lhs rhs) `(setf ,lhs (union ,lhs ,rhs))) ;;; ------------------------------------------- ;;; First/follow/null calculations ;; Define a structure to keep track of first, follow, and nullable (defstruct llset (first (make-hash-table :test 'equal)) (follow (make-hash-table :test 'equal)) (nullable (make-hash-table :test 'equal)) (changed nil)) (defmacro llfirst (sym lls) `(gethash ,sym (llset-first ,lls))) (defmacro llfollow (sym lls) `(gethash ,sym (llset-follow ,lls))) (defmacro llnull (sym lls) `(gethash ,sym (llset-nullable ,lls))) (defmacro llchange (lls) `(llset-changed ,lls)) (defun add-first (sym l lls) "Merge l into FIRST[sym]" (let ((old-first (llfirst sym lls))) (unless (subsetp l old-first) (setf (llfirst sym lls) (union l old-first)) (setf (llchange lls) t)))) (defun add-follow (sym l lls) "Merge l into FOLLOW[sym]" (let ((old-follow (llfollow sym lls))) (unless (subsetp l old-follow) (setf (llfollow sym lls) (union l old-follow)) (setf (llchange lls) t)))) (defun add-null (sym lls) "Set NULL[sym] = t" (unless (llnull sym lls) (setf (llnull sym lls) t) (setf (llchange lls) t))) (defun init-llset (terminals) "Create a new llset and initialize FIRST entries for all terminals" (let ((lls (make-llset))) (dolist (terminal terminals) (setf (llfirst terminal lls) `(,terminal))) (setf (llchange lls) t) lls)) ;; Define functions to compute all nullable symbols (defun nullable-p (rhs lls) "Return true if rhs is a nullable sequence" (if rhs (and (llnull (car rhs) lls) (nullable-p (cdr rhs) lls)) t)) (defun update-nullable (productions lls) "Expand nullables set" (setf (llchange lls) nil) (dolist (p productions) (if (nullable-p (prod-rhs p) lls) (add-null (prod-lhs p) lls))) (llchange lls)) (defun compute-nullable (productions lls) "Iterate until nullable is computed" (while (update-nullable productions lls))) ;; Define functions to compute first sets (defun update-production-first (lhs rhs lls) "Update the first set based on a production" (when rhs (add-first lhs (llfirst (car rhs) lls) lls) (if (llnull (car rhs) lls) (update-production-first lhs (cdr rhs) lls)))) (defun update-first (productions lls) "Expand first set" (setf (llchange lls) nil) (dolist (p productions) (update-production-first (prod-lhs p) (prod-rhs p) lls)) (llchange lls)) (defun compute-first (productions lls) "Compute first set by fixed-point iteration" (while (update-first productions lls))) ;; Define functions to compute follow sets ;; The update rule for one production ;; p -> y1 y2 ... yn is ;; is ;; 1. if (yi+1 .. yn) is nullable, update follow(yi) with follow(p) ;; 2. if (yi+1 .. yj-1) is nullable, update follow(yi) with first(yj) ;; We have one procedure to apply each of these rules. The tail recursions ;; give the same effect as the double nested loop described in Appel. (defun update-follow-production (p lls) "Update the follow sets based on a production" (let ((lhs (prod-lhs p)) (rhs (prod-rhs p))) (update-follow-production-1 lhs (car rhs) (cdr rhs) lls))) (defun update-follow-production-1 (lhs yi yrest lls) (when yi (if (nullable-p yrest lls) (add-follow yi (llfollow lhs lls) lls)) (update-follow-production-2 yi (car yrest) (cdr yrest) lls) (update-follow-production-1 lhs (car yrest) (cdr yrest) lls))) (defun update-follow-production-2 (yi yj yrest lls) (when yj (add-follow yi (llfirst yj lls) lls) (if (llnull yj lls) (update-follow-production-2 yi (car yrest) (cdr yrest) lls)))) (defun update-follow (productions lls) "Update follow sets" (setf (llchange lls) nil) (dolist (p productions) (update-follow-production p lls)) (llchange lls)) (defun compute-follow (productions lls) "Iterate to a fixed point on follow sets" (while (update-follow productions lls))) ;; Compute nullable, first, and follow (defun compute-llsets (terminals productions) "Fixed point iterations to find first, follow, and nullable" (let ((lls (init-llset terminals))) (compute-nullable productions lls) (compute-first productions lls) (compute-follow productions lls) lls)) ;;; ------------------------------------------- ;;; LL(1) parse table construction (defstruct ll1-parser terminals productions table get-token lookahead) (defun llstring-first (string lls) "Compute the first set for a string" (if string (union (llfirst (car string) lls) (if (llnull (car string) lls) (llstring-first (cdr string) lls))))) (defun lookahead-tokens (lhs rhs lls) "Compute the lookahead tokens" (union (llstring-first rhs lls) (if (nullable-p rhs lls) (llfollow lhs lls)))) (defun make-ll1 (terminals productions) "Create an LL(1) parser structure" (let ((ll1 (make-ll1-parser))) (setf (ll1-parser-terminals ll1) terminals) (setf (ll1-parser-productions ll1) productions) (setf (ll1-parser-table ll1) (build-ll1-table terminals productions)) ll1)) (defun build-ll1-table (terminals productions) "Compute a hash table (on nonterminals) of terminal-production pairs" (let ((lls (compute-llsets terminals productions)) (table (make-hash-table))) (dolist (p productions) (let ((lhs (prod-lhs p)) (rhs (prod-rhs p)) (action (prod-rule p))) (add-ll1-table lhs rhs action (lookahead-tokens lhs rhs lls) table))) table)) (defun add-ll1-table (lhs rhs action lookahead table) "Add a production to the parse table" ;; (format t "Add ~A -> ~A on ~A~%" lhs rhs lookahead) (let ((row (gethash lhs table))) (dolist (token lookahead) (if (assoc token row) (format t "Conflict on ~A: ~A -> ~A vs -> ~A~%" token lhs (cadr (assoc token row)) rhs) (push `(,token ,rhs ,action) row))) (setf (gethash lhs table) row))) ;;; ------------------------------------------- ;;; LL(1) parser (setf *print-ll1-expand* nil) (define-condition my-parse-error (error) ((text :initarg :text :reader text))) (defun parse-ll1 (ll1 &optional sym) "Return a parse tree via recursive descent" (let ((sym (or sym (caar (ll1-parser-productions ll1))))) (if (member sym (ll1-parser-terminals ll1)) (let* ((next-tok (funcall (ll1-parser-lookahead ll1)))) (unless (eq sym next-tok) (error 'my-parse-error :text (format nil "Expected ~A, saw ~A" sym next-tok))) (funcall (ll1-parser-get-token ll1))) (let* ((next-tok (funcall (ll1-parser-lookahead ll1))) (table (ll1-parser-table ll1)) (rule (assoc next-tok (gethash sym table)))) (if (not rule) (error 'my-parse-error :text (format nil "Error at ~A, cannot expand ~A" next-tok sym)) (if *print-ll1-expand* (format t "~A -> ~A~%" sym (second rule)))) (let ((rhs-tree (mapcar #'(lambda (s) (parse-ll1 ll1 s)) (second rule)))) (if (third rule) (apply (third rule) rhs-tree) (cons sym rhs-tree))))))) ;;; ------------------------------------------- ;;; Macro to construct a LL(1) parser structure (defun find-terminals (productions) (let ((all-symbols nil) (lhs-symbols nil)) (dolist (p productions) (setf-union all-symbols (prod-rhs p)) (pushnew (prod-lhs p) all-symbols) (pushnew (prod-lhs p) lhs-symbols)) (set-difference all-symbols lhs-symbols))) (defmacro def-ll1 (name &rest rules) `(setf ,name (make-ll1 ',(find-terminals rules) (list ,@(map 'list #'(lambda (rule) (let* ((lhs (first rule)) (rhs (second rule)) (nrhs (length rhs)) (act (cddr rule))) (if act `(list ',lhs ',rhs #'(lambda ,(pos-param-symbol-list nrhs) ,@act)) `(list ',lhs ',rhs)))) rules))))) ;;; ------------------------------------------- ;;; Test case (grammar with conflicts) #| (setf *grammar-312-terminals* '(a b c d)) (setf *grammar-312-rules* '((Z (d)) (Z (X Y Z)) (Y ()) (Y (c)) (X (Y)) (X (a)))) (setf *grammar-312-lls* (compute-llsets *grammar-312-terminals* *grammar-312-rules*)) (format t "Test case 1: Grammar with conflicts~%") (make-ll1 *grammar-312-terminals* *grammar-312-rules*) (format t "~%") |# ;;; ------------------------------------------- ;;; Test case: simple expression #| (setf *g315-rules* '((S (E eof)) (E (T Ep)) (Ep (+ T Ep)) (Ep (- T ep)) (Ep ()) (T (F Tp)) (Tp (* F Tp)) (Tp (/ F Tp)) (Tp ()) (F (id)) (F (num)) (F (op E cp)))) (setf *g315-terminals* '(eof + - * / id num op cp)) (setf *g315-lls* (compute-llsets *g315-terminals* *g315-rules*)) (setf *g315-ll1* (make-ll1 *g315-terminals* *g315-rules*)) (defun g315-parse (toks) (setf (ll1-parser-lookahead *g315-ll1*) #'(lambda () (car toks))) (setf (ll1-parser-get-token *g315-ll1*) #'(lambda () (format t "Consume ~A~%" (car toks)) (pop toks))) (parse-ll1 *g315-ll1*)) (setf *g315-stream* '(op num + num cp / num + num * num eof)) (setf *g315-tree* (g315-parse *g315-stream*)) |#