;;;-*- Mode: Common-Lisp; Base:10; Package: tiger -*- ;; set up for johnson-yacc parser (use-package :lalr) ;;; ----- Example grammars ----- ;; parse simple expressions ;; like 3+4-5 or a*b+c. (def-jyacc G1 (exp -> exp #\+ term (`(+ ,%0 ,%2))) (exp -> exp #\- term (`(- ,%0 ,%2))) (exp -> term) (term -> term #\* factor (`(* ,%0 ,%2))) (term -> factor) (factor -> id (`(const-value ',%0))) (factor -> iconst (`(const-value ',%0))) (factor -> bconst (`(const-value ',%0)))) (def-jyacc G2 ;like G1 but with parens and floats (exp -> exp #\+ term (`(+ ,%0 ,%2))) (exp -> exp #\- term (`(- ,%0 ,%2))) (exp -> term) (term -> term #\* factor (`(* ,%0 ,%2))) (term -> factor) (factor -> #\( exp #\) (`(const-value ',%1))) (factor -> id (`(const-value ',%0))) (factor -> iconst (`(const-value ',%0))) (factor -> bconst (`(const-value ',%0))) (factor -> fconst (`(const-value ',%0)))) (def-jyacc g310 ; grammar 3.10 from AWA text (S -> E $ (%0)) (E -> E + Tm (`(+ ,%0 ,%2))) (E -> E - Tm (`(- ,%0 ,%2))) (E -> Tm) (Tm -> Tm * F (`(* ,%0 ,%2))) (Tm -> Tm / F (`(/ ,%0 ,%2))) (Tm -> F) (F -> id) (F -> num) (F -> |(| E |)| (%1))) (def-jyacc gif ; page 69 matching ifs (S -> M) (S -> U) (M -> if E then M else M (`(if ,%1 ,%3 ,%5))) (M -> id) (U -> if E then S (`(if ,%1 ,%3 nil))) (U -> if E then M else U (`(if ,%1 ,%3 ,%5))) (E -> id)) ;;; ----- Example parsers ----- ;; The parsers produced by Johnson's YACC take two function arguments: ;; next-input, which performs lexical analysis, and err, which prints a ;; diagnostic when there is an error. ;; To quote from the jyacc file: ;; next-input performs lexical analysis. It must return a cons cell. ;; its car holds the category, its cdr the value. ;; The def-jyacc wrapper assumes that the end of file will be marked by ;; a token with class *eof*. (defun parse-g310 (tokens) ;; parse from a list of tokens, e.g. ( (id x) + (id y) $) (labels ((next() (print (let ((h (pop tokens))) (cond ((null h) '(*eof* . *eof*)) ; If we're at EOF ((atom h) (cons h h)) ; If it's a +, return (+ . +) (t (cons (car h) h)))))) (err() (format t "~%error at ~s" tokens))) (g310 #'next #'err))) (defun parse-gif (tokens) ;; parse from a list of tokens, e.g. (if a then if b then c else d else e) (labels ((next() (let ((h (pop tokens))) (cond ((null h) '(*eof* . *eof*)) ((member h '(if then else)) (cons h h)) (t (cons 'id h))))) (err() (format t "~%error at ~s" tokens))) (gif #'next #'err))) ;; For your project, you will probably want to define next-input not ;; in terms of a list of tokens, but in terms of the output of the lexer ;; from assignment 2. Here's a next-input which uses the interface read1 ;; to fetch a token. (defun next-input(&aux (h (read1))) "Get next input using read1" (let ((tok (if (atom h) (cons h h) ; if it is +, return (+ . +) else (cons (car h) h)))) ; return (iconst . (iconst ..)) ;; This bit of magic is for those of you using ANSI Lisps at home. (if (and (symbolp (car tok)) (eq 'foo 'FOO)) `(,(intern (string-upcase (car tok))) . ,(cdr tok)) tok))) (defun lalr-parse-error() "Report an error if we're using read1-based next-input" (declare(special *linenum* *colnum*)) (format t "error before ~a at line ~a col ~a" (pc) *linenum* *colnum*)) ;; This is main parser function. It takes a string which is ;; used as a file name. It also takes an optional argument, which ;; is the name of the parser function. (defun jparse (file &optional (grammar #'G2)) "Parse the file using Johnson's YACC with the specified grammar" (let ((buf nil)(*linenum* 1)(*colnum* 0)) (declare (special buf *linenum* *colnum* *stream*)) (with-open-stream (*stream* (open file)) (funcall grammar #'next-input #'lalr-parse-error)))) ;; Here's a version that does the same thing, but with a literal string. (defun jparses (s &optional (grammar #'G2)) (let ((buf nil)(*linenum* 1)(*colnum* 0)) (declare (special buf *linenum* *colnum* *stream*)) (with-open-stream (*stream* (make-string-input-stream s)) (funcall grammar #'next-input #'lalr-parse-error)))) ;;; ----- Test cases ----- (defun const-value (token-const) "extract the (constant) value from an iconst, sconst, bconst fconst token" (second token-const)) #| ;; Examples: (parse-g310 '((id x) + (id y) $)) (parse-gif '(if a then if b then c else d else e)) (eval (jparses "1+2*3")) |# ;;; ----- Sample data for G1 ----- #| (print-lalr-states) STATE-0: $START --> . EXP, NIL On BCONST shift STATE-9 On ICONST shift STATE-8 On ID shift STATE-7 On FACTOR shift STATE-4 On TERM shift STATE-12 On EXP shift STATE-1 STATE-1: $START --> EXP ., NIL EXP --> EXP . + TERM, + - NIL EXP --> EXP . - TERM, + - NIL On + shift STATE-10 On - shift STATE-2 On NIL reduce EXP --> $START STATE-2: EXP --> EXP - . TERM, NIL - + On BCONST shift STATE-9 On ICONST shift STATE-8 On ID shift STATE-7 On FACTOR shift STATE-4 On TERM shift STATE-3 STATE-3: EXP --> EXP - TERM ., + - NIL On + - NIL reduce EXP - TERM --> EXP STATE-4: TERM --> FACTOR . * TERM, NIL - + TERM --> FACTOR ., NIL - + On * shift STATE-5 On NIL - + reduce FACTOR --> TERM STATE-5: TERM --> FACTOR * . TERM, + - NIL On BCONST shift STATE-9 On ICONST shift STATE-8 On ID shift STATE-7 On FACTOR shift STATE-4 On TERM shift STATE-6 STATE-6: TERM --> FACTOR * TERM ., NIL - + On NIL - + reduce FACTOR * TERM --> TERM STATE-7: FACTOR --> ID ., + - * NIL On + - * NIL reduce ID --> FACTOR STATE-8: FACTOR --> ICONST ., + - * NIL On + - * NIL reduce ICONST --> FACTOR STATE-9: FACTOR --> BCONST ., + - * NIL On + - * NIL reduce BCONST --> FACTOR STATE-10: EXP --> EXP + . TERM, NIL - + On BCONST shift STATE-9 On ICONST shift STATE-8 On ID shift STATE-7 On FACTOR shift STATE-4 On TERM shift STATE-11 STATE-11: EXP --> EXP + TERM ., + - NIL On + - NIL reduce EXP + TERM --> EXP STATE-12: EXP --> TERM ., + - NIL On + - NIL reduce TERM --> EXP ............... (setf *lalr-debug* t) T (remake g1) LALR-PARSER (jparse "testfile8") ; contents of testfile8 = abc1+xyz*r+77 Shift ID to STATE-7 Reduce ID --> FACTOR Shift FACTOR to STATE-4 Reduce FACTOR --> TERM Shift TERM to STATE-12 Reduce TERM --> EXP Shift EXP to STATE-1 Shift + to STATE-10 Shift ID to STATE-7 Reduce ID --> FACTOR Shift FACTOR to STATE-4 Shift * to STATE-5 Shift ID to STATE-7 Reduce ID --> FACTOR Shift FACTOR to STATE-4 Reduce FACTOR --> TERM Shift TERM to STATE-6 Reduce FACTOR * TERM --> TERM Shift TERM to STATE-11 Reduce EXP + TERM --> EXP Shift EXP to STATE-1 Shift + to STATE-10 Shift ICONST to STATE-8 Reduce ICONST --> FACTOR Shift FACTOR to STATE-4 Reduce FACTOR --> TERM Shift TERM to STATE-11 Reduce EXP + TERM --> EXP Shift EXP to STATE-1 Reduce EXP --> $START (+ (+ ABC1 (* XYZ R)) 77) ........... the actual lalr parser looks like this... (DEFUN LALR-PARSER (NEXT-INPUT PARSE-ERROR) (LET ((CAT-LA 'NIL) (VAL-LA 'NIL) (VAL-STACK 'NIL) (STATE-STACK 'NIL)) (LABELS ((INPUT-PEEK NIL (UNLESS CAT-LA (LET ((NEW (FUNCALL NEXT-INPUT))) (SETQ CAT-LA (LIST (CAR NEW))) (SETQ VAL-LA (LIST (CDR NEW))))) (FIRST CAT-LA)) (SHIFT-FROM (NAME) (PUSH NAME STATE-STACK) (POP CAT-LA) (PUSH (POP VAL-LA) VAL-STACK)) (REDUCE-CAT (NAME CAT NDAUGHTERS ACTION) (IF (EQ CAT '$START) (POP VAL-STACK) (LET ((DAUGHTER-VALUES 'NIL) (STATE NAME)) (DOTIMES (I NDAUGHTERS) (PUSH (POP VAL-STACK) DAUGHTER-VALUES) (SETQ STATE (POP STATE-STACK))) (PUSH CAT CAT-LA) (PUSH (APPLY ACTION DAUGHTER-VALUES) VAL-LA) (FUNCALL STATE)))) (STATE-0 NIL (CASE (INPUT-PEEK) (BCONST (WHEN *LALR-DEBUG* (PRINC "Shift BCONST to STATE-9 ")) (SHIFT-FROM #'STATE-0) (STATE-9)) (ICONST (WHEN *LALR-DEBUG* (PRINC "Shift ICONST to STATE-8 ")) (SHIFT-FROM #'STATE-0) (STATE-8)) (ID (WHEN *LALR-DEBUG* (PRINC "Shift ID to STATE-7 ")) (SHIFT-FROM #'STATE-0) (STATE-7)) (FACTOR (WHEN *LALR-DEBUG* (PRINC "Shift FACTOR to STATE-4 ")) (SHIFT-FROM #'STATE-0) (STATE-4)) (TERM (WHEN *LALR-DEBUG* (PRINC "Shift TERM to STATE-12 ")) (SHIFT-FROM #'STATE-0) (STATE-12)) (EXP (WHEN *LALR-DEBUG* (PRINC "Shift EXP to STATE-1 ")) (SHIFT-FROM #'STATE-0) (STATE-1)) (OTHERWISE (FUNCALL PARSE-ERROR)))) (STATE-1 NIL (CASE (INPUT-PEEK) (+ (WHEN *LALR-DEBUG* (PRINC "Shift + to STATE-10 ")) (SHIFT-FROM #'STATE-1) (STATE-10)) (- (WHEN *LALR-DEBUG* (PRINC "Shift - to STATE-2 ")) (SHIFT-FROM #'STATE-1) (STATE-2)) ((NIL) (WHEN *LALR-DEBUG* (PRINC "Reduce EXP --> $START ")) (REDUCE-CAT #'STATE-1 '$START 1 NIL)) (OTHERWISE (FUNCALL PARSE-ERROR)))) (STATE-2 NIL (CASE (INPUT-PEEK) (BCONST (WHEN *LALR-DEBUG* (PRINC "Shift BCONST to STATE-9 ")) (SHIFT-FROM #'STATE-2) (STATE-9)) (ICONST (WHEN *LALR-DEBUG* (PRINC "Shift ICONST to STATE-8 ")) (SHIFT-FROM #'STATE-2) (STATE-8)) (ID (WHEN *LALR-DEBUG* (PRINC "Shift ID to STATE-7 ")) (SHIFT-FROM #'STATE-2) (STATE-7)) (FACTOR (WHEN *LALR-DEBUG* (PRINC "Shift FACTOR to STATE-4 ")) (SHIFT-FROM #'STATE-2) (STATE-4)) (TERM (WHEN *LALR-DEBUG* (PRINC "Shift TERM to STATE-3 ")) (SHIFT-FROM #'STATE-2) (STATE-3)) (OTHERWISE (FUNCALL PARSE-ERROR)))) (STATE-3 NIL (CASE (INPUT-PEEK) ((+ - NIL) (WHEN *LALR-DEBUG* (PRINC "Reduce EXP - TERM --> EXP ")) (REDUCE-CAT #'STATE-3 'EXP 3 #'(LAMBDA (EXP N TERM) (LIST '- EXP TERM)))) (OTHERWISE (FUNCALL PARSE-ERROR)))) (STATE-4 NIL (CASE (INPUT-PEEK) (* (WHEN *LALR-DEBUG* (PRINC "Shift * to STATE-5 ")) (SHIFT-FROM #'STATE-4) (STATE-5)) ((NIL - +) (WHEN *LALR-DEBUG* (PRINC "Reduce FACTOR --> TERM ")) (REDUCE-CAT #'STATE-4 'TERM 1 #'(LAMBDA (FACTOR) FACTOR))) (OTHERWISE (FUNCALL PARSE-ERROR)))) (STATE-5 NIL (CASE (INPUT-PEEK) (BCONST (WHEN *LALR-DEBUG* (PRINC "Shift BCONST to STATE-9 ")) (SHIFT-FROM #'STATE-5) (STATE-9)) (ICONST (WHEN *LALR-DEBUG* (PRINC "Shift ICONST to STATE-8 ")) (SHIFT-FROM #'STATE-5) (STATE-8)) (ID (WHEN *LALR-DEBUG* (PRINC "Shift ID to STATE-7 ")) (SHIFT-FROM #'STATE-5) (STATE-7)) (FACTOR (WHEN *LALR-DEBUG* (PRINC "Shift FACTOR to STATE-4 ")) (SHIFT-FROM #'STATE-5) (STATE-4)) (TERM (WHEN *LALR-DEBUG* (PRINC "Shift TERM to STATE-6 ")) (SHIFT-FROM #'STATE-5) (STATE-6)) (OTHERWISE (FUNCALL PARSE-ERROR)))) (STATE-6 NIL (CASE (INPUT-PEEK) ((NIL - +) (WHEN *LALR-DEBUG* (PRINC "Reduce FACTOR * TERM --> TERM ")) (REDUCE-CAT #'STATE-6 'TERM 3 #'(LAMBDA (FACTOR N TERM) (LIST '* FACTOR TERM)))) (OTHERWISE (FUNCALL PARSE-ERROR)))) (STATE-7 NIL (CASE (INPUT-PEEK) ((+ - * NIL) (WHEN *LALR-DEBUG* (PRINC "Reduce ID --> FACTOR ")) (REDUCE-CAT #'STATE-7 'FACTOR 1 #'(LAMBDA (ID) (CONST-VALUE ID)))) (OTHERWISE (FUNCALL PARSE-ERROR)))) (STATE-8 NIL (CASE (INPUT-PEEK) ((+ - * NIL) (WHEN *LALR-DEBUG* (PRINC "Reduce ICONST --> FACTOR ")) (REDUCE-CAT #'STATE-8 'FACTOR 1 #'(LAMBDA (ICONST) (CONST-VALUE ICONST)))) (OTHERWISE (FUNCALL PARSE-ERROR)))) (STATE-9 NIL (CASE (INPUT-PEEK) ((+ - * NIL) (WHEN *LALR-DEBUG* (PRINC "Reduce BCONST --> FACTOR ")) (REDUCE-CAT #'STATE-9 'FACTOR 1 #'(LAMBDA (BCONST) (CONST-VALUE BCONST)))) (OTHERWISE (FUNCALL PARSE-ERROR)))) (STATE-10 NIL (CASE (INPUT-PEEK) (BCONST (WHEN *LALR-DEBUG* (PRINC "Shift BCONST to STATE-9 ")) (SHIFT-FROM #'STATE-10) (STATE-9)) (ICONST (WHEN *LALR-DEBUG* (PRINC "Shift ICONST to STATE-8 ")) (SHIFT-FROM #'STATE-10) (STATE-8)) (ID (WHEN *LALR-DEBUG* (PRINC "Shift ID to STATE-7 ")) (SHIFT-FROM #'STATE-10) (STATE-7)) (FACTOR (WHEN *LALR-DEBUG* (PRINC "Shift FACTOR to STATE-4 ")) (SHIFT-FROM #'STATE-10) (STATE-4)) (TERM (WHEN *LALR-DEBUG* (PRINC "Shift TERM to STATE-11 ")) (SHIFT-FROM #'STATE-10) (STATE-11)) (OTHERWISE (FUNCALL PARSE-ERROR)))) (STATE-11 NIL (CASE (INPUT-PEEK) ((+ - NIL) (WHEN *LALR-DEBUG* (PRINC "Reduce EXP + TERM --> EXP ")) (REDUCE-CAT #'STATE-11 'EXP 3 #'(LAMBDA (EXP N TERM) (LIST '+ EXP TERM)))) (OTHERWISE (FUNCALL PARSE-ERROR)))) (STATE-12 NIL (CASE (INPUT-PEEK) ((+ - NIL) (WHEN *LALR-DEBUG* (PRINC "Reduce TERM --> EXP ")) (REDUCE-CAT #'STATE-12 'EXP 1 #'(LAMBDA (TERM) TERM))) (OTHERWISE (FUNCALL PARSE-ERROR))))) (STATE-0)))) |#