CS164 Section 4 - February 17/18, 2004 Announcements: WA3 due Thursday at 12:30pm - Please follow the instructions on PA WA4 out Thursday carefully to avoid any penalities. PA3 due February 20 Many students did not submit test.cl for PA2 Outline: - Finish discussion on LL Parsing - LR Parsing - Bison/Cup ============================================================== 1. Finish Discussion on LL Parsing from last time ============================================================= 2. LR Parsing ------------------------------------------------------------------ Difference between LL(k) and LR(k) "For a grammar to be LR(k), we must be able to recognize the occurrence of the right side of a production, having seen all of what is derived from that right side with k input symbols of lookahead. This requirement is far less stringent than that for LL(k) grammars where we must be able to recognize the use of a production seeing only the first k symbols of what its right side derives. Thus, LR grammars can describe more languages than LL grammars" - Dragon Book Idea: "reverse" the process "reducing" a string to the start symbol by inverting the productions - Do this by dividing the string into two parts. The left-side is the "processing" side and the right-side is the "to be processed" side. - What two actions do we have to "process"? - Shift - Reduce - How do we decide which action to use when? ------------------------------------------------------------------------ Consider the following grammar: G -> B B -> B E+ | -E E -> epsilon | E c Rules G -> B (Rule R1) B -> B E+ (Rule R2) B -> -E (Rule R3) E -> epsilon (Rule R4) E -> E c (Rule R5) The set of LR(1) items: I0: G->.B, $; B->.BE+, $/c/+; B->.-E, $/c/+ I1: B->-.E, $/c/+; E->.Ec, $/c/+, E->., $/c/+ I2: B->-E., $/c/+; E->E.c, $/c/+ I3: E->Ec., $/c/+ I4: B->BE+., $/c/+ I5: G->B., $; B->B.E+, $/c/+; E->.Ec, +/c; E->. +/c I6: B->BE.+, $/c/+; E->E.c, +/c I7: E->Ec., +/c Goto(I0,B)=I5; Goto(I0,-)=I1 Goto(I1,E)=I2 Goto(I2,c)=I3 Goto(I5,E)=I6 Goto(I6,+)=I4 Goto(I6,c)=I7 Action Table: B E + - c $ I0 S5 S1 I1 S2 R4 R4 R4 I2 R3 S3/R3 R3 I3 R5 R5 R5 I4 R2 R2 R2 I5 S6 R4 R4 R1/accept I6 S4 S7 I7 R5 R5 --------------------------------------------------------------------- Parsing the input: -c+$: ^-c+$ shift: (-,1)^c+$ reduce R4: (-,1)(E,2)^c+$ Alternative 1: shift: (-,1)(E,2)(c,3)^+$ reduce R5: (-,1)(E,2)^+$ reduce R3: (B,5)^+$ reduce R4: (B,5)(E,6)^+$ shift: (B,5)(E,6)(+,4)^$ reduce R2: (B,5)^$ reduce R1: accept Alternative 2: reduce R3: (B,5)^c+$ reduce R4: (B,5)(E,6)^c+$ shift: (B,5)(E,6)(c,7)^+$ reduce R5: (B,5)(E,6)^+$ shift: (B,5)(E,6)(+,4)^$ reduce R2: (B,5)^$ reduce R1: accept ========================================================================== 3. Identify errors in the Bison specification given below for the grammar: exp = integer | variable | exp + exp | exp - exp | exp * exp | if bexp then exp else exp fi | exp ; exp bexp = variable | bexp1 & bexp2 | bexp1 '|' bexp2 | ~ bexp -------------------------------------------------------------------------- Here's the Flex Specification exp.l %{ #include #include "exp.tab.h" %} %% if { return IF; } then { return THEN; } else { return ELSE; } [0-9]+ { yylval.integer=atoi(yytext); return NUM; } [a-zA-Z][0-9a-zA-Z] { yylval.string = yytext; return IDENTIFIER; } [ \t]+ /* eat up whitespace */ . { return yytext[0]; } %% --------------------------------------------------------------------------- Here's the Bison Specification exp.y %{ #include struct exp* new_expr(); struct bexp* new_bexpr(); %} %union { struct bexp { enum { Variable, Not, And, Or } code; struct bexp* b1; struct bexp* b2; char* s; }* boolean_expression; struct exp { enum { Integer, Variable, Plus, Minus, Multiply, Conditional } code; struct bexp* b; struct exp* e1; struct exp* e2; int i; char* s; }* expression; int integer; char* string; } %token NUM %token IDENTIFIER @@@ ERROR 1: Define the type of Identifier since its value is used as a string. @@@ %token IDENTIFIER %token IF THEN ELSE FI %left ';' @@@ ERROR 6: '-' should be same precedence as '+' %left '+' '-' %left '*' @@@ ERROR 2: Define the precedence of Boolean operators to avoid shift-reduce conflicts. @@@ %left '&', '|' @@@ %left '~' @@@ ERROR 5: Need to declare types for non-terminals @@@ %type exp @@@ %type bexp %% exp: NUM { $$ = $1; } @@@ ERROR 3: NUM is of type integer while exp is of type struct exp* @@@ { $$ = new_expr(); $$->code = Integer; $$->i=$1; } | IDENTIFIER { $$ = new_expr(); $$->code = Variable; strdup($$->s,$1); } | exp '+' exp { $$ = new_expr(); $$->code = Plus; $$->e1=$1; $$->e2=$2; } @@@ ERROR 4: The second exp is referred by $3 not by $2 @@@ { ....; $$->e2=$3; } | exp '-' exp { $$ = new_expr(); $$->code = Minus; $$->e1 = $1; $$->e2 = $3; } | exp '*' exp { $$ = new_expr(); $$->code = Multiply; $$->e1 = $1; $$->e2 = $3; } | IF bexp THEN exp ELSE exp FI { $$ = new_expr(); $$->code = Conditional; $$->e1 = $4; $$->e2 = $6; $$-> b = $2; } | exp ';' exp { $$ = $3; } @@@ Add error handling to allow the parser to continue if there is an @@@ error in the first expression. @@@ | error ';' exp { $$ = $3; } ; bexp: IDENTIFIER @@@ ERROR 7: IDENTIFIER is of type string while bexp is of type struct bexp* @@@ (same as ERROR 3) @@@ { $$ = new_bexpr(); $$->code = Variable; strdup($$->s,$1); } | '~' bexp { $$ = new_bexpr(); $$->code = Not; $$->b1 = $2; } | bexp '&' bexp { $$ = new_bexpr(); $$->code = And; $$->b1 = $1; $$->b2 = $3; } | bexp '|' bexp { $$ = new_bexpr(); $$->code = Or; $$->b1 = $1; $$->b2 = $3; } ; %% struct exp* new_expr() { return (struct exp*)malloc(sizeof(struct exp)); } struct bexp* new_bexpr() { return (struct bexp*)malloc(sizeof(struct bexp)); } int main () { return yyparse(); } int yyerror(const char*s) { printf("%s\n",s); } -------------------------------------------------------------- Compile it using bison -d exp.y flex exp.l g++ lex.yy.c exp.tab.c -lfl