Grammar for the Pyth Dialect of Python Version 1.1, 4 February 2005 This grammar is taken from an on-line version of the full Python grammar, and modified for Pyth. I have retained the original notation, which is sort of a mix of BNF and regular-expression notation: ::= means "produces" or "can be formed from" | means "or" [ x ] means "an optional x" (that is, "x or the empty string") ( x ) grouping ( x )* means "zero or more x's" (as for regular expressions) ( x )+ means "one or more x's" "t" the terminal symbol "t" The definitions below use the following lexemes: identifier stringliteral Simple string literals (in one or three quotes) integer integer literals between 0 and 2**32-1. Decimal literals must be < 2**31. Octal and hexadecimal integers >= 2**31 are interpreted as negative numbers as in Java. NEWLINE A newline character that is not within brackets ( (), [], or {}), not at the end of a blank line, and not immediately after a backslash (\). INDENT A new, deeper level of indentation. See section 2.1.7 of the on-line Python documentation (2.1) for a description of the rules for generating INDENT and DEDENT. DEDENT The end of a level of indentation. # PART I: Lexical Definitions # These lexical definitions are in BNF form, but you will generally want # to translate them to regular expressions (to use flex or jflex). # They differ from the rest of the BNF in that they refer to # characters, not tokens. In particular, white space may not separate # the strings matched by symbols on the right-hand sides of lexical # productions. identifier ::= (letter|"_") (letter | digit | "_")* letter ::= lowercase | uppercase lowercase ::= "a"..."z" uppercase ::= "A"..."Z" digit ::= "0"..."9" stringliteral ::= [stringprefix](shortstring | longstring) # Raw ("r" or "R") means that escape sequences (escapeseq) are NOT # translated, but are just left in as is (the syntax of raw # string literals is the same as "cooked" string literals; just the # meaning is changed). 
stringprefix ::= "r" | "R" shortstring ::= "'" shortstringitem* "'" | '"' shortstringitem* '"' longstring ::= "'''" longstringitem* "'''" | '"""' longstringitem* '"""' # However, a long string ends at the first occurrence of ''' or """, so # while you can have one or two unescaped " characters in the middle of # a """ """ string (or one or two unescaped ' characters in the middle # of a ''' ''' string), THREE quotes end the string. shortstringitem ::= shortstringchar | escapeseq longstringitem ::= longstringchar | escapeseq shortstringchar ::= <any ASCII character except "\" or newline or the quote> longstringchar ::= <any ASCII character except "\"> # NOTE: No octal, hexadecimal, or Unicode escapes. escapeseq ::= "\" <any ASCII character> integer ::= decimalinteger | octinteger | hexinteger decimalinteger ::= nonzerodigit digit* | "0" octinteger ::= "0" octdigit+ hexinteger ::= "0" ("x" | "X") hexdigit+ nonzerodigit ::= "1"..."9" octdigit ::= "0"..."7" hexdigit ::= digit | "a"..."f" | "A"..."F" # Other lexemes # All quoted items in Part II, below, are terminal symbols. They fall into # three categories: keywords, operators, and delimiters. # Keywords have the syntax of identifiers, but are distinct from them # (e.g., you cannot use "if" as an identifier): # and elif in pass # break else is print # class for lambda return # continue global not while # def if or # In addition, the Pyth dialect reserves one other keyword: # native # Operators appear in expressions: # + - * ** / % # << >> & | ^ ~ # < > <= >= == != <> # Delimiters appear in statements: # ( ) [ ] { } # , : . 
` = ; # += -= *= /= %= **= # &= |= ^= >>= <<= ############################################################ # PART II: Context-free Syntax # Top level program ::= (NEWLINE | statement)* funcdef ::= "def" identifier "(" [parameter_list] ")" ":" suite parameter_list ::= identifier ("," identifier)* [","] classdef ::= "class" identifier [inheritance] ":" suite inheritance ::= "(" identifier ")" # Expressions atom ::= identifier | literal | enclosure enclosure ::= parenth_form | list_display | dict_display literal ::= stringliteral | integer parenth_form ::= "(" [expression_list] ")" list_display ::= "[" [expression_list] "]" dict_display ::= "{" [key_datum_list] "}" key_datum_list ::= key_datum ("," key_datum)* [","] key_datum ::= expression ":" expression primary ::= atom | attributeref | subscription | slicing | call attributeref ::= primary "." identifier subscription ::= primary "[" expression_list "]" slicing ::= primary "[" [expression] ":" [expression] "]" call ::= primary "(" [argument_list] ")" argument_list ::= expression_list power ::= primary ["**" u_expr] u_expr ::= power | "-" u_expr | "+" u_expr | "~" u_expr m_expr ::= u_expr | m_expr "*" u_expr | m_expr "/" u_expr | m_expr "%" u_expr a_expr ::= m_expr | a_expr "+" m_expr | a_expr "-" m_expr shift_expr ::= a_expr | shift_expr ( "<<" | ">>" ) a_expr and_expr ::= shift_expr | and_expr "&" shift_expr xor_expr ::= and_expr | xor_expr "^" and_expr or_expr ::= xor_expr | or_expr "|" xor_expr comparison ::= or_expr ( comp_operator or_expr )* comp_operator ::= "<" | ">" | "==" | ">=" | "<=" | "<>" | "!=" | "is" ["not"] | ["not"] "in" not_test ::= comparison | "not" not_test and_test ::= not_test | and_test "and" not_test or_test ::= and_test | or_test "or" and_test lambda_form ::= "lambda" [parameter_list] ":" expression expression ::= or_test | lambda_form expression_list ::= proper_expression_list [ "," ] proper_expression_list ::= expression ( "," expression )* # Statements simple_stmt ::= expression_stmt | 
assignment_stmt | augmented_assignment_stmt | pass_stmt | print_stmt | return_stmt | break_stmt | continue_stmt | global_stmt expression_stmt ::= expression_list assignment_stmt ::= (target_list "=")+ expression_list target_list ::= target ("," target)* [","] target ::= identifier | attributeref | subscription | slicing augmented_assignment_stmt ::= target augop expression_list augop ::= "+=" | "-=" | "*=" | "/=" | "%=" | "**=" | ">>=" | "<<=" | "&=" | "^=" | "|=" pass_stmt ::= "pass" print_stmt ::= "print" [ proper_expression_list [ "," ] ] return_stmt ::= "return" [expression_list] break_stmt ::= "break" continue_stmt ::= "continue" global_stmt ::= "global" identifier ("," identifier)* compound_stmt ::= if_stmt | while_stmt | for_stmt | funcdef | classdef suite ::= stmt_list NEWLINE | NEWLINE INDENT statement+ DEDENT statement ::= stmt_list NEWLINE | compound_stmt stmt_list ::= simple_stmt (";" simple_stmt)* [";"] if_stmt ::= "if" expression ":" suite ( "elif" expression ":" suite )* ["else" ":" suite] while_stmt ::= "while" expression ":" suite ["else" ":" suite] for_stmt ::= "for" identifier_list "in" expression_list ":" suite [ "else" ":" suite ] identifier_list ::= identifier ("," identifier)* # Pyth Extensions to Standard Python # (The simple_stmt and call productions here add alternatives to the definitions of those symbols given earlier.) simple_stmt ::= type_assertion type_assertion ::= identifier ":" type type ::= simple_type | function_type simple_type ::= identifier function_type ::= simple_type "->" type | "(" [ type_list ] ")" "->" type type_list ::= type ( "," type )* call ::= "native" stringliteral "(" [argument_list] ")"