#
# This grammar was closely derived from
#   ECMA-262, ECMAScript Language Specification, 3rd Edition
#   Appendix A.
#
# Changes to specification grammar.
#  * RegularExpressionLiteral has been added to Literal,
#    since we don't have a separate tokenizer.
#  * <SourceCharacter> removals.
#    Several "SourceCharacter but not ..." have been expressed as
#    complemented character sets, <-[...]>.  They are all tagged with
#      #SourceCharacter but not ...
#    But <SourceCharacter> isn't called for all characters anyway,
#    so no great loss.
#
# Other notes:
#  * <ws> is redefined to be used, via :words, by A.3, A.4, and A.5.
#    Note it is always an optional match.
#  * Parts of the grammar, intended for a tokenizer, are not actually used.
#  * Some rules, eg StringNumericLiteral, are not used in parsing JavaScript,
#    but rather in implementing it (eg, by ToNumber).
#  * Character sets are sometimes collapsed.
#    Eg, <'0'> | <'1'> ... <'9'> changed to <[0..9]>
#  * "Foo but not x" is expressed as
#      (NOTE(review): the example pattern is missing from this copy of the file.)
#  * "lookahead is not an element of { x }" is expressed as
#      (NOTE(review): the example pattern is missing from this copy of the file.)
#    Such usage is tagged with  #lookahead
#    If rules taking arguments had any hope of working soon,
#    I would have said something else
#    (NOTE(review): the rule name is missing from this copy of the file).
#  * "[no LineTerminator here]" is expressed as
#      <no_LineTerminator_here>.
#
# Resources:
#   http://www.ecma-international.org/publications/standards/Ecma-262.htm
#   http://bclary.com/2004/11/07/ecma-262
#
# The SPEC grammar SHOULD NOT BE CHANGED to accommodate the characteristics
# of particular regex engines.  Its structure follows the spec.  If, for
# instance, your engine cannot handle left recursive rules, you should
# create a new grammar, inherit from this one, and redefine just those
# rules.  See JavaScript::ECMAScript3::Grammar.
#
# Bugs:
#  * This grammar has never yet been run, so there no doubt are some.
#
# Possible Bugs:
#  * We are not using some of the tokenizer rules.
#    So it isn't entirely clear we are doing a fully correct parse.
#
# Todo:
#  * Create a second grammar which actually runs, is efficient,
#    and has captures for easy use of the resulting matches.
#  * Perhaps define a macro  gram ($name) { "rule $name :words" },
#    rather than having the clutter of lots of explicit :words modifiers.
#
# Thanks to Bob Clary, who created the HTML-ified ecma-262, without which
# this grammar would likely not have been written.  - Mitchell N Charity
#
# NOTE(review): in this copy of the file many rule bodies contain empty
# alternatives ("| |"), bare quantifiers ("?", "*", "+") and stray ">"
# characters.  It looks as if the <Name> subrule references (and the
# "<!before ..." openers) were stripped by an HTML-style filter.  The bodies
# below are kept exactly as found; restore the references from ECMA-262
# Appendix A before trying to compile this grammar.
#

# Grammar following the structure of ECMA-262 (3rd edition) Appendix A.
grammar JavaScript::ECMAScript3::Grammar::Spec;

# Whitespace helpers used by the :words rules (A.3, A.4 and A.5).
rule ws {*}
rule ws_required {+}
rule ws_input { | | }
rule no_LineTerminator_here { [ & <->*? ] }

# 7.2 White Space
rule TAB { \x0009 }
rule VT { \x000B }
rule FF { \x000C }
rule SP { \x0020 }
rule NBSP { \x00A0 }
rule USP { <-----> }

# 7.3 Line Terminators
rule LF { \x000A }
rule CR { \x000D }
rule LS { \x2028 }
rule PS { \x2029 }

# A.1 Lexical Grammar

rule SourceCharacter { # see clause 6
    . #dot
}
rule InputElementDiv { # see clause 7
    | | |
}
rule InputElementRegExp { # see clause 7
    | | | |
}
rule WhiteSpace { # see 7.2
    | | | | |
}
rule LineTerminator { # see 7.3
    | | |
}
rule Comment { # see 7.4
    |
}
rule MultiLineComment { # see 7.4
    <'/*'> ? <'*/'>
}
rule MultiLineCommentChars { # see 7.4
    ?
    | <'*'> ?
}
rule PostAsteriskCommentChars { # see 7.4
    ?
    | <'*'> ?
}
rule MultiLineNotAsteriskChar { # see 7.4
    <-[*]> #SourceCharacter but not ...
}
rule MultiLineNotForwardSlashOrAsteriskChar { # see 7.4
    <-[/*]> #SourceCharacter but not ...
}
rule SingleLineComment { # see 7.4
    <'//'> ?
}
rule SingleLineCommentChars { # see 7.4
    ?
}
rule SingleLineCommentChar { # see 7.4
    >
}
rule Token { # see 7.5
    | | | |
}
rule ReservedWord { # see 7.5.1
    | | |
}
rule Keyword { # see 7.5.2
    <'break'> | <'else'> | <'new'> | <'var'>
    | <'case'> | <'finally'> | <'return'> | <'void'>
    | <'catch'> | <'for'> | <'switch'> | <'while'>
    | <'continue'> | <'function'> | <'this'> | <'with'>
    | <'default'> | <'if'> | <'throw'>
    | <'delete'> | <'in'> | <'try'>
    | <'do'> | <'instanceof'> | <'typeof'>
}
rule FutureReservedWord { # see 7.5.3
    <'abstract'> | <'enum'> | <'int'> | <'short'>
    | <'boolean'> | <'export'> | <'interface'> | <'static'>
    | <'byte'> | <'extends'> | <'long'> | <'super'>
    | <'char'> | <'final'> | <'native'> | <'synchronized'>
    | <'class'> | <'float'> | <'package'> | <'throws'>
    | <'const'> | <'goto'> | <'private'> | <'transient'>
    | <'debugger'> | <'implements'> | <'protected'> | <'volatile'>
    | <'double'> | <'import'> | <'public'>
}
rule Identifier { # see 7.6
    >
}
rule IdentifierName { # see 7.6
    |
}
rule IdentifierStart { # see 7.6
    | <'$'> | <'_'> | <'\\'>
}
rule IdentifierPart { # see 7.6
    | | | | <'\\'>
}
rule UnicodeLetter { # see 7.6
    # any character in the Unicode categories "Uppercase letter (Lu)",
    # "Lowercase letter (Ll)", "Titlecase letter (Lt)",
    # "Modifier letter (Lm)", "Other letter (Lo)", or "Letter number (Nl)".
    | | | | |
}
rule UnicodeCombiningMark { # see 7.6
    # any character in the Unicode categories "Non-spacing mark (Mn)"
    # or "Combining spacing mark (Mc)"
    |
}
rule UnicodeDigit { # see 7.6
    # any character in the Unicode category "Decimal number (Nd)"
}
rule UnicodeConnectorPunctuation { # see 7.6
    # any character in the Unicode category "Connector punctuation (Pc)"
}
rule HexDigit { # see 7.6
    <[0..9a..fA..F]>
}
rule Punctuator { # see 7.7
    <'{'> | <'}'> | <'('> | <')'> | <'['> | <']'>
    | <'.'> | <';'> | <','> | <'<'> | <'>'> | <'<='>
    | <'>='> | <'=='> | <'!='> | <'==='> | <'!=='>
    | <'+'> | <'-'> | <'*'> | <'%'> | <'++'> | <'--'>
    | <'<<'> | <'>>'> | <'>>>'> | <'&'> | <'|'> | <'^'>
    | <'!'> | <'~'> | <'&&'> | <'||'> | <'?'> | <':'>
    | <'='> | <'+='> | <'-='> | <'*='> | <'%='> | <'<<='>
    | <'>>='> | <'>>>='> | <'&='> | <'|='> | <'^='>
}
rule DivPunctuator { # see 7.7
    <'/'> | <'/='>
}
rule Literal { # see 7.8
    | | | | # ADDED - this is not in the spec.
}
rule NullLiteral { # see 7.8.1
    <'null'>
}
rule BooleanLiteral { # see 7.8.2
    <'true'> | <'false'>
}
rule NumericLiteral { # see 7.8.3
    |
}
rule DecimalLiteral { # see 7.8.3
    <'.'> ? ?
    | <'.'> ?
    | ?
}
rule DecimalIntegerLiteral { # see 7.8.3
    <'0'> | ?
}
rule DecimalDigits { # see 7.8.3
    |
}
rule DecimalDigit { # see 7.8.3
    <[0..9]>
}
rule ExponentIndicator { # see 7.8.3
    <'e'> | <'E'>
}
rule SignedInteger { # see 7.8.3
    | <'+'> | <'-'>
}
rule HexIntegerLiteral { # see 7.8.3
    <'0x'> | <'0X'> |
}
rule StringLiteral { # see 7.8.4
    <'"'> ? <'"'>
    | <'\''> ? <'\''>
}
rule DoubleStringCharacters { # see 7.8.4
    ?
}
rule SingleStringCharacters { # see 7.8.4
    ?
}
rule DoubleStringCharacter { # see 7.8.4
    > <-[\"\\]> #SourceCharacter but not ...
    | <'\\'>
}
rule SingleStringCharacter { # see 7.8.4
    > <-[\'\\]> #SourceCharacter but not ...
    | <'\\'>
}
rule EscapeSequence { # see 7.8.4
    | <'0'> > #lookahead
    | |
}
rule CharacterEscapeSequence { # see 7.8.4
    |
}
rule SingleEscapeCharacter { # see 7.8.4
    <'\''> | <'"'> | <'\\'> | <'b'> | <'f'> | <'n'> | <'r'> | <'t'> | <'v'>
}
rule EscapeCharacter { # see 7.8.4
    | | <'x'> | <'u'>
}
rule HexEscapeSequence { # see 7.8.4
    <'x'>
}
rule UnicodeEscapeSequence { # see 7.8.4
    <'u'>
}
rule RegularExpressionLiteral { # see 7.8.5
    <'/'> <'/'>
}
rule RegularExpressionBody { # see 7.8.5
}
rule RegularExpressionChars { # see 7.8.5
    |
}
rule RegularExpressionFirstChar { # see 7.8.5
    > |
}
rule RegularExpressionChar { # see 7.8.5
    > |
}
rule BackslashSequence { # see 7.8.5
    <'\\'>
}
rule NonTerminator { # see 7.8.5
    >
}
rule RegularExpressionFlags { # see 7.8.5
    |
}

# A.2 Number Conversions
#
# NOTE(review): DecimalDigits, DecimalDigit, ExponentIndicator, SignedInteger,
# HexIntegerLiteral and HexDigit are declared a second time in this section
# (they already appear under A.1).  Confirm the target engine accepts the
# redeclaration.

rule StringNumericLiteral { # see 9.3.1
    ?
    | ? ?
}
rule StrWhiteSpace { # see 9.3.1
    ?
}
rule StrWhiteSpaceChar { # see 9.3.1
    | | | | | | | | |
}
rule StrNumericLiteral { # see 9.3.1
    |
}
rule StrDecimalLiteral { # see 9.3.1
    | <'+'> | <'-'>
}
rule StrUnsignedDecimalLiteral { # see 9.3.1
    <'Infinity'>
    | <'.'> ? ?
    | <'.'> ?
    | ?
}
rule DecimalDigits { # see 9.3.1
    |
}
rule DecimalDigit { # see 9.3.1
    <[0..9]>
}
rule ExponentPart { # see 9.3.1
}
rule ExponentIndicator { # see 9.3.1
    <'e'> | <'E'>
}
rule SignedInteger { # see 9.3.1
    | <'+'> | <'-'>
}
rule HexIntegerLiteral { # see 9.3.1
    <'0x'> | <'0X'> |
}
rule HexDigit { # see 9.3.1
    <[0..9a..fA..F]>
}

# A.3 Expressions

rule PrimaryExpression :words { # see 11.1
    <'this'> | | | | | <'('> <')'>
}
rule ArrayLiteral :words { # see 11.1.4
    <'['> ? <']'>
    | <'['> <']'>
    | <'['> <','> ? <']'>
}
rule ElementList :words { # see 11.1.4
    ?
    | <','> ?
}
rule Elision :words { # see 11.1.4
    <','> | <','>
}
rule ObjectLiteral :words { # see 11.1.5
    <'{'> <'}'>
    | <'{'> <'}'>
}
rule PropertyNameAndValueList :words { # see 11.1.5
    <':'>
    | <','> <':'>
}
rule PropertyName :words { # see 11.1.5
    | |
}
rule MemberExpression :words { # see 11.2
    | | <'['> <']'> | <'.'> | <'new'>
}
rule NewExpression :words { # see 11.2
    | <'new'>
}
rule CallExpression :words { # see 11.2
    | | <'['> <']'> | <'.'>
}
rule Arguments :words { # see 11.2
    <'('> <')'>
    | <'('> <')'>
}
rule ArgumentList :words { # see 11.2
    | <','>
}
rule LeftHandSideExpression :words { # see 11.2
    |
}
rule PostfixExpression :words { # see 11.3
    | <'++'> | <'--'>
}
rule UnaryExpression :words { # see 11.4
    | <'delete'> | <'void'> | <'typeof'>
    | <'++'> | <'--'> | <'+'> | <'-'> | <'~'> | <'!'>
}
rule MultiplicativeExpression :words { # see 11.5
    | <'*'> | <'/'> | <'%'>
}
rule AdditiveExpression :words { # see 11.6
    | <'+'> | <'-'>
}
rule ShiftExpression :words { # see 11.7
    | <'<<'> | <'>>'> | <'>>>'>
}
rule RelationalExpression :words { # see 11.8
    | <'<'> | <'>'> | <'<='> | <'>='> | <'instanceof'> | <'in'>
}
rule RelationalExpressionNoIn :words { # see 11.8
    | <'<'> | <'>'> | <'<='> | <'>='> | <'instanceof'>
}
rule EqualityExpression :words { # see 11.9
    | <'=='> | <'!='> | <'==='> | <'!=='>
}
rule EqualityExpressionNoIn :words { # see 11.9
    | <'=='> | <'!='> | <'==='> | <'!=='>
}
rule BitwiseANDExpression :words { # see 11.10
    | <'&'>
}
rule BitwiseANDExpressionNoIn :words { # see 11.10
    | <'&'>
}
rule BitwiseXORExpression :words { # see 11.10
    | <'^'>
}
rule BitwiseXORExpressionNoIn :words { # see 11.10
    | <'^'>
}
rule BitwiseORExpression :words { # see 11.10
    | <'|'>
}
rule BitwiseORExpressionNoIn :words { # see 11.10
    | <'|'>
}
rule LogicalANDExpression :words { # see 11.11
    | <'&&'>
}
rule LogicalANDExpressionNoIn :words { # see 11.11
    | <'&&'>
}
rule LogicalORExpression :words { # see 11.11
    | <'||'>
}
rule LogicalORExpressionNoIn :words { # see 11.11
    | <'||'>
}
rule ConditionalExpression :words { # see 11.12
    | <'?'> <':'>
}
rule ConditionalExpressionNoIn :words { # see 11.12
    | <'?'> <':'>
}
rule AssignmentExpression :words { # see 11.13
    |
}
rule AssignmentExpressionNoIn :words { # see 11.13
    |
}
rule AssignmentOperator :words { # see 11.13
    <'='> | <'*='> | <'/='> | <'%='> | <'+='> | <'-='>
    | <'<<='> | <'>>='> | <'>>>='> | <'&='> | <'^='> | <'|='>
}
rule Expression :words { # see 11.14
    | <','>
}
rule ExpressionNoIn :words { # see 11.14
    | <','>
}

# A.4 Statements

rule Statement :words { # see clause 12
    | | | | | | | | | | | | |
}
rule Block :words { # see 12.1
    <'{'> ? <'}'>
}
rule StatementList :words { # see 12.1
    |
}
rule VariableStatement :words { # see 12.2
    <'var'> <';'>
}
rule VariableDeclarationList :words { # see 12.2
    | <','>
}
rule VariableDeclarationListNoIn :words { # see 12.2
    | <','>
}
rule VariableDeclaration :words { # see 12.2
    ?
}
rule VariableDeclarationNoIn :words { # see 12.2
    ?
}
rule Initialiser :words { # see 12.2
    <'='>
}
rule InitialiserNoIn :words { # see 12.2
    <'='>
}
rule EmptyStatement :words { # see 12.3
    <';'>
}
rule ExpressionStatement :words { # see 12.4
    > <';'> #lookahead
}
rule IfStatement :words { # see 12.5
    <'if'> <'('> <')'> <'else'>
    | <'if'> <'('> <')'>
}
rule IterationStatement :words { # see 12.6
    # ')' and ';' are matched as two separate literals (previously the single
    # literal ');') so that whitespace between them is accepted, as it is
    # between every other pair of tokens in this :words rule.
    <'do'> <'while'> <'('> <')'> <';'>
    | <'while'> <'('> <')'>
    | <'for'> <'('> ? <';'> ? <';'> ? <')'>
    | <'for'> <'('> <'var'> <';'> ? <';'> ? <')'>
    | <'for'> <'('> <'in'> <')'>
    | <'for'> <'('> <'var'> <'in'> <')'>
}
rule ContinueStatement :words { # see 12.7
    <'continue'>? <';'>
}
rule BreakStatement :words { # see 12.8
    <'break'>? <';'>
}
rule ReturnStatement :words { # see 12.9
    <'return'>? <';'>
}
rule WithStatement :words { # see 12.10
    <'with'> <'('> <')'>
}
rule SwitchStatement :words { # see 12.11
    <'switch'> <'('> <')'>
}
rule CaseBlock :words { # see 12.11
    <'{'> ? <'}'>
    | <'{'> ? ? <'}'>
}
rule CaseClauses :words { # see 12.11
    |
}
rule CaseClause :words { # see 12.11
    <'case'> <':'> ?
}
rule DefaultClause :words { # see 12.11
    <'default'> <':'> ?
}
rule LabelledStatement :words { # see 12.12
    <':'>
}
rule ThrowStatement :words { # see 12.13
    <'throw'> <';'>
}
rule TryStatement :words { # see 12.14
    <'try'>
    | <'try'>
    | <'try'>
}
rule Catch :words { # see 12.14
    <'catch'> <'('> <')'>
}
rule Finally :words { # see 12.14
    <'finally'>
}

# A.5 Functions and Programs

rule FunctionDeclaration :words { # see clause 13
    # The closing-paren literal was previously written <')'. (unterminated);
    # it now matches the form used in FunctionExpression.
    <'function'> <'('> ? <')'> <'{'> <'}'>
}
rule FunctionExpression :words { # see clause 13
    <'function'> ? <'('> ? <')'> <'{'> <'}'>
}
rule FormalParameterList :words { # see clause 13
    | <','>
}
rule FunctionBody :words { # see clause 13
}
rule Program :words { # see clause 14
}
rule SourceElements :words { # see clause 14
    |
}
rule SourceElement :words { # see clause 14
}

# A.6 Universal Resource Identifier Character Classes

rule uri { # see 15.1.3
    ?
}
rule uriCharacters { # see 15.1.3
    ?
}
rule uriCharacter { # see 15.1.3
    | |
}
rule uriReserved { # see 15.1.3
    <';'> | <'/'> | <'?'> | <':'> | <'@'> | <'&'> | <'='> | <'+'> | <'$'> | <','>
}
rule uriUnescaped { # see 15.1.3
    | |
}
rule uriEscaped { # see 15.1.3
    <'%'>
}
rule uriAlpha { # see 15.1.3
    <[a..zA..Z]>
}
rule uriMark { # see 15.1.3
    <'-'> | <'_'> | <'.'> | <'!'> | <'~'> | <'*'> | <'\''> | <'('> | <')'>
}

# A.7 Regular Expressions

rule Pattern { # see 15.10.1
}
rule Disjunction { # see 15.10.1
    | <'|'>
}
rule Alternative { # see 15.10.1
    |
}
rule Term { # see 15.10.1
    | |
}
rule Assertion { # see 15.10.1
    <'^'> | <'$'> | <'\\'> <'b'> | <'\\'> <'B'>
}
rule Quantifier { # see 15.10.1
    | <'?'>
}
rule QuantifierPrefix { # see 15.10.1
    <'*'> | <'+'> | <'?'>
    | <'{'> <'}'>
    | <'{'> <',}'>
    | <'{'> <','> <'}'>
}
rule Atom { # see 15.10.1
    | <'.'> | <'\\'> |
    | <'('> <')'>
    | <'(?:'> <')'>
    | <'(?='> <')'>
    | <'(?!'> <')'>
}
rule PatternCharacter { # see 15.10.1
    <-[^$\\.*+?()[\]{}|]> #SourceCharacter but not ...
}
rule AtomEscape { # see 15.10.1
    | |
}
rule CharacterEscape { # see 15.10.1
    | <'c'> | | |
}
rule ControlEscape { # see 15.10.1
    <'f'> | <'n'> | <'r'> | <'t'> | <'v'>
}
rule ControlLetter { # see 15.10.1
    <[a..zA..Z]>
}
rule IdentityEscape { # see 15.10.1
    >
}
rule DecimalEscape { # see 15.10.1
    > #lookahead
}
rule CharacterClass { # see 15.10.1
    <'['> > <']'> #lookahead
    | <'[^'> <']'>
}
rule ClassRanges { # see 15.10.1
    |
}
rule NonemptyClassRanges { # see 15.10.1
    | | <'-'>
}
rule NonemptyClassRangesNoDash { # see 15.10.1
    | | <'-'>
}
rule ClassAtom { # see 15.10.1
    <'-'> |
}
rule ClassAtomNoDash { # see 15.10.1
    <-[\\\]\-]> #SourceCharacter but not ...
    | <'\\'> |
}
rule ClassEscape { # see 15.10.1
    | <'b'> | |
}


grammar JavaScript::ECMAScript3::Grammar is JavaScript::ECMAScript3::Grammar::Spec;
# Intended to be a working, usable grammar.
# Unimplemented.