$tokens = [
# End of source.
"END",
# Operators and punctuators. Some pair-wise order matters, e.g. (+, -)
# and (UNARY_PLUS, UNARY_MINUS).
"\n", ";",
",",
"=",
"?", ":", "CONDITIONAL",
"||",
"&&",
"|",
"^",
"&",
"==", "!=", "===", "!==",
"<", "<=", ">=", ">",
"<<", ">>", ">>>",
"+", "-",
"*", "/", "%",
"!", "~", "UNARY_PLUS", "UNARY_MINUS",
"++", "--",
".",
"[", "]",
"{", "}",
"(", ")",
# Nonterminal tree node type codes.
"SCRIPT", "BLOCK", "LABEL", "FOR_IN", "CALL", "NEW_WITH_ARGS", "INDEX",
"ARRAY_INIT", "OBJECT_INIT", "PROPERTY_INIT", "GETTER", "SETTER",
"GROUP", "LIST",
# Terminals.
"IDENTIFIER", "NUMBER", "STRING", "REGEXP",
# Keywords.
"break",
"case", "catch", "const", "continue",
"debugger", "default", "delete", "do",
"else", "enum",
"false", "finally", "for", "function",
"if", "in", "instanceof",
"new", "null",
"return",
"switch",
"this", "throw", "true", "try", "typeof",
"var", "void",
"while", "with",
]
# Operator and punctuator mapping from token to tree node type name.
$opTypeNames = {
"\n" => "NEWLINE",
';' => "SEMICOLON",
',' => "COMMA",
'?' => "HOOK",
':' => "COLON",
'||' => "OR",
'&&' => "AND",
'|' => "BITWISE_OR",
'^' => "BITWISE_XOR",
'&' => "BITWISE_AND",
'===' => "STRICT_EQ",
'==' => "EQ",
'=' => "ASSIGN",
'!==' => "STRICT_NE",
'!=' => "NE",
'<<' => "LSH",
'<=' => "LE",
'<' => "LT",
'>>>' => "URSH",
'>>' => "RSH",
'>=' => "GE",
'>' => "GT",
'++' => "INCREMENT",
'--' => "DECREMENT",
'+' => "PLUS",
'-' => "MINUS",
'*' => "MUL",
'/' => "DIV",
'%' => "MOD",
'!' => "NOT",
'~' => "BITWISE_NOT",
'.' => "DOT",
'[' => "LEFT_BRACKET",
']' => "RIGHT_BRACKET",
'{' => "LEFT_CURLY",
'}' => "RIGHT_CURLY",
'(' => "LEFT_PAREN",
')' => "RIGHT_PAREN"
}
# Hash of keyword identifier to tokens index.
$keywords = {}
# Define const END, etc., based on the token names. Also map name to index.
$consts = {}
$tokens.length.times do |i|
t = $tokens[i]
if /\A[a-z]/ =~ t
$consts[t.upcase] = i
$keywords[t] = i
elsif /\A\W/ =~ t
$consts[$opTypeNames[t]] = i
else
$consts[t] = i
end
end
# Map assignment operators to their indexes in the tokens array.
$assignOps = ['|', '^', '&', '<<', '>>', '>>>', '+', '-', '*', '/', '%']
$assignOpsHash = {}
$assignOps.length.times do |i|
t = $assignOps[i]
$assignOpsHash[t] = $consts[$opTypeNames[t]]
end
$opPrecedence = {
"SEMICOLON" => 0,
"COMMA" => 1,
"ASSIGN" => 2,
"HOOK" => 3, "COLON" => 3, "CONDITIONAL" => 3,
"OR" => 4,
"AND" => 5,
"BITWISE_OR" => 6,
"BITWISE_XOR" => 7,
"BITWISE_AND" => 8,
"EQ" => 9, "NE" => 9, "STRICT_EQ" => 9, "STRICT_NE" => 9,
"LT" => 10, "LE" => 10, "GE" => 10, "GT" => 10, "IN" => 10, "INSTANCEOF" => 10,
"LSH" => 11, "RSH" => 11, "URSH" => 11,
"PLUS" => 12, "MINUS" => 12,
"MUL" => 13, "DIV" => 13, "MOD" => 13,
"DELETE" => 14, "VOID" => 14, "TYPEOF" => 14, # PRE_INCREMENT: 14, PRE_DECREMENT: 14,
"NOT" => 14, "BITWISE_NOT" => 14, "UNARY_PLUS" => 14, "UNARY_MINUS" => 14,
"INCREMENT" => 15, "DECREMENT" => 15, # postfix
"NEW" => 16,
"DOT" => 17
}
# Map operator type code to precedence.
$opPrecedence.keys.each do |i|
$opPrecedence[$consts[i]] = $opPrecedence[i]
end
$opArity = {
"COMMA" => -2,
"ASSIGN" => 2,
"CONDITIONAL" => 3,
"OR" => 2,
"AND" => 2,
"BITWISE_OR" => 2,
"BITWISE_XOR" => 2,
"BITWISE_AND" => 2,
"EQ" => 2, "NE" => 2, "STRICT_EQ" => 2, "STRICT_NE" => 2,
"LT" => 2, "LE" => 2, "GE" => 2, "GT" => 2, "IN" => 2, "INSTANCEOF" => 2,
"LSH" => 2, "RSH" => 2, "URSH" => 2,
"PLUS" => 2, "MINUS" => 2,
"MUL" => 2, "DIV" => 2, "MOD" => 2,
"DELETE" => 1, "VOID" => 1, "TYPEOF" => 1, # PRE_INCREMENT: 1, PRE_DECREMENT: 1,
"NOT" => 1, "BITWISE_NOT" => 1, "UNARY_PLUS" => 1, "UNARY_MINUS" => 1,
"INCREMENT" => 1, "DECREMENT" => 1, # postfix
"NEW" => 1, "NEW_WITH_ARGS" => 2, "DOT" => 2, "INDEX" => 2, "CALL" => 2,
"ARRAY_INIT" => 1, "OBJECT_INIT" => 1, "GROUP" => 1
}
# Map operator type code to arity.
$opArity.keys.each do |i|
$opArity[$consts[i]] = $opArity[i]
end
# NB: superstring tokens (e.g., ++) must come before their substring token
# counterparts (+ in the example), so that the $opRegExp regular expression
# synthesized from this list makes the longest possible match.
$ops = [';', ',', '?', ':', '||', '&&', '|', '^', '&', '===', '==',
'=', '!==', '!=', '<<', '<=', '<', '>>>', '>>', '>=', '>', '++', '--',
'+', '-', '*', '/', '%', '!', '~', '.', '[', ']', '{', '}', '(', ')']
# Build a regexp that recognizes operators and punctuators (except newline).
$opRegExpSrc = "\\A"
$ops.length.times do |i|
$opRegExpSrc += "|\\A" if $opRegExpSrc != "\\A"
$opRegExpSrc += $ops[i].gsub(/([?|^&(){}\[\]+\-*\/\.])/) {|s| "\\" + s}
end
$opRegExp = Regexp.new($opRegExpSrc, Regexp::MULTILINE)
# A regexp to match floating point literals (but not integer literals).
$fpRegExp = Regexp.new("\\A\\d+\\.\\d*(?:[eE][-+]?\\d+)?|\\A\\d+(?:\\.\\d*)?[eE][-+]?\\d+|\\A\\.\\d+(?:[eE][-+]?\\d+)?", Regexp::MULTILINE)
class Tokenizer
attr_accessor :cursor, :source, :tokens, :tokenIndex, :lookahead
attr_accessor :scanNewlines, :scanOperand, :filename, :lineno
def initialize (source, filename, line)
@cursor = 0
@source = source.to_s
@tokens = []
@tokenIndex = 0
@lookahead = 0
@scanNewlines = false
@scanOperand = true
@filename = filename or ""
@lineno = line or 1
end
def input
return @source.slice(@cursor, @source.length - @cursor)
end
def done
return self.peek == $consts["END"];
end
def token
return @tokens[@tokenIndex];
end
def match (tt)
got = self.get
#puts got
#puts tt
return got == tt || self.unget
end
def mustMatch (tt)
raise SyntaxError.new("Missing " + $tokens[tt].downcase, self) unless self.match(tt)
return self.token
end
def peek
if @lookahead > 0
#tt = @tokens[(@tokenIndex + @lookahead)].type
tt = @tokens[(@tokenIndex + @lookahead) & 3].type
else
tt = self.get
self.unget
end
return tt
end
def peekOnSameLine
@scanNewlines = true;
tt = self.peek
@scanNewlines = false;
return tt
end
def get
while @lookahead > 0
@lookahead -= 1
@tokenIndex = (@tokenIndex + 1) & 3
token = @tokens[@tokenIndex]
return token.type if token.type != $consts["NEWLINE"] || @scanNewlines
end
while true
input = self.input
if @scanNewlines
match = /\A[ \t]+/.match(input)
else
match = /\A\s+/.match(input)
end
if match
spaces = match[0]
@cursor += spaces.length
@lineno += spaces.count("\n")
input = self.input
end
match = /\A\/(?:\*(?:.)*?\*\/|\/[^\n]*)/m.match(input)
break unless match
comment = match[0]
@cursor += comment.length
@lineno += comment.count("\n")
end
#puts input
@tokenIndex = (@tokenIndex + 1) & 3
token = @tokens[@tokenIndex]
(@tokens[@tokenIndex] = token = Token.new) unless token
if input.length == 0
#puts "end!!!"
return (token.type = $consts["END"])
end
cursor_advance = 0
if (match = $fpRegExp.match(input))
token.type = $consts["NUMBER"]
token.value = match[0].to_f
elsif (match = /\A0[xX][\da-fA-F]+|\A0[0-7]*|\A\d+/.match(input))
token.type = $consts["NUMBER"]
token.value = match[0].to_i
elsif (match = /\A(\w|\$)+/.match(input))
id = match[0]
token.type = $keywords[id] || $consts["IDENTIFIER"]
token.value = id
elsif (match = /\A"(?:\\.|[^"])*"|\A'(?:[^']|\\.)*'/.match(input))
token.type = $consts["STRING"]
token.value = match[0].to_s
elsif @scanOperand and (match = /\A\/((?:\\.|[^\/])+)\/([gi]*)/.match(input))
token.type = $consts["REGEXP"]
token.value = Regexp.new(match[1], match[2])
elsif (match = $opRegExp.match(input))
op = match[0]
if $assignOpsHash[op] && input[op.length, 1] == '='
token.type = $consts["ASSIGN"]
token.assignOp = $consts[$opTypeNames[op]]
cursor_advance = 1 # length of '='
else
#puts $consts[$opTypeNames[op]].to_s + " " + $opTypeNames[op] + " " + op
token.type = $consts[$opTypeNames[op]]
if @scanOperand and (token.type == $consts["PLUS"] || token.type == $consts["MINUS"])
token.type += $consts["UNARY_PLUS"] - $consts["PLUS"]
end
token.assignOp = nil
end
token.value = op
else
raise SyntaxError.new("Illegal token", self)
end
token.start = @cursor
@cursor += match[0].length + cursor_advance
token.end = @cursor
token.lineno = @lineno
return token.type
end
def unget
#puts "start: lookahead: " + @lookahead.to_s + " tokenIndex: " + @tokenIndex.to_s
@lookahead += 1
raise SyntaxError.new("PANIC: too much lookahead!", self) if @lookahead == 4
@tokenIndex = (@tokenIndex - 1) & 3
#puts "end: lookahead: " + @lookahead.to_s + " tokenIndex: " + @tokenIndex.to_s
return nil
end
end
class SyntaxError
def initialize (msg, tokenizer)
puts msg
puts "on line " + tokenizer.lineno.to_s
end
end
class Token
attr_accessor :type, :value, :start, :end, :lineno, :assignOp
end
class CompilerContext
attr_accessor :inFunction, :stmtStack, :funDecls, :varDecls
attr_accessor :bracketLevel, :curlyLevel, :parenLevel, :hookLevel
attr_accessor :ecmaStrictMode, :inForLoopInit
def initialize (inFunction)
@inFunction = inFunction
@stmtStack = []
@funDecls = []
@varDecls = []
@bracketLevel = @curlyLevel = @parenLevel = @hookLevel = 0
@ecmaStrictMode = @inForLoopInit = false
end
end
class Node < Array
attr_accessor :type, :value, :lineno, :start, :end, :tokenizer, :initializer
attr_accessor :name, :params, :funDecls, :varDecls, :body, :functionForm
attr_accessor :assignOp, :expression, :condition, :thenPart, :elsePart
attr_accessor :readOnly, :isLoop, :setup, :postfix, :update, :exception
attr_accessor :object, :iterator, :varDecl, :label, :target, :tryBlock
attr_accessor :catchClauses, :varName, :guard, :block, :discriminant, :cases
attr_accessor :defaultIndex, :caseLabel, :statements, :statement
def initialize (t, type = nil)
token = t.token
if token
if type != nil
@type = type
else
@type = token.type
end
@value = token.value
@lineno = token.lineno
@start = token.start
@end = token.end
else
@type = type
@lineno = t.lineno
end
@tokenizer = t
#for (var i = 2; i < arguments.length; i++)
#this.push(arguments[i]);
end
alias superPush push
# Always use push to add operands to an expression, to update start and end.
def push (kid)
if kid.start and @start
@start = kid.start if kid.start < @start
end
if kid.end and @end
@end = kid.end if @end < kid.end
end
return superPush(kid)
end
# def to_s
# a = []
#
# #for (var i in this) {
# # if (this.hasOwnProperty(i) && i != 'type')
# # a.push({id: i, value: this[i]});
# #}
# #a.sort(function (a,b) { return (a.id < b.id) ? -1 : 1; });
# iNDENTATION = " "
# n = (Node.indentLevel += 1)
# t = $tokens[@type]
# s = "{\n" + iNDENTATION.repeat(n) +
# "type: " + (/^\W/.test(t) and opTypeNames[t] or t.upcase)
# #for (i = 0; i < a.length; i++)
# # s += ",\n" + INDENTATION.repeat(n) + a[i].id + ": " + a[i].value
# s += ",\n" + iNDENTATION.repeat(n) + @value + ": " + a[i].value
# n = (Node.indentLevel -= 1)
# s += "\n" + iNDENTATION.repeat(n) + "}"
# return s
# end
def to_s
attrs = [@value,
@lineno, @start, @end,
@name, @params, @funDecls, @varDecls, @body, @functionForm,
@assignOp, @expression, @condition, @thenPart, @elsePart]
#puts $tokens[@condition.type] if @condition != nil
#if /\A[a-z]/ =~ $tokens[@type] # identifier
# print @tokenizer.source.slice($cursor, @start - $cursor) if $cursor < @start
# print ''
# print @tokenizer.source.slice(@start, $tokens[@type].length)
# print ''
# $cursor = @start + $tokens[@type].length
#end
#puts (" " * $ind) + "{" + $tokens[@type] + "\n" if /\A[a-z]/ =~ $tokens[@type]
#puts (" " * $ind) + " " + @start.to_s + "-" + @end.to_s + "\n"
$ind += 1
#puts @value
self.length.times do |i|
self[i].to_s if self[i] != self and self[i].class == Node
end
attrs.length.times do |attr|
if $tokens[@type] == "if"
# puts $tokens[attrs[attr].type] if attrs[attr].class == Node and attrs[attr] !== self
end
attrs[attr].to_s if attrs[attr].class == Node #and attrs[attr] != self
#puts (" " * $ind).to_s + attrs[attr].to_s if attrs[attr].to_s != nil and attrs[attr] != self
end
$ind -= 1
#puts "\n}\n"
if $ind == 0
print @tokenizer.source.slice($cursor, @tokenizer.source.length - $cursor)
end
return ""
end
def getSource
return @tokenizer.source.slice(@start, @end)
end
def filename
return @tokenizer.filename
end
end
$cursor = 0
$ind = 0
def Script (t, x)
n = Statements(t, x)
n.type = $consts["SCRIPT"]
n.funDecls = x.funDecls
n.varDecls = x.varDecls
return n
end
# Statement stack and nested statement handler.
# nb. Narcissus allowed a function reference, here we use Statement explicitly
def nest (t, x, node, end_ = nil)
x.stmtStack.push(node)
n = Statement(t, x)
x.stmtStack.pop
end_ and t.mustMatch(end_)
return n
end
def Statements (t, x)
n = Node.new(t, $consts["BLOCK"])
x.stmtStack.push(n)
n.push(Statement(t, x)) while !t.done and t.peek != $consts["RIGHT_CURLY"]
x.stmtStack.pop
return n
end
def Block (t, x)
t.mustMatch($consts["LEFT_CURLY"])
n = Statements(t, x)
t.mustMatch($consts["RIGHT_CURLY"])
return n
end
DECLARED_FORM = 0
EXPRESSED_FORM = 1
STATEMENT_FORM = 2
def Statement (t, x)
tt = t.get
# Cases for statements ending in a right curly return early, avoiding the
# common semicolon insertion magic after this switch.
case tt
when $consts["FUNCTION"]
return FunctionDefinition(t, x, true,
(x.stmtStack.length > 1) && STATEMENT_FORM || DECLARED_FORM)
when $consts["LEFT_CURLY"]
n = Statements(t, x)
t.mustMatch($consts["RIGHT_CURLY"])
return n
when $consts["IF"]
n = Node.new(t)
n.condition = ParenExpression(t, x)
x.stmtStack.push(n)
n.thenPart = Statement(t, x)
n.elsePart = t.match($consts["ELSE"]) ? Statement(t, x) : nil
x.stmtStack.pop()
return n
when $consts["SWITCH"]
n = Node.new(t)
t.mustMatch($consts["LEFT_PAREN"])
n.discriminant = Expression(t, x)
t.mustMatch($consts["RIGHT_PAREN"])
n.cases = []
n.defaultIndex = -1
x.stmtStack.push(n)
t.mustMatch($consts["LEFT_CURLY"])
while (tt = t.get) != $consts["RIGHT_CURLY"]
case tt
when $consts["DEFAULT"], $consts["CASE"]
if tt == $consts["DEFAULT"] and n.defaultIndex >= 0
raise SyntaxError.new("More than one switch default", t)
end
n2 = Node.new(t)
if tt == $consts["DEFAULT"]
n.defaultIndex = n.cases.length
else
n2.caseLabel = Expression(t, x, $consts["COLON"])
end
else
raise SyntaxError.new("Invalid switch case", t)
end
t.mustMatch($consts["COLON"])
n2.statements = Node.new(t, $consts["BLOCK"])
while (tt = t.peek) != $consts["CASE"] and tt != $consts["DEFAULT"] and tt != $consts["RIGHT_CURLY"]
n2.statements.push(Statement(t, x))
end
n.cases.push(n2)
end
x.stmtStack.pop
return n
when $consts["FOR"]
n = Node.new(t)
n.isLoop = true
t.mustMatch($consts["LEFT_PAREN"])
if (tt = t.peek) != $consts["SEMICOLON"]
x.inForLoopInit = true
if tt == $consts["VAR"] or tt == $consts["CONST"]
t.get
n2 = Variables(t, x)
else
n2 = Expression(t, x)
end
x.inForLoopInit = false
end
if n2 and t.match($consts["IN"])
n.type = $consts["FOR_IN"]
if n2.type == $consts["VAR"]
if n2.length != 1
raise SyntaxError.new("Invalid for..in left-hand side", t)
end
# NB: n2[0].type == IDENTIFIER and n2[0].value == n2[0].name.
n.iterator = n2[0]
n.varDecl = n2
else
n.iterator = n2
n.varDecl = nil
end
n.object = Expression(t, x)
else
n.setup = n2 or nil
t.mustMatch($consts["SEMICOLON"])
n.condition = (t.peek == $consts["SEMICOLON"]) ? nil : Expression(t, x)
t.mustMatch($consts["SEMICOLON"])
n.update = (t.peek == $consts["RIGHT_PAREN"]) ? nil : Expression(t, x)
end
t.mustMatch($consts["RIGHT_PAREN"])
n.body = nest(t, x, n)
return n
when $consts["WHILE"]
n = Node.new(t)
n.isLoop = true
n.condition = ParenExpression(t, x)
n.body = nest(t, x, n)
return n
when $consts["DO"]
n = Node.new(t)
n.isLoop = true
n.body = nest(t, x, n, $consts["WHILE"])
n.condition = ParenExpression(t, x)
if !x.ecmaStrictMode
#