-- Lua defs
--
-- State tables describing how to tokenize Lua source, intended to be driven
-- by the project-local "parser" module. Each table maps an input token (a
-- single character here) either to the name of another key in the same table
-- or to a function/table to use as the next rule.
--
-- NOTE(review): the exact semantics of `selfify`, of the numeric keys
-- ([1], [2], [-1]) and of the "" key are defined by the parser module, which
-- is not visible from this file. From the usage below they look like
-- "install a self-reference", "hook slots" and "fallback rule" respectively
-- -- confirm against the parser module.

-- we need some stuff from here
local parser = require "parser"
local selfify = parser.selfify
local EOF = parser.EOF
local COLLECT = parser.COLLECT
local collect_fallback = parser.collect_fallback

-- "dummies"
-- Each token kind is a unique empty table, compared by identity.
-- see http://www.lua.org/source/5.3/llex.h.html#RESERVED
local TK_AND, TK_BREAK, TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR,
    TK_FUNCTION, TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR,
    TK_REPEAT, TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
    TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE,
    TK_SHL, TK_SHR, TK_DBCOLON, TK_EOS,
    TK_FLT, TK_INT, TK_NAME, TK_STRING =
    {}, {}, {}, {}, {}, {}, {}, {},
    {}, {}, {}, {}, {}, {}, {}, {},
    {}, {}, {}, {}, {}, {},
    {}, {}, {}, {}, {}, {}, {},
    {}, {}, {}, {},
    {}, {}, {}, {}

local defs = {}

-- Character classes. Other tables fall back to this one through __index, so
-- a character resolves to its class name, which is then looked up as a rule.
-- Note that a-f/A-F are classified as "hexdigit", not "alpha".
defs.base = {
    [" "] = "whitespace",
    ["\n"] = "newline",
    ["\r"] = "newline",
    ["\v"] = "whitespace",
    ["\t"] = "whitespace",
    ["\f"] = "whitespace",
    ["0"] = "digit",
    ["1"] = "digit",
    ["2"] = "digit",
    ["3"] = "digit",
    ["4"] = "digit",
    ["5"] = "digit",
    ["6"] = "digit",
    ["7"] = "digit",
    ["8"] = "digit",
    ["9"] = "digit",
    ["a"] = "hexdigit",
    ["b"] = "hexdigit",
    ["c"] = "hexdigit",
    ["d"] = "hexdigit",
    ["e"] = "hexdigit",
    ["f"] = "hexdigit",
    ["A"] = "hexdigit",
    ["B"] = "hexdigit",
    ["C"] = "hexdigit",
    ["D"] = "hexdigit",
    ["E"] = "hexdigit",
    ["F"] = "hexdigit",
    ["g"] = "alpha",
    ["h"] = "alpha",
    ["i"] = "alpha",
    ["j"] = "alpha",
    ["k"] = "alpha",
    ["l"] = "alpha",
    ["m"] = "alpha",
    ["n"] = "alpha",
    ["o"] = "alpha",
    ["p"] = "alpha",
    ["q"] = "alpha",
    ["r"] = "alpha",
    ["s"] = "alpha",
    ["t"] = "alpha",
    ["u"] = "alpha",
    ["v"] = "alpha",
    ["w"] = "alpha",
    ["x"] = "alpha",
    ["y"] = "alpha",
    ["z"] = "alpha",
    ["G"] = "alpha",
    ["H"] = "alpha",
    ["I"] = "alpha",
    ["J"] = "alpha",
    ["K"] = "alpha",
    ["L"] = "alpha",
    ["M"] = "alpha",
    ["N"] = "alpha",
    ["O"] = "alpha",
    ["P"] = "alpha",
    ["Q"] = "alpha",
    ["R"] = "alpha",
    ["S"] = "alpha",
    ["T"] = "alpha",
    ["U"] = "alpha",
    ["V"] = "alpha",
    ["W"] = "alpha",
    ["X"] = "alpha",
    ["Y"] = "alpha",
    ["Z"] = "alpha",
}

-- Bumps the line counter kept in state.line (starting from 1 when unset).
local function countline(state, token, rule)
    state.line = (state.line or 1) + 1
end

-- Installs "\n" and "\r" entries on `t` so that a two-character line ending
-- ("\r\n" or "\n\r") is counted as one line: the first character lands in a
-- table carrying `countline` under key `hookn`, and that table maps the
-- complementary character to an empty table. Every other lookup on those
-- tables is delegated to `fallback` (defaults to `t` itself).
-- Returns `t`.
local function mknewline(t, hookn, fallback)
    fallback = fallback or t
    t["\n"] = setmetatable({
        [hookn] = countline,
        ["\r"] = setmetatable({}, {__index=fallback}),
    }, {__index=fallback})
    t["\r"] = setmetatable({
        [hookn] = countline,
        ["\n"] = setmetatable({}, {__index=fallback}),
    }, {__index=fallback})
    return t
end

do
    -- Rules used while inside a quoted string literal.
    local tstring = selfify({})
    defs.string = tstring
    tstring.defs = defs
    do
        -- Rules used right after a backslash inside a string.
        local tsescapes = setmetatable(mknewline({
            ["'"] = "insertraw",
            ['"'] = "insertraw",
            ['\\'] = "insertraw",
            ["a"] = "insertmap",
            ["b"] = "insertmap",
            ["f"] = "insertmap",
            ["n"] = "insertmap",
            ["r"] = "insertmap",
            ["t"] = "insertmap",
            ["v"] = "insertmap",
            ["z"] = "skipwhitespace",
            ["u"] = "unicode",
            ["x"] = "hex",
            --["\n"] = setmetatable({[1] = countline, ["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
            --["\r"] = setmetatable({[1] = countline, ["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
            -- A backslash followed by a line break inserts a single "\n",
            -- whichever newline character was actually seen.
            [1] = function(state, token, rule)
                if token == "\r" or token == "\n" then
                    collect_fallback(state, "\n")
                end
            end,
        }, 1, tstring), {__index = defs.base})
        defs.string.escapes = tsescapes
        tsescapes.string = defs.string

        -- \' \" \\ : the escaped character is inserted as-is.
        function tsescapes.insertraw(state, token)
            collect_fallback(state, token)
            return "string"
        end

        do
            local map = {
                ["a"] = "\a",
                ["b"] = "\b",
                ["f"] = "\f",
                ["n"] = "\n",
                ["r"] = "\r",
                ["t"] = "\t",
                ["v"] = "\v",
            }
            -- \a \b \f \n \r \t \v : inserts the mapped control character.
            function tsescapes.insertmap(state, token)
                collect_fallback(state, map[token])
                return "string"
            end
        end

        -- Decimal escape (\ddd, up to three digits). The value accumulates in
        -- state.in_digit and the digit count in state.c. Also installed as
        -- the "" rule of `digitc`, in which case `token` may be a non-digit:
        -- the pending byte is emitted and then the token itself is collected.
        -- Returns nil when the value exceeds 255.
        function tsescapes.digit(state, token)
            -- plain find: position 1..9 for "1".."9", 10 for "0"; `% 10`
            -- turns that into the digit's numeric value.
            local digit = string.find("1234567890", token, 1, true)
            local num = state.in_digit
            if digit then
                num = (num or 0) * 10 + digit % 10
                state.c = (state.c or 0) + 1
                if state.c < 3 then
                    state.in_digit = num
                    return "digitc"
                end
            end
            if num > 255 then
                return nil
            end
            collect_fallback(state, string.char(num))
            state.in_digit = nil
            state.c = nil
            if not digit then
                collect_fallback(state, token)
            end
            return "string"
        end
        -- Continuation state after the first digit(s) of a decimal escape.
        tsescapes.digitc = setmetatable(selfify({[""] = tsescapes.digit, string = tstring}, "digitc"), {__index=tstring})
        -- When invoked with no rule, emit the byte accumulated so far and
        -- reset the decimal-escape bookkeeping.
        tsescapes.digitc[1] = function(state, token, rule)
            if rule == nil then
                collect_fallback(state, string.char(state.in_digit))
                state.in_digit = nil
                state.c = nil
            end
        end

        -- \xXX : exactly two hex digits. "digit" is remapped to "hexdigit" so
        -- both character classes reach the same handler.
        tsescapes.hex = setmetatable(selfify({string = defs.string, digit = "hexdigit"}), {__index=defs.base})
        function tsescapes.hex.hexdigit(state, token)
            -- plain find; `position % 16` is the hex value of the digit
            -- ("0" sits at position 16, lowercase letters at 26..31).
            local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
            assert(digit, "this should never be called for non-hex-digits")
            local num = state.in_hex
            if num then
                -- second digit: emit the byte and go back to the string
                num = num * 16 + digit % 16
                collect_fallback(state, string.char(num))
                state.in_hex = nil
                return "string"
            else
                -- first digit: remember it and stay in this state
                state.in_hex = digit % 16
                return "self"
            end
        end

        do
            -- \u{XXX} : only "{" is accepted right after the "u".
            local tseunicode = {}
            tseunicode["{"] = "hex"
            do
                local tseuhex = setmetatable(selfify({digit = "hexdigit", string=tstring}), {__index=defs.base})
                tseunicode.hex = tseuhex
                -- Accumulates the code point in state.in_hex. Returns nil
                -- (no next rule) once the value exceeds 2^31 - 1.
                function tseuhex.hexdigit(state, token)
                    local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
                    assert(digit, "this should never be called for non-hex-digits")
                    state.in_hex = (state.in_hex or 0) * 16 + digit % 16
                    if state.in_hex <= 2147483647 then
                        return "self"
                    end
                end
                -- Closing brace: encode the accumulated code point as UTF-8
                -- and collect the resulting bytes.
                tseuhex["}"] = function(state, token)
                    local num = state.in_hex
                    state.in_hex = nil
                    if num == nil then
                        -- "\u{}" with no hex digits: reject by returning no
                        -- rule, as the other escape handlers in this file do,
                        -- instead of raising on a nil comparison below.
                        return nil
                    end
                    if num < 128 then
                        -- ASCII range: single byte
                        collect_fallback(state, string.char(num))
                        return "string"
                    end
                    local bytes = ""
                    -- peel off continuation bytes, 6 bits at a time
                    while num > 63 do
                        local v = num % 64
                        bytes = string.char(128 + v) .. bytes -- yeah ik, not the most efficient
                        num = (num - v) / 64
                    end
                    -- the remainder does not fit next to the lead byte's
                    -- length prefix: spill it into one more continuation byte
                    if num >= 2^6/(2^#bytes) then
                        local v = num % 64
                        bytes = string.char(128 + v) .. bytes
                        num = (num - v) / 64
                    end
                    do
                        -- lead byte: one high bit per continuation byte (on
                        -- top of the 128 added below), plus the payload bits
                        local v = 0
                        for i=1,#bytes do
                            v = v + 128 / 2^i
                        end
                        v = v + num
                        assert(v < 126)
                        bytes = string.char(128 + v) .. bytes
                    end
                    collect_fallback(state, bytes)
                    return "string"
                end
            end
            tsescapes.unicode = tseunicode
        end

        do
            -- \z : whitespace (including line breaks) keeps this state; the
            -- "" entry points back at the string rules for anything else.
            local tseskipwhitespace = selfify(mknewline({
                string = defs.string,
                whitespace = "self",
                [""] = "string",
                [1] = collect_fallback,
            }, 2))
            --tseskipwhitespace["\n"] = setmetatable({[2] = countline, ["\r"] = setmetatable({}, {__index=tseskipwhitespace})}, {__index=tseskipwhitespace})
            --tseskipwhitespace["\r"] = setmetatable({[2] = countline, ["\n"] = setmetatable({}, {__index=tseskipwhitespace})}, {__index=tseskipwhitespace})
            local tbase = defs.base
            local tbasemap = {whitespace = "whitespace"}
            -- Only the "whitespace" class is taken from defs.base; every
            -- other key is looked up in the string rules.
            setmetatable(tseskipwhitespace, {__index = function(t, k) return tbasemap[tbase[k]] or tstring[k] end})
            tsescapes.skipwhitespace = tseskipwhitespace
        end
    end

    tstring['\\'] = "escapes"

    tstring['"'] = "close"
    tstring["'"] = "close"

    -- raw line breaks are mapped to false inside a quoted string
    tstring['\n'] = false
    tstring['\r'] = false

    tstring[""] = "self"

    tstring[1] = collect_fallback

    -- Quote character: ends the string only if it matches the quote that
    -- opened it (state.in_string); otherwise it is ordinary string content.
    -- On close, the collected pieces are concatenated and appended to state.
    function tstring.close(state, token)
        if state.in_string == token then
            state.in_string = nil
            state[#state+1] = table.concat(state[COLLECT])
            state[COLLECT] = nil
            return "defs"
        else
            collect_fallback(state, token)
            return "self"
        end
    end
end

do
    local tlongstring = selfify({})
    defs.longstring = tlongstring
    -- TODO
end

defs["'"] = "string_open"
defs['"'] = "string_open"
defs["["] = "maybe_longstring"
-- State entered after a "[": decides between a long string opener
-- ("[[" / "[=...") and a plain "[" token.
defs.maybe_longstring = setmetatable({
    defs = defs,
    ['['] = "longstring_open",
    ['='] = "longstring_open",
    -- counts further "=" characters of the opening long bracket
    longstring_count = selfify({
        ["="] = function(state, token)
            state.longstring_count = state.longstring_count + 1
            return "self"
        end,
        longstring = defs.longstring
    }),
    longstring_open = function(state, token)
        if token == "=" then
            -- Parenthesized on purpose: `a or 0 + 1` parses as `a or (0 + 1)`
            -- because `+` binds tighter than `or`.
            state.longstring_count = (state.longstring_count or 0) + 1
            return "longstring_count"
        elseif token == "[" then
            return "longstring"
        end
    end,
    -- If the rule taken is not "longstring_open", the "[" that led here is
    -- appended to state as a plain "[" token.
    [-1] = function(state, token, rule)
        if rule ~= "longstring_open" then
            state[#state+1] = "["
        end
    end
}, {__index=defs})

-- these are needed for proper line counts
--defs["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=defs})}, {__index=defs})
--defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs})
mknewline(defs, 1)

setmetatable(defs, {__index=defs.base})

-- Opening quote: appends a TK_STRING marker to state, starts a fresh collect
-- buffer and remembers which quote character opened the string.
-- Raises an error if a string is already open.
function defs.string_open(state, token)
    if not state.in_string then
        state[#state+1] = TK_STRING
        state[COLLECT] = {coalesce=50} -- TODO tweak this for CPU/memory tradeoff?
        state.in_string = token
        return "string"
    end
    -- error(), not assert(): asserting a non-empty string always succeeds.
    error("this shouldn't happen")
end

return {
    defs = defs,
    tokens = {
        TK_AND = TK_AND, TK_BREAK = TK_BREAK,
        TK_DO = TK_DO, TK_ELSE = TK_ELSE, TK_ELSEIF = TK_ELSEIF, TK_END = TK_END, TK_FALSE = TK_FALSE, TK_FOR = TK_FOR, TK_FUNCTION = TK_FUNCTION,
        TK_GOTO = TK_GOTO, TK_IF = TK_IF, TK_IN = TK_IN, TK_LOCAL = TK_LOCAL, TK_NIL = TK_NIL, TK_NOT = TK_NOT, TK_OR = TK_OR, TK_REPEAT = TK_REPEAT,
        TK_RETURN = TK_RETURN, TK_THEN = TK_THEN, TK_TRUE = TK_TRUE, TK_UNTIL = TK_UNTIL, TK_WHILE = TK_WHILE,
        TK_IDIV = TK_IDIV, TK_CONCAT = TK_CONCAT, TK_DOTS = TK_DOTS, TK_EQ = TK_EQ, TK_GE = TK_GE, TK_LE = TK_LE, TK_NE = TK_NE,
        TK_SHL = TK_SHL, TK_SHR = TK_SHR,
        TK_DBCOLON = TK_DBCOLON, TK_EOS = TK_EOS,
        TK_FLT = TK_FLT, TK_INT = TK_INT, TK_NAME = TK_NAME, TK_STRING = TK_STRING
    },
}