-- luatokens.lua
--[[
|
|
luatokens.lua - pure-Lua Lua tokenizer
|
|
Copyright (C) 2019 Soni L.
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU Affero General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU Affero General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Affero General Public License
|
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
--]]
|
|
|
|
--[[
|
|
This software is based on Lua 5.1 and Lua 5.3
|
|
|
|
Lua 5.1 license:
|
|
|
|
/******************************************************************************
|
|
* Copyright (C) 1994-2012 Lua.org, PUC-Rio. All rights reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
* a copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be
|
|
* included in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
******************************************************************************/
|
|
|
|
Lua 5.3 license:
|
|
|
|
/******************************************************************************
|
|
* Copyright (C) 1994-2018 Lua.org, PUC-Rio.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
* a copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be
|
|
* included in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
******************************************************************************/
|
|
--]]
|
|
|
|
-- we need some stuff from here
|
|
local parser = require "parser"
|
|
local selfify = parser.selfify
|
|
local EOF = parser.EOF
|
|
local COLLECT = parser.COLLECT
|
|
local collect_fallback = parser.collect_fallback
|
|
|
|
-- "dummies"
|
|
-- see http://www.lua.org/source/5.3/llex.h.html#RESERVED
|
|
-- keywords
|
|
-- Unique sentinel tables, one per token type; consumers compare by identity.
local TK_AND, TK_BREAK,
    TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
    TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
    TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
    -- operators
    TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE,
    TK_SHL, TK_SHR,
    -- misc
    TK_DBCOLON, TK_EOS,
    TK_FLT, TK_INT, TK_NAME, TK_STRING =
    {}, {},
    {}, {}, {}, {}, {}, {}, {},
    {}, {}, {}, {}, {}, {}, {}, {},
    {}, {}, {}, {}, {},
    {}, {}, {}, {}, {}, {}, {},
    {}, {},
    {}, {},
    {}, {}, {}, {}

-- Reserved word -> token sentinel.
local keywords = {
    ["and"] = TK_AND,
    ["break"] = TK_BREAK,
    ["do"] = TK_DO,
    ["else"] = TK_ELSE,
    ["elseif"] = TK_ELSEIF,
    ["end"] = TK_END,
    ["false"] = TK_FALSE,
    ["for"] = TK_FOR,
    ["function"] = TK_FUNCTION,
    ["goto"] = TK_GOTO,
    ["if"] = TK_IF,
    ["in"] = TK_IN,
    ["local"] = TK_LOCAL,
    ["nil"] = TK_NIL,
    ["not"] = TK_NOT,
    ["or"] = TK_OR,
    ["repeat"] = TK_REPEAT,
    ["return"] = TK_RETURN,
    ["then"] = TK_THEN,
    ["true"] = TK_TRUE,
    ["until"] = TK_UNTIL,
    ["while"] = TK_WHILE,
}

-- Token sentinel -> reserved word (for pretty-printing).
local reverse_keywords = {}
for k,v in pairs(keywords) do
    reverse_keywords[v] = k
end
|
|
|
|
-- Root state table of the tokenizer's state machine.
local defs = selfify({})

-- Shared character-class table: maps each single character the lexer cares
-- about to the name of the rule that handles it ("whitespace", "newline",
-- "digit", "hexdigit", "alpha").  Most states reach this table through
-- their metatable's __index chain.
defs.base = {}
do
    -- Assign `class` to every character of `chars`.
    local function fill(chars, class)
        for char in chars:gmatch(".") do
            defs.base[char] = class
        end
    end
    fill(" \v\t\f", "whitespace")
    fill("\n\r", "newline")
    fill("0123456789", "digit")
    fill("abcdefABCDEF", "hexdigit")
    fill("ghijklmnopqrstuvwxyzGHIJKLMNOPQRSTUVWXYZ", "alpha")
end
|
|
|
|
--- Rule hook: advance the tokenizer's line counter.
-- The counter is lazily initialized: an absent `state.line` means line 1.
local function countline(state, token, rule)
    local current = state.line or 1
    state.line = current + 1
end
|
|
|
|
--- Install newline transitions on state table `t`.
-- "\n" and "\r" each get a sub-state that bumps the line count (via the
-- numbered hook slot `hookn`) and that absorbs the opposite character, so
-- "\r\n" / "\n\r" pairs count as a single line break.  Unmatched tokens in
-- those sub-states fall back to `fallback` (default: `t` itself).
-- Returns `t` for call-chaining.
local function mknewline(t, hookn, fallback)
    fallback = fallback or t
    t["\n"] = setmetatable({[hookn] = countline, ["\r"] = setmetatable({}, {__index=fallback})}, {__index=fallback})
    t["\r"] = setmetatable({[hookn] = countline, ["\n"] = setmetatable({}, {__index=fallback})}, {__index=fallback})
    return t
end
|
|
|
|
-- Short-string ('...' / "...") literal states.
do local tstring = selfify({})
    defs.string = tstring
    tstring.defs = defs
    -- State entered after a backslash inside a short string.  Single-char
    -- escapes dispatch to insertraw/insertmap; "z", "u", "x" and decimal
    -- digits get their own sub-machines.
    do local tsescapes = setmetatable(mknewline({
            ["'"] = "insertraw",
            ['"'] = "insertraw",
            ['\\'] = "insertraw",
            ["a"] = "insertmap",
            ["b"] = "insertmap",
            ["f"] = "insertmap",
            ["n"] = "insertmap",
            ["r"] = "insertmap",
            ["t"] = "insertmap",
            ["v"] = "insertmap",
            ["z"] = "skipwhitespace",
            ["u"] = "unicode",
            ["x"] = "hex",
            -- a backslash-newline escape collapses into a plain "\n"
            [1] = function(state, token, rule) if token == "\r" or token == "\n" then collect_fallback(state, "\n") end end,
        }, 1, tstring), {__index = defs.base})
        defs.string.escapes = tsescapes
        tsescapes.string = defs.string

        -- \' \" \\ : the escaped character stands for itself.
        function tsescapes.insertraw(state, token)
            collect_fallback(state, token)
            return "string"
        end

        do
            -- \a \b \f \n \r \t \v : mnemonic -> control character.
            local map = { ["a"] = "\a", ["b"] = "\b", ["f"] = "\f", ["n"] = "\n", ["r"] = "\r", ["t"] = "\t", ["v"] = "\v" }
            function tsescapes.insertmap(state, token)
                collect_fallback(state, map[token])
                return "string"
            end
        end

        -- Decimal escape "\ddd": up to three digits, value must be <= 255.
        -- Returns nil (tokenizer error) when the accumulated value exceeds
        -- a byte.  A non-digit token flushes the accumulated byte and is
        -- then collected as ordinary string content.
        function tsescapes.digit(state, token)
            local digit = string.find("1234567890", token, 1, true)
            local num = state.in_digit
            if digit then
                num = (num or 0) * 10 + digit % 10
                state.c = (state.c or 0) + 1
                if state.c < 3 then
                    state.in_digit = num
                    return "digitc"
                end
            end
            if num > 255 then
                return nil
            end
            collect_fallback(state, string.char(num))
            state.in_digit = nil
            state.c = nil
            if not digit then
                collect_fallback(state, token)
            end
            return "string"
        end
        -- Continuation state while more decimal-escape digits may follow.
        tsescapes.digitc = setmetatable(selfify({[""] = tsescapes.digit, string = tstring}, "digitc"), {__index=tstring})
        tsescapes.digitc[1]=function(state, token, rule)
            -- hook: end-of-input mid-escape still flushes the byte
            if rule == nil then
                collect_fallback(state, string.char(state.in_digit))
                state.in_digit = nil
                state.c = nil
            end
        end

        -- Hex escape "\xXX": exactly two hex digits.
        tsescapes.hex = setmetatable(selfify({string = defs.string, digit = "hexdigit"}), {__index=defs.base})
        function tsescapes.hex.hexdigit(state, token)
            -- position in this string, mod 16, is the digit's value
            local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
            assert(digit, "this should never be called for non-hex-digits")
            local num = state.in_hex
            if num then
                -- second digit: emit the byte
                num = num * 16 + digit % 16
                collect_fallback(state, string.char(num))
                state.in_hex = nil
                return "string"
            else
                -- first digit: stash and wait for the second
                state.in_hex = digit % 16
                return "self"
            end
        end

        -- Unicode escape "\u{XXX}": accumulate a code point, then encode it
        -- as UTF-8 (accepts values up to 0x7FFFFFFF, like Lua 5.3's lexer).
        do local tseunicode = {}
            tseunicode["{"] = "hex"
            do local tseuhex = setmetatable(selfify({digit = "hexdigit", string=tstring}), {__index=defs.base})
                tseunicode.hex = tseuhex
                function tseuhex.hexdigit(state, token)
                    local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
                    assert(digit, "this should never be called for non-hex-digits")
                    state.in_hex = (state.in_hex or 0) * 16 + digit % 16
                    -- overflow past 2^31-1 falls through (tokenizer error)
                    if state.in_hex <= 2147483647 then
                        return "self"
                    end
                end
                -- Closing "}": UTF-8-encode the accumulated code point.
                tseuhex["}"] = function(state, token)
                    local num = state.in_hex
                    state.in_hex = nil
                    if num < 128 then
                        collect_fallback(state, string.char(num))
                        return "string"
                    end
                    -- build continuation bytes from the low 6-bit groups
                    local bytes = ""
                    while num > 63 do
                        local v = num % 64
                        bytes = string.char(128 + v) .. bytes -- yeah ik, not the most efficient
                        num = (num - v) / 64
                    end
                    -- one more continuation byte if the remainder won't fit
                    -- in the lead byte for this sequence length
                    if num >= 2^6/(2^#bytes) then
                        local v = num % 64
                        bytes = string.char(128 + v) .. bytes
                        num = (num - v) / 64
                    end
                    do
                        -- compose the lead byte: one marker bit per
                        -- continuation byte, then the remaining payload
                        local v = 0
                        for i=1,#bytes do
                            v = v + 128 / 2^i
                        end
                        v = v + num
                        assert(v < 126)
                        bytes = string.char(128 + v) .. bytes
                    end
                    collect_fallback(state, bytes)
                    return "string"
                end
            end
            tsescapes.unicode = tseunicode
        end

        -- "\z": skip following whitespace (including newlines) until the
        -- next non-whitespace character, which is handled by tstring.
        do local tseskipwhitespace = selfify(mknewline({
                string = defs.string,
                whitespace = "self",
                [""] = "string",
                [1] = collect_fallback,
            }, 2))
            local tbase = defs.base
            local tbasemap = {whitespace = "whitespace"}
            -- whitespace chars stay here; anything else resolves via tstring
            setmetatable(tseskipwhitespace, {__index = function(t, k) return tbasemap[tbase[k]] or tstring[k] end})
            tsescapes.skipwhitespace = tseskipwhitespace
        end
    end

    tstring['\\'] = "escapes"

    tstring['"'] = "close"
    tstring["'"] = "close"

    -- raw newlines are not allowed inside short strings
    tstring['\n'] = false
    tstring['\r'] = false

    -- any other character is plain string content
    tstring[""] = "self"

    tstring[1] = collect_fallback

    -- Quote character: closes the literal only if it matches the opening
    -- quote recorded by string_open; otherwise it is ordinary content.
    -- On close, the collected text is pushed (TK_STRING was already pushed
    -- by string_open).
    function tstring.close(state, token)
        if state.in_string == token then
            state.in_string = nil
            state[#state+1] = table.concat(state[COLLECT])
            state[COLLECT] = nil
            return "defs"
        else
            collect_fallback(state, token)
            return "self"
        end
    end
end
|
|
|
|
-- Long-string ([[...]], [=[...]=], ...) states.  `state.longstring_count`
-- holds the number of '=' in the opening bracket; a closing bracket must
-- match it exactly.
do local tlongstring = {}
    defs.longstring = tlongstring
    do local tllongstring_proper = selfify({[""] = "self", ["]"] = function(state, token) state.longstring_close = 0 return "maybe_end" end})
        tllongstring_proper[1] = false -- placeholder for newline handling
        tllongstring_proper[2] = collect_fallback

        -- After a "]": count '=' until either a matching "]" closes the
        -- string or the sequence turns out to be ordinary content.
        do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
            tllongstring_proper.maybe_end = tllmaybe_end
            tllmaybe_end.longstring_proper = tllongstring_proper
            tllmaybe_end["="] = function(state, token)
                state.longstring_close = state.longstring_close + 1
                return "maybe_end"
            end
            tllmaybe_end["]"] = function(state, token)
                if state.longstring_close == state.longstring_count then
                    -- proper close: emit TK_STRING plus the collected text
                    state.longstring_close = nil
                    state.longstring_count = nil
                    local pos = #state
                    state[pos+1] = TK_STRING
                    state[pos+2] = table.concat(state[COLLECT])
                    state[COLLECT] = nil
                    return "defs"
                else
                    -- level mismatch: flush "]===" as content and restart
                    -- the maybe-end scan at this new "]"
                    collect_fallback(state, "]")
                    collect_fallback(state, ("="):rep(state.longstring_close))
                    state.longstring_close = 0
                    return "maybe_end"
                end
            end
            tllmaybe_end[""] = "longstring_proper"
            tllmaybe_end[1] = collect_fallback
            tllmaybe_end[-1] = function(state, token, rule)
                -- leaving without a close: the pending "]===" was content
                if not rule then
                    collect_fallback(state, "]")
                    collect_fallback(state, ("="):rep(state.longstring_close))
                    state.longstring_close = nil
                end
            end
        end

        tlongstring.longstring_proper = tllongstring_proper
        mknewline(tlongstring, 1, tllongstring_proper)
        setmetatable(tlongstring, {__index=tllongstring_proper})
    end
end
|
|
|
|
defs["'"] = "string_open"
defs['"'] = "string_open"

-- "[" may open a long string ("[[", "[=[", ...) or just be a bare bracket.
defs["["] = "maybe_longstring"
defs.maybe_longstring = setmetatable({
    defs = defs,
    ['['] = "longstring_open",
    ['='] = "longstring_open",
    -- counting '=' after the first one
    longstring_count = selfify({
        ["="] = function(state, token)
            state.longstring_count = state.longstring_count + 1
            return "self"
        end,
        ["["] = function(state, token)
            state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
            return "longstring"
        end,
        longstring = defs.longstring
    }),
    -- first character after "[": either "=" (leveled opener) or "["
    -- (plain [[ opener); anything else falls through to the -1 hook.
    longstring_open = function(state, token)
        if token == "=" then
            -- BUGFIX: was `state.longstring_count or 0 + 1`, which parses
            -- as `count or (0 + 1)` and would never add to an existing
            -- count; parenthesized to match the intended "increment or
            -- start at 1" semantics.
            state.longstring_count = (state.longstring_count or 0) + 1
            return "longstring_count"
        elseif token == "[" then
            state.longstring_count = 0
            state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
            return "longstring"
        end
    end,
    [-1] = function(state, token, rule)
        -- not a long-string opener after all: emit the bare "["
        if rule ~= "longstring_open" then
            state[#state+1] = "["
        end
    end
}, {__index=defs})

-- these are needed for proper line counts
mknewline(defs, 1)
|
|
|
|
-- thankfully comments are easy
defs["-"] = "maybe_comment"
do local tmaybe_comment = setmetatable({["-"] = "comment"}, {__index=defs})
    defs.maybe_comment = tmaybe_comment
    tmaybe_comment[-1] = function(state, token, rule)
        -- single "-" was just the minus operator
        if rule ~= "comment" then
            state[#state+1] = "-"
        end
    end
    -- After "--": either a long comment ("--[[", "--[=[", ...) or a line
    -- comment running to the next newline.
    do local tmcomment = {comment_proper = selfify({})}
        tmaybe_comment.comment = tmcomment
        tmcomment[""] = "comment_proper"
        tmcomment["["] = "maybe_longcomment"
        mknewline(tmcomment, 1, defs)
        mknewline(tmcomment.comment_proper, 1, defs)
        tmcomment.comment_proper[""] = "self"
        -- Long comment body: same bracket-level matching as long strings,
        -- but nothing is collected or emitted.
        do local tllongcomment_proper = selfify({[""] = "self", ["]"] = function(state, token) state.longcomment_close = 0 return "maybe_end" end})
            tmcomment.longcomment = tllongcomment_proper
            do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
                tllongcomment_proper.maybe_end = tllmaybe_end
                tllmaybe_end.longcomment_proper = tllongcomment_proper
                tllmaybe_end["="] = function(state, token)
                    state.longcomment_close = state.longcomment_close + 1
                    return "maybe_end"
                end
                tllmaybe_end["]"] = function(state, token)
                    if state.longcomment_close == state.longcomment_count then
                        state.longcomment_close = nil
                        state.longcomment_count = nil
                        return "defs"
                    else
                        -- level mismatch: restart the scan at this "]"
                        state.longcomment_close = 0
                        return "maybe_end"
                    end
                end
                tllmaybe_end[""] = "longcomment_proper"
                tllmaybe_end[-1] = function(state, token, rule)
                    if not rule then
                        state.longcomment_close = nil
                    end
                end
            end

            mknewline(tllongcomment_proper, 1, tllongcomment_proper)
        end

        tmcomment.maybe_longcomment = setmetatable({
            comment = tmcomment,
            ['['] = "longcomment_open",
            ['='] = "longcomment_open",
            longcomment_count = setmetatable(selfify({
                ["="] = function(state, token)
                    state.longcomment_count = state.longcomment_count + 1
                    return "longcomment_count"
                end,
                ["["] = "longcomment",
                longcomment = tmcomment.longcomment,
            }, "longcomment_count"), {__index=tmcomment}),
            longcomment_open = function(state, token)
                if token == "=" then
                    -- BUGFIX: was `state.longcomment_count or 0 + 1`, which
                    -- parses as `count or (0 + 1)`; parenthesized to match
                    -- the intended "increment or start at 1" semantics
                    -- (same latent defect as longstring_open).
                    state.longcomment_count = (state.longcomment_count or 0) + 1
                    return "longcomment_count"
                elseif token == "[" then
                    state.longcomment_count = 0
                    return "longcomment"
                end
            end,
        }, {__index=tmcomment})
    end
end
|
|
|
|
local STATE = parser.STATE

-- Dispatcher for two-character operator tokens.  Calling
-- defs.multitokens(first, second1, result1, second2, result2, ...)
-- installs a state for `first` whose -1 hook emits the bare `first`
-- character unless the next token is one of the registered seconds,
-- in which case `second` emits the combined token sentinel instead.
defs.multitokens = setmetatable({
    [-1] = function(state, token, rule)
        if not state[STATE].multitoken[token] then
            state[#state+1] = state[STATE].first
        end
    end,
    second = function(state, token)
        state[#state+1] = state[STATE].multitoken[token]
        return "self" -- actually goes into defs
    end
}, {
    __index=defs,
    __call=function(t, first, ...)
        -- consume (second, result) pairs from the vararg tail
        local function helper(t, second, result, ...)
            if not second then return end
            t[second] = "second"
            t.multitoken[second] = result
            return helper(t, ...)
        end
        defs[first] = setmetatable({
            first = first,
            multitoken = {}
        }, {__index=t})
        return helper(defs[first], ...)
    end
})

defs.multitokens("=", "=", TK_EQ)
defs.multitokens("/", "/", TK_IDIV)
defs.multitokens("<", "<", TK_SHL, "=", TK_LE)
defs.multitokens(">", ">", TK_SHR, "=", TK_GE)
defs.multitokens("~", "=", TK_NE)
defs.multitokens(":", ":", TK_DBCOLON)
|
|
|
|
-- "." may start: ".." (TK_CONCAT), "..." (TK_DOTS), a float like ".5",
-- or stand alone as the plain "." token.
defs["."] = setmetatable({
    [-1] = function(state, token, rule)
        if token ~= "." then
            if rule ~= "digit" then
                state[#state+1] = "."
            end
        end
    end,
    -- ".<digit>" begins a fractional numeral.
    -- NOTE(review): TK_FLT is pushed here while in_integer's -1 hook also
    -- pushes state[STATE].numtype at number end, and the leading digit
    -- token is not added to the collect buffer (unlike defs.digit) —
    -- verify against the parser framework's transition semantics.
    digit = function(state, token, rule)
        state[#state+1] = TK_FLT
        state[COLLECT] = {".", coalesce=31}
        return "in_decimal"
    end,
    ["."] = setmetatable({
        [-1] = function(state, token, rule)
            -- ".." not followed by a third "." is the concat operator
            if token ~= "." then
                state[#state+1] = TK_CONCAT
            end
        end,
        ["."] = function(state, token)
            state[#state+1] = TK_DOTS
            return "self" -- actually goes into defs
        end,
    }, {__index=defs})
}, {__index=defs})
|
|
|
|
-- Entry rule for a numeral's first digit: start a collect buffer seeded
-- with that digit, then route "0" to in_zero (possible hex prefix) and
-- every other digit to in_integer.  (Both arms are truthy strings, so the
-- and/or ternary is safe.)
function defs.digit(state, token)
    state[COLLECT] = {token, coalesce=31}
    return token == "0" and "in_zero" or "in_integer"
end
|
|
|
|
-- Numeral states.  Digits accumulate in the collect buffer; when a state's
-- -1 hook sees a non-numeral token it pushes `numtype` (TK_INT or TK_FLT)
-- followed by the tonumber() of the collected text.
defs.in_integer = setmetatable(selfify({
    hexdigit = "alpha",
    alpha = false, -- a letter after a number is a tokenizer error
    ['e'] = "exp",
    ['E'] = "exp",
    [parser.EOZ] = "self", -- defs
    exp = function(state, token)
        collect_fallback(state, token)
        return "in_exp"
    end,
    ['.'] = function(state, token)
        collect_fallback(state, token)
        return "in_decimal"
    end,
    digit = function(state, token)
        collect_fallback(state, token)
        return "in_digit"
    end,
    [-1] = function(state, token, rule)
        -- TODO figure out best order for these checks
        if rule == "digit" or token == "." or rule == "hexdigit" or rule == "into_hex" or rule == "exp" then return end
        state[#state+1] = state[STATE].numtype
        state[#state+1] = tonumber(table.concat(state[COLLECT])) -- TODO maybe not the best option
        state[COLLECT] = nil
    end,
    numtype = TK_INT
}, "in_digit"), {__index=defs})

-- After a leading "0": "x"/"X" switches to hexadecimal.
defs.in_zero = setmetatable({
    ['x'] = "into_hex",
    ['X'] = "into_hex",
    into_hex = function(state, token)
        collect_fallback(state, token)
        return "in_hex"
    end,
}, {__index=defs.in_integer})

-- After the decimal point: a second "." is an error; result is a float.
defs.in_decimal = setmetatable(selfify({
    ['.'] = false,
    numtype = TK_FLT
}, "in_digit"), {__index=defs.in_integer})

-- After the exponent's digits: a second exponent marker is an error.
defs.in_expnum = setmetatable(selfify({
    exp = false,
}, "in_digit"), {__index=defs.in_decimal})

-- After the exponent marker and optional sign: expects digits.
defs.in_subexp = setmetatable({
    in_expnum = defs.in_expnum,
    digit = function(state, token)
        collect_fallback(state, token)
        return "in_expnum"
    end,
}, {__index=defs.base})

-- Immediately after "e"/"E"/"p"/"P": optional sign, then digits.
defs.in_exp = setmetatable({
    in_subexp = defs.in_subexp,
    ["+"] = "sign",
    ["-"] = "sign",
    sign = function(state, token)
        collect_fallback(state, token)
        return "in_subexp"
    end,
}, {__index=defs.in_subexp})

-- Hexadecimal numerals: a-f count as digits, "p"/"P" marks the exponent.
defs.in_hex = setmetatable(selfify({
    in_decimal = "in_hex_fraction",
    hexdigit = 'digit',
    ['e'] = 'hexdigit',
    ['E'] = 'hexdigit',
    ['p'] = 'exp',
    ['P'] = 'exp',
}, "in_digit"), {__index=defs.in_integer})

-- Hex fraction (after the "." of a hex float).
defs.in_hex_fraction = setmetatable(selfify({
    ['.'] = false,
    numtype = TK_FLT
}, "in_digit"), {__index=defs.in_hex})
|
|
|
|
-- Single-character tokens stand for themselves: push the character and
-- remain in the root state.
function defs.simpletoken(state, token)
    state[#state+1] = token
    return "self"
end

-- Register every one-character token with the rule above.
for c in ("+*%^#&|(){}];,"):gmatch(".") do
    defs[c] = "simpletoken"
end
|
|
|
|
defs.whitespace = "self"
-- hex letters are ordinary letters outside numerals
defs.hexdigit = "alpha"
defs["_"] = "alpha"
-- Inside an identifier: letters, digits, and "_" all continue it.
defs.in_alpha = setmetatable(selfify({digit = "in_alpha", hexdigit = "in_alpha", alpha = "in_alpha", _ = "in_alpha", [parser.EOZ] = "self"}, "in_alpha"), {__index=defs})
-- First identifier character: start a collect buffer.
function defs.alpha(state, token)
    state[COLLECT] = {coalesce=15} -- TODO tweak this for CPU/memory tradeoff?
    collect_fallback(state, token)
    return "in_alpha"
end
defs.in_alpha[-1] = function(state, token, rule)
    if rule == "alpha" or rule == "digit" or rule == "hexdigit" or token == "_" then
        -- still inside the identifier: keep collecting
        collect_fallback(state, token)
    else
        -- identifier ended: emit either a keyword sentinel or
        -- TK_NAME plus the identifier text
        local key = table.concat(state[COLLECT])
        state[COLLECT] = nil
        local keyword = keywords[key]
        if keyword then
            state[#state+1] = keyword
        else
            local pos = #state
            state[pos+1] = TK_NAME
            state[pos+2] = key
        end
    end
end
|
|
|
|
-- unhandled characters resolve through the shared character classes
setmetatable(defs, {__index=defs.base})

-- Rule for ' and ": begin a short string literal.  TK_STRING is pushed
-- now; tstring.close later pushes the collected text.  The opening quote
-- is remembered so only the matching quote closes the literal.
function defs.string_open(state, token)
    if not state.in_string then
        state[#state+1] = TK_STRING
        state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
        state.in_string = token
        return "string"
    end
    -- BUGFIX: was `assert("this shouldn't happen")`, which can never fail
    -- because a non-empty string is truthy; raise for real instead.
    error("this shouldn't happen")
end
|
|
|
|
-- Public module table: the root state machine (`defs`), the token
-- sentinels, and reverse maps for pretty-printing tokens back to text.
return {
    defs = defs,
    tokens = {
        TK_AND = TK_AND, TK_BREAK = TK_BREAK,
        TK_DO = TK_DO, TK_ELSE = TK_ELSE, TK_ELSEIF = TK_ELSEIF, TK_END = TK_END, TK_FALSE = TK_FALSE, TK_FOR = TK_FOR, TK_FUNCTION = TK_FUNCTION,
        TK_GOTO = TK_GOTO, TK_IF = TK_IF, TK_IN = TK_IN, TK_LOCAL = TK_LOCAL, TK_NIL = TK_NIL, TK_NOT = TK_NOT, TK_OR = TK_OR, TK_REPEAT = TK_REPEAT,
        TK_RETURN = TK_RETURN, TK_THEN = TK_THEN, TK_TRUE = TK_TRUE, TK_UNTIL = TK_UNTIL, TK_WHILE = TK_WHILE,
        TK_IDIV = TK_IDIV, TK_CONCAT = TK_CONCAT, TK_DOTS = TK_DOTS, TK_EQ = TK_EQ, TK_GE = TK_GE, TK_LE = TK_LE, TK_NE = TK_NE,
        TK_SHL = TK_SHL, TK_SHR = TK_SHR,
        TK_DBCOLON = TK_DBCOLON, TK_EOS = TK_EOS,
        TK_FLT = TK_FLT, TK_INT = TK_INT, TK_NAME = TK_NAME, TK_STRING = TK_STRING
    },
    reverse_keywords = reverse_keywords,
    reverse_tokens = {
        -- tokens with a fixed spelling
        [TK_IDIV] = "//", [TK_CONCAT] = "..", [TK_DOTS] = "...", [TK_EQ] = "==", [TK_GE] = ">=", [TK_LE] = "<=", [TK_NE] = "~=",
        [TK_SHL] = "<<", [TK_SHR] = ">>",
        [TK_DBCOLON] = "::", [TK_EOS] = "<eof>",
        -- tokens that carry a payload
        [TK_FLT] = "<float>", [TK_INT] = "<integer>", [TK_NAME] = "<identifier>", [TK_STRING] = "<string>"
    },
}
|