lua.cratera/luatokens.lua

313 lines
9.9 KiB
Lua

-- Lua defs
-- we need some stuff from here
local parser = require "parser"
local selfify = parser.selfify
local EOF = parser.EOF
local COLLECT = parser.COLLECT
local collect_fallback = parser.collect_fallback
-- "dummies"
-- Token sentinels: every TK_* below is its own freshly created empty table.
-- Tables compare by reference, so each sentinel is equal only to itself.
-- The names mirror the RESERVED enum of the reference lexer, see
-- http://www.lua.org/source/5.3/llex.h.html#RESERVED
-- reserved words
local TK_AND, TK_BREAK, TK_DO, TK_ELSE, TK_ELSEIF, TK_END = {}, {}, {}, {}, {}, {}
local TK_FALSE, TK_FOR, TK_FUNCTION, TK_GOTO, TK_IF, TK_IN = {}, {}, {}, {}, {}, {}
local TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT, TK_RETURN = {}, {}, {}, {}, {}, {}
local TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE = {}, {}, {}, {}
-- multi-character operators
local TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE = {}, {}, {}, {}, {}, {}, {}
local TK_SHL, TK_SHR = {}, {}
local TK_DBCOLON, TK_EOS = {}, {}
-- literals and names
local TK_FLT, TK_INT, TK_NAME, TK_STRING = {}, {}, {}, {}
-- Container for every lexer state table defined in this file.
local defs = {}
-- defs.base classifies single characters: it maps a one-character string to
-- the name of its character class ("whitespace", "newline", "digit",
-- "hexdigit" or "alpha"). Characters without an entry (e.g. "_") have no class.
do
  local base = {}
  -- Tag every character of `chars` with the class name `class`.
  local function classify(class, chars)
    for i = 1, #chars do
      base[string.sub(chars, i, i)] = class
    end
  end
  classify("whitespace", " \v\t\f")
  classify("newline", "\n\r")
  classify("digit", "0123456789")
  -- a-f / A-F are tagged "hexdigit" only; they are not additionally "alpha".
  classify("hexdigit", "abcdefABCDEF")
  classify("alpha", "ghijklmnopqrstuvwxyzGHIJKLMNOPQRSTUVWXYZ")
  defs.base = base
end
-- Bumps state.line by one whenever `token` is "\n" or "\r"; the counter
-- starts from 1 when state.line has not been set yet. `rule` is not used.
-- Returns nothing.
-- TODO fix (original note; presumably because a "\r\n" pair fed one
-- character at a time is counted as two lines -- confirm)
local function linecount(state, token, rule)
  local is_newline = (token == "\n") or (token == "\r")
  if not is_newline then
    return
  end
  state.line = (state.line or 1) + 1
end
-- String-literal lexer state. The table is published as defs.string and
-- links back to the top-level table through tstring.defs.
-- NOTE(review): selfify comes from the parser module; what it adds to the
-- table is not visible here (presumably the target of the "self" rule name
-- used below) -- confirm against parser.lua.
do local tstring = selfify({})
defs.string = tstring
tstring.defs = defs
-- Escape-sequence state, reachable from tstring as tstring.escapes.
-- String values name another entry of this same table (insertraw, insertmap,
-- skipwhitespace, unicode and hex are all assigned further down).
-- Characters with no explicit entry are looked up in defs.base through the
-- metatable, so they resolve to their class name (e.g. "1" -> "digit").
do local tsescapes = setmetatable({
-- \' \" \\ : the escaped character is inserted as-is
["'"] = "insertraw",
['"'] = "insertraw",
['\\'] = "insertraw",
-- single-letter escapes translated by insertmap; these entries take
-- precedence over the "hexdigit" class that defs.base gives a, b and f
["a"] = "insertmap",
["b"] = "insertmap",
["f"] = "insertmap",
["n"] = "insertmap",
["r"] = "insertmap",
["t"] = "insertmap",
["v"] = "insertmap",
["z"] = "skipwhitespace",
["u"] = "unicode",
["x"] = "hex",
-- backslash followed by a line break: the nested table has an entry only
-- for the opposite newline character ("\n\r" or "\r\n" pairs); every other
-- lookup on it (and on the innermost table) falls through to tstring
["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
-- NOTE(review): functions stored at integer keys are presumably hooks the
-- parser calls with (state, token, rule) -- confirm against parser.lua.
-- [1] updates state.line; [2] hands a single "\n" to collect_fallback
-- whenever the token is "\r" or "\n"
[1] = linecount,
[2] = function(state, token, rule) if token == "\r" or token == "\n" then collect_fallback(state, "\n") end end,
}, {__index = defs.base})
-- cross-link the two states so each can be reached from the other by name
defs.string.escapes = tsescapes
tsescapes.string = defs.string
-- Escape rule used for \', \" and \\: the character after the backslash
-- stands for itself, so `token` is handed to collect_fallback unchanged.
-- Returns "string", the key under which tsescapes holds the string-literal
-- table.
tsescapes.insertraw = function(state, token)
  collect_fallback(state, token)
  return "string"
end
do
  -- escape letter -> the control character it denotes
  local control_chars = {
    a = "\a",
    b = "\b",
    f = "\f",
    n = "\n",
    r = "\r",
    t = "\t",
    v = "\v",
  }
  -- Escape rule for the single-letter escapes: hands the mapped control
  -- character for `token` to collect_fallback and returns "string", the key
  -- under which tsescapes holds the string-literal table.
  tsescapes.insertmap = function(state, token)
    local replacement = control_chars[token]
    collect_fallback(state, replacement)
    return "string"
  end
end
-- Decimal escape (\d, \dd, \ddd).
-- state.in_digit accumulates the value and state.c counts the digits seen.
-- * `token` is a digit and fewer than three have been seen: store the
--   running value and return "digitc" to ask for more.
-- * third digit, or `token` is not a digit: the escape is complete. A value
--   above 255 returns nil; otherwise the byte is handed to collect_fallback,
--   both state fields are cleared, a non-digit `token` is handed to
--   collect_fallback after the byte, and "string" is returned.
function tsescapes.digit(state, token)
  -- position 1..9 is the digit itself, position 10 is "0" (hence % 10)
  local pos = string.find("1234567890", token, 1, true)
  local is_digit = pos ~= nil
  local value = state.in_digit
  if is_digit then
    value = (value or 0) * 10 + pos % 10
    local seen = (state.c or 0) + 1
    state.c = seen
    if seen < 3 then
      state.in_digit = value
      return "digitc"
    end
  end
  if value > 255 then
    return nil
  end
  collect_fallback(state, string.char(value))
  state.in_digit = nil
  state.c = nil
  if not is_digit then
    collect_fallback(state, token)
  end
  return "string"
end
-- State used while a decimal escape may still receive more digits.
-- Its own "" entry is tsescapes.digit; every key it does not define itself
-- is looked up in tstring through the metatable.
do
  local digitc = selfify({[""] = tsescapes.digit, string = tstring}, "digitc")
  tsescapes.digitc = setmetatable(digitc, {__index = tstring})
end
-- When called with rule == nil, flush the pending escape: hand the byte for
-- state.in_digit to collect_fallback and clear the decimal-escape fields.
-- NOTE(review): presumably the parser passes rule == nil when the token was
-- not handled by this table's own "" entry -- confirm against parser.lua.
tsescapes.digitc[1] = function(state, token, rule)
  if rule ~= nil then
    return
  end
  local pending = state.in_digit
  collect_fallback(state, string.char(pending))
  state.in_digit = nil
  state.c = nil
end
-- "\xXX" escape state. Characters are classified through defs.base; the
-- "digit" class is redirected to "hexdigit" so 0-9 and a-f/A-F share one rule.
tsescapes.hex = setmetatable(
  selfify({string = defs.string, digit = "hexdigit"}),
  {__index = defs.base}
)
-- Consumes one hex digit. The first digit is parked in state.in_hex and
-- "self" is returned; the second digit completes the byte, which is handed
-- to collect_fallback before state.in_hex is cleared and "string" returned.
tsescapes.hex.hexdigit = function(state, token)
  -- positions 1..15 are 1-9,A-F and 17..31 are 1-9,a-f; "0" is first found
  -- at position 16, so pos % 16 yields the digit's value
  local pos = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
  assert(pos, "this should never be called for non-hex-digits")
  local nibble = pos % 16
  local high = state.in_hex
  if not high then
    state.in_hex = nibble
    return "self"
  end
  collect_fallback(state, string.char(high * 16 + nibble))
  state.in_hex = nil
  return "string"
end
-- "\u{XXX}" escape. tseunicode only has a rule for "{", which moves to
-- tseuhex; tseuhex accumulates hex digits in state.in_hex until "}" arrives
-- and then emits the UTF-8 encoding of the accumulated value.
do local tseunicode = {}
  tseunicode["{"] = "hex"
  -- "digit" is redirected to "hexdigit" so 0-9 and a-f/A-F share one rule;
  -- other characters are classified through defs.base.
  do local tseuhex = setmetatable(selfify({digit = "hexdigit", string=tstring}), {__index=defs.base})
    tseunicode.hex = tseuhex
    -- Consumes one hex digit and folds it into state.in_hex.
    -- Returns "self" while the value is at most 2^31-1; beyond that it
    -- returns nothing (nil).
    function tseuhex.hexdigit(state, token)
      -- "0" is first found at position 16, so digit % 16 is the digit's value
      local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
      assert(digit, "this should never be called for non-hex-digits")
      state.in_hex = (state.in_hex or 0) * 16 + digit % 16
      if state.in_hex <= 2147483647 then
        return "self"
      end
    end
    -- Ends the escape: clears state.in_hex, hands the UTF-8 bytes of the
    -- accumulated value to collect_fallback and returns "string".
    -- Returns nil when no hex digit was seen ("\u{}").
    tseuhex["}"] = function(state, token)
      local num = state.in_hex
      state.in_hex = nil
      if num == nil then
        -- "\u{}" with no digits: previously `num < 128` raised
        -- "attempt to compare nil with number". Reject it by returning nil,
        -- like the other escape rules do for invalid input.
        return nil
      end
      if num < 128 then
        -- single-byte (ASCII) case
        collect_fallback(state, string.char(num))
        return "string"
      end
      local bytes = ""
      -- peel off 6 bits at a time into continuation bytes (10xxxxxx)
      while num > 63 do
        local v = num % 64
        bytes = string.char(128 + v) .. bytes -- prepending; not the most efficient
        num = (num - v) / 64
      end
      -- the lead byte has 6 - #bytes payload bits; if what is left does not
      -- fit, spill it into one more continuation byte
      if num >= 2^6/(2^#bytes) then
        local v = num % 64
        bytes = string.char(128 + v) .. bytes
        num = (num - v) / 64
      end
      do
        -- lead byte: one high marker bit per continuation byte (64, 32, ...)
        -- on top of 128, plus the remaining payload
        local v = 0
        for i=1,#bytes do
          v = v + 128 / 2^i
        end
        v = v + num
        assert(v < 126)
        bytes = string.char(128 + v) .. bytes
      end
      collect_fallback(state, bytes)
      return "string"
    end
  end
  tsescapes.unicode = tseunicode
end
-- "\z" escape state: stays in itself ("self") for whitespace and goes to
-- "string" for the "" entry.
-- Lookups for keys the table does not define go through the __index
-- function: characters that defs.base classifies as whitespace or newline
-- resolve to "whitespace"; everything else is looked up in tstring.
-- NOTE(review): the functions at [1] and [2] are presumably hooks invoked by
-- the parser -- confirm against parser.lua.
do
  local skip = selfify({
    string = defs.string,
    whitespace = "self",
    [""] = "string",
    [1] = collect_fallback,
    [2] = linecount,
  })
  local classes = defs.base
  local as_whitespace = {whitespace = "whitespace", newline = "whitespace"}
  local function lookup(t, k)
    local mapped = as_whitespace[classes[k]]
    if mapped then
      return mapped
    end
    return tstring[k]
  end
  setmetatable(skip, {__index = lookup})
  tsescapes.skipwhitespace = skip
end
end
-- Rules of the string-literal state.
-- a backslash switches to the escape table stored at tstring.escapes
tstring['\\'] = "escapes"
-- either quote character goes through tstring.close, which compares it with
-- the quote that opened the string
tstring['"'] = "close"
tstring["'"] = "close"
-- NOTE(review): `false` presumably marks raw line breaks as not allowed
-- inside a quoted string -- confirm how the parser treats false entries
tstring['\n'] = false
tstring['\r'] = false
-- NOTE(review): the "" key is presumably the fallback for any other
-- character, and [1] a hook that collects it -- confirm against parser.lua
tstring[""] = "self"
tstring[1] = collect_fallback
-- Handles a quote character inside a string literal.
-- If `token` is not the quote that opened the string (state.in_string), it
-- is ordinary content: hand it to collect_fallback and return "self".
-- Otherwise the literal ends: clear state.in_string, append the concatenated
-- state[COLLECT] pieces to the state's array part, drop state[COLLECT] and
-- return "defs".
function tstring.close(state, token)
  if state.in_string ~= token then
    collect_fallback(state, token)
    return "self"
  end
  state.in_string = nil
  local text = table.concat(state[COLLECT])
  state[#state+1] = text
  state[COLLECT] = nil
  return "defs"
end
end
-- Placeholder for a long-string state (presumably for [[...]] literals,
-- going by the name). The table is created and then goes out of scope
-- without being stored anywhere.
do local tlongstring = {}
-- TODO: not implemented yet. Note that defs["["] names a "maybe_longstring"
-- rule that is not defined in the part of the file visible here.
end
-- Top-level rules: either quote character opens a string literal, and "["
-- names the (not yet defined here) "maybe_longstring" rule.
defs["'"] = "string_open"
defs['"'] = "string_open"
defs["["] = "maybe_longstring"
-- function at integer key 1: keeps state.line up to date
defs[1] = linecount
-- Characters without an explicit rule fall back to their class name from
-- defs.base. This used to read `{__index=whitespace}`; `whitespace` is not
-- defined anywhere in this file, so it evaluated to nil and the metatable
-- had no __index at all.
setmetatable(defs, {__index=defs.base})
-- Opens a string literal when a quote character is seen at top level.
-- Appends TK_STRING to the state's array part, starts an empty
-- state[COLLECT] buffer, remembers the opening quote in state.in_string
-- (tstring.close compares against it) and returns "string".
-- Raises an error if a string is already open.
function defs.string_open(state, token)
  if not state.in_string then
    state[#state+1] = TK_STRING
    state[COLLECT] = {}
    state.in_string = token
    return "string"
  end
  -- was `assert("this shouldn't happen")`: a string is truthy, so that
  -- assert could never fire and the function silently returned nil
  error("this shouldn't happen")
end
-- Token sentinels exported under their own names.
local tokens = {
  -- reserved words
  TK_AND = TK_AND,
  TK_BREAK = TK_BREAK,
  TK_DO = TK_DO,
  TK_ELSE = TK_ELSE,
  TK_ELSEIF = TK_ELSEIF,
  TK_END = TK_END,
  TK_FALSE = TK_FALSE,
  TK_FOR = TK_FOR,
  TK_FUNCTION = TK_FUNCTION,
  TK_GOTO = TK_GOTO,
  TK_IF = TK_IF,
  TK_IN = TK_IN,
  TK_LOCAL = TK_LOCAL,
  TK_NIL = TK_NIL,
  TK_NOT = TK_NOT,
  TK_OR = TK_OR,
  TK_REPEAT = TK_REPEAT,
  TK_RETURN = TK_RETURN,
  TK_THEN = TK_THEN,
  TK_TRUE = TK_TRUE,
  TK_UNTIL = TK_UNTIL,
  TK_WHILE = TK_WHILE,
  -- multi-character operators
  TK_IDIV = TK_IDIV,
  TK_CONCAT = TK_CONCAT,
  TK_DOTS = TK_DOTS,
  TK_EQ = TK_EQ,
  TK_GE = TK_GE,
  TK_LE = TK_LE,
  TK_NE = TK_NE,
  TK_SHL = TK_SHL,
  TK_SHR = TK_SHR,
  TK_DBCOLON = TK_DBCOLON,
  TK_EOS = TK_EOS,
  -- literals and names
  TK_FLT = TK_FLT,
  TK_INT = TK_INT,
  TK_NAME = TK_NAME,
  TK_STRING = TK_STRING,
}
-- Module value: the lexer state tables plus the token sentinels.
return {
  defs = defs,
  tokens = tokens,
}