SoniEx2 2019-04-06 17:42:00 -03:00
parent 1f1f6c0732
commit 0118cdcb80
3 changed files with 171 additions and 45 deletions
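In short: luatokens.lua renames its state-machine table from tokens to defs, exports one dummy table per reserved Lua token instead of only TK_STRING, and adds \u{XXX} escape handling to string parsing; parser.lua switches its out-of-data sentinel from "" to nil; the test file grows cases for the new escapes. For a downstream caller, the export change looks roughly like this (a hypothetical sketch inferred from the return-table hunk at the end of luatokens.lua, not code from the commit):

    local parser = require "parser"
    local luatokens = require "luatokens"

    -- before this commit:
    --   local state = parser.parse(luatokens.tokens, [["hello"]])
    --   assert(state[1] == luatokens.TK_STRING)

    -- after this commit:
    local state = parser.parse(luatokens.defs, [["hello"]])
    assert(state[1] == luatokens.tokens.TK_STRING)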

View File: luatokens.lua

@@ -1,4 +1,4 @@
--- Lua tokens
+-- Lua defs
-- we need some stuff from here
local parser = require "parser"
@@ -8,11 +8,27 @@ local COLLECT = parser.COLLECT
local collect_fallback = parser.collect_fallback
-- "dummies"
-local TK_STRING = {}
+-- see http://www.lua.org/source/5.3/llex.h.html#RESERVED
+local TK_AND, TK_BREAK,
+TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
+TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
+TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
+TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE,
+TK_SHL, TK_SHR,
+TK_DBCOLON, TK_EOS,
+TK_FLT, TK_INT, TK_NAME, TK_STRING =
+{}, {},
+{}, {}, {}, {}, {}, {}, {},
+{}, {}, {}, {}, {}, {}, {}, {},
+{}, {}, {}, {}, {},
+{}, {}, {}, {}, {}, {}, {},
+{}, {},
+{}, {},
+{}, {}, {}, {}
-local tokens = {}
+local defs = {}
-tokens.base = {
+defs.base = {
[" "] = "whitespace",
["\n"] = "newline",
["\r"] = "newline",
@@ -84,14 +100,15 @@ tokens.base = {
}
local function linecount(state, token, rule)
-- TODO fix
if token == "\n" or token == "\r" then
state.line = (state.line or 1) + 1
end
end
do local tstring = selfify({})
-tokens.string = tstring
-tstring.tokens = tokens
+defs.string = tstring
+tstring.defs = defs
do local tsescapes = setmetatable({
["'"] = "insertraw",
['"'] = "insertraw",
@@ -110,9 +127,9 @@ do local tstring = selfify({})
["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
[1] = linecount,
[2] = function(state, token, rule) if token == "\r" or token == "\n" then collect_fallback(state, "\n") end end,
-}, {__index = tokens.base})
-tokens.string.escapes = tsescapes
-tsescapes.string = tokens.string
+}, {__index = defs.base})
+defs.string.escapes = tsescapes
+tsescapes.string = defs.string
function tsescapes.insertraw(state, token)
collect_fallback(state, token)
@@ -158,7 +175,7 @@ do local tstring = selfify({})
end
end
-tsescapes.hex = setmetatable(selfify({string = tokens.string, digit = "hexdigit"}), {__index=tokens.base})
+tsescapes.hex = setmetatable(selfify({string = defs.string, digit = "hexdigit"}), {__index=defs.base})
function tsescapes.hex.hexdigit(state, token)
local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
assert(digit, "this should never be called for non-hex-digits")
@@ -174,14 +191,60 @@ do local tstring = selfify({})
end
end
+do local tseunicode = {}
+tseunicode["{"] = "hex"
+do local tseuhex = setmetatable(selfify({digit = "hexdigit", string=tstring}), {__index=defs.base})
+tseunicode.hex = tseuhex
+function tseuhex.hexdigit(state, token)
+local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
+assert(digit, "this should never be called for non-hex-digits")
+state.in_hex = (state.in_hex or 0) * 16 + digit % 16
+if state.in_hex <= 2147483647 then
+return "self"
+end
+end
+tseuhex["}"] = function(state, token)
+local num = state.in_hex
+state.in_hex = nil
+if num < 128 then
+collect_fallback(state, string.char(num))
+return "string"
+end
+local bytes = ""
+while num > 63 do
+local v = num % 64
+bytes = string.char(128 + v) .. bytes -- yeah ik, not the most efficient
+num = (num - v) / 64
+end
+if num >= 2^6/(2^#bytes) then
+local v = num % 64
+bytes = string.char(128 + v) .. bytes
+num = (num - v) / 64
+end
+do
+local v = 0
+for i=1,#bytes do
+v = v + 128 / 2^i
+end
+v = v + num
+assert(v < 126)
+bytes = string.char(128 + v) .. bytes
+end
+collect_fallback(state, bytes)
+return "string"
+end
+end
+tsescapes.unicode = tseunicode
+end
do local tseskipwhitespace = selfify({
-string = tokens.string,
+string = defs.string,
whitespace = "self",
[""] = "string",
[1] = collect_fallback,
[2] = linecount,
})
-local tbase = tokens.base
+local tbase = defs.base
local tbasemap = {whitespace = "whitespace", newline = "whitespace"}
setmetatable(tseskipwhitespace, {__index = function(t, k) return tbasemap[tbase[k]] or tstring[k] end})
tsescapes.skipwhitespace = tseskipwhitespace
@@ -205,7 +268,7 @@ do local tstring = selfify({})
state.in_string = nil
state[#state+1] = table.concat(state[COLLECT])
state[COLLECT] = nil
return "tokens"
return "defs"
else
collect_fallback(state, token)
return "self"
@@ -213,13 +276,18 @@ do local tstring = selfify({})
end
end
tokens["'"] = "string_open"
tokens['"'] = "string_open"
tokens[1] = linecount
do local tlongstring = {}
-- TODO
end
setmetatable(tokens, {__index=whitespace})
defs["'"] = "string_open"
defs['"'] = "string_open"
defs["["] = "maybe_longstring"
defs[1] = linecount
function tokens.string_open(state, token)
setmetatable(defs, {__index=whitespace})
function defs.string_open(state, token)
if not state.in_string then
state[#state+1] = TK_STRING
state[COLLECT] = {}
@@ -230,6 +298,15 @@ function tokens.string_open(state, token)
end
return {
-tokens = tokens,
-TK_STRING = TK_STRING,
+defs = defs,
+tokens = {
+TK_AND = TK_AND, TK_BREAK = TK_BREAK,
+TK_DO = TK_DO, TK_ELSE = TK_ELSE, TK_ELSEIF = TK_ELSEIF, TK_END = TK_END, TK_FALSE = TK_FALSE, TK_FOR = TK_FOR, TK_FUNCTION = TK_FUNCTION,
+TK_GOTO = TK_GOTO, TK_IF = TK_IF, TK_IN = TK_IN, TK_LOCAL = TK_LOCAL, TK_NIL = TK_NIL, TK_NOT = TK_NOT, TK_OR = TK_OR, TK_REPEAT = TK_REPEAT,
+TK_RETURN = TK_RETURN, TK_THEN = TK_THEN, TK_TRUE = TK_TRUE, TK_UNTIL = TK_UNTIL, TK_WHILE = TK_WHILE,
+TK_IDIV = TK_IDIV, TK_CONCAT = TK_CONCAT, TK_DOTS = TK_DOTS, TK_EQ = TK_EQ, TK_GE = TK_GE, TK_LE = TK_LE, TK_NE = TK_NE,
+TK_SHL = TK_SHL, TK_SHR = TK_SHR,
+TK_DBCOLON = TK_DBCOLON, TK_EOS = TK_EOS,
+TK_FLT = TK_FLT, TK_INT = TK_INT, TK_NAME = TK_NAME, TK_STRING = TK_STRING
+},
}
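The bulk of the new code above is the \u{...} escape: tseuhex accumulates the hex value in state.in_hex, and the "}" handler serializes it into bytes. As a reading aid, here is a hypothetical standalone equivalent of that serialization loop (encode_utf8 is not a name from the commit); like the handler, it accepts the extended, pre-RFC 3629 forms up to 2^31-1 that the new tests exercise:

    -- Hypothetical standalone version of the "}" handler's encoding loop.
    local function encode_utf8(num)
      if num < 0x80 then return string.char(num) end     -- plain ASCII
      local bytes = ""
      repeat
        bytes = string.char(0x80 + num % 0x40) .. bytes  -- 10xxxxxx continuation byte
        num = math.floor(num / 0x40)
      until num < 2^6 / 2^#bytes                         -- until the rest fits in the lead byte
      local lead = 0
      for i = 1, #bytes do lead = lead + 128 / 2^i end   -- one length-marker bit per continuation
      return string.char(0x80 + lead + num) .. bytes
    end

    assert(encode_utf8(0x41) == "A")
    assert(encode_utf8(0x800) == "\224\160\128")
    assert(encode_utf8(0x7FFFFFFF) == "\253\191\191\191\191\191")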

View File: parser.lua

@@ -72,7 +72,7 @@ local function get_next_common(state, in_pos, token)
end
local function get_next_table(state, in_pos)
-if state[DATA] == nil or #state[DATA] == 0 then return in_pos, state end
+if state[DATA] == nil or #state[DATA] == 0 then return in_pos, state end -- TODO end-of-stream handling
in_pos = in_pos + 1
local token = state[DATA][in_pos - state[OFFDATA]]
if token == nil then
@@ -84,10 +84,10 @@ local function get_next_table(state, in_pos)
end
local function get_next_string(state, in_pos)
-if state[DATA] == nil or #state[DATA] == 0 then return in_pos, state end
+if state[DATA] == nil or #state[DATA] == 0 then return in_pos, state end -- TODO end-of-stream handling
in_pos = in_pos + 1
-local token = optimize_lookups[string.byte(state[DATA], in_pos - state[OFFDATA], in_pos - state[OFFDATA])] or ""
-if token == "" then
+local token = optimize_lookups[string.byte(state[DATA], in_pos - state[OFFDATA], in_pos - state[OFFDATA])]
+if token == nil then
state[OFFDATA] = in_pos - 1
state[DATA] = state[GEN]()
return get_next_string(state, state[OFFDATA])

View File (tests)

@@ -56,57 +56,106 @@ do -- trim left spaces
end
return "self"
end
-for k,v in ipairs({"hello", " hello", "\t \v \n\r hallo", "I really like this parser thingy if it can be called that"}) do
+for k,v in ipairs({"hello", " hello", "\t \v \n\r hello"}) do
local state, err = parser.parse(defs, v)
+local case = case()
if not state then
-print(case(), err)
+print(case, err)
else
-print(case(), table.concat(state))
+assert(table.concat(state) == "hello")
end
end
end -- trim left spaces
do -- lua tokens
local luatokens = require "luatokens"
-local tokens = luatokens.tokens
+local tokens = luatokens.defs
local state, err, etoken, estate = parser.parse(tokens, [["hello world"]])
local case = case()
print(case, "---- IN TOKENS ----")
if not state then
print(case, "---- IN TOKENS ----")
print(case, err, etoken)
for i,v in pairs(estate) do
print(case, i, v)
end
print(case, "---- OUT TOKENS ----")
else
for i,v in ipairs(state) do
print(case, i, v)
end
+assert(state[1] == luatokens.tokens.TK_STRING)
+assert(state[2] == "hello world")
end
print(case, "---- OUT TOKENS ----")
end -- lua tokens
do -- more lua tokens
local luatokens = require "luatokens"
-local tokens = luatokens.tokens
+local tokens = luatokens.defs
local state, err, etoken, estate = parser.parse(tokens, [["\a\b\f\n\r\t\v\\\"\'\z \x41\65\
"]])
local case = case()
print(case, "---- IN TOKENS ----")
if not state then
print(case, "---- IN TOKENS ----")
print(case, err, etoken)
for i,v in pairs(estate) do
print(case, i, v)
end
print(case, "---- OUT TOKENS ----")
else
for i,v in ipairs(state) do
print(case, i, v)
if v == luatokens.TK_STRING then
in_string = true
elseif in_string then
print(case, v:gsub(".", function(v) return "\\"..string.byte(v) end))
in_string = false
end
end
+assert(state[1] == luatokens.tokens.TK_STRING)
+assert(state[2] == "\7\8\12\10\13\9\11\92\34\39\65\65\10")
end
end -- lua tokens
+do -- even more lua tokens
+local luatokens = require "luatokens"
+local tokens = luatokens.defs
+local state, err, etoken, estate = parser.parse(tokens, [["\u{000000000000000000000000000000000000000000000000000000000000041}"]])
+local case = case()
+if not state then
+print(case, "---- IN TOKENS ----")
+print(case, err, etoken)
+for i,v in pairs(estate) do
+print(case, i, v)
+end
+print(case, "---- OUT TOKENS ----")
+else
+assert(state[1] == luatokens.tokens.TK_STRING)
+assert(state[2] == "A")
+end
+end -- lua tokens
+do -- even more lua tokens
+local luatokens = require "luatokens"
+local tokens = luatokens.defs
+local state, err, etoken, estate = parser.parse(tokens, [["\u{7F}""\u{80}""\u{7FF}""\u{800}""\u{FFFF}""\u{10000}""\u{1FFFFF}""\u{200000}""\u{3FFFFFF}""\u{4000000}""\u{7FFFFFFF}"]])
+local case = case()
+if not state then
+print(case, "---- IN TOKENS ----")
+print(case, err, etoken)
+for i,v in pairs(estate) do
+print(case, i, v)
+end
+print(case, "---- OUT TOKENS ----")
+else
+assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+assert(table.remove(state, 1) == "\127")
+assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+assert(table.remove(state, 1) == "\194\128")
+assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+assert(table.remove(state, 1) == "\223\191")
+assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+assert(table.remove(state, 1) == "\224\160\128")
+assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+assert(table.remove(state, 1) == "\239\191\191")
+assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+assert(table.remove(state, 1) == "\240\144\128\128")
+assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+assert(table.remove(state, 1) == "\247\191\191\191")
+assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+assert(table.remove(state, 1) == "\248\136\128\128\128")
+assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+assert(table.remove(state, 1) == "\251\191\191\191\191")
+assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+assert(table.remove(state, 1) == "\252\132\128\128\128\128")
+assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+assert(table.remove(state, 1) == "\253\191\191\191\191\191")
+end
+print(case, "---- OUT TOKENS ----")
+end -- lua tokens
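The tests lean on a case helper defined elsewhere in the test file; judging from the calls above (case() returning a value that is then passed to print), it is presumably a counter along these lines (an assumption, not code from this commit):

    -- Hypothetical sketch of the case() helper used by the tests.
    local case do
      local n = 0
      case = function()
        n = n + 1
        return "case " .. n  -- label printed in front of each test's output
      end
    end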