Keywords and long strings

This commit is contained in:
SoniEx2 2019-04-07 12:45:34 -03:00
parent d50ad87794
commit 282dbabb7e
3 changed files with 217 additions and 10 deletions

View File

@ -9,12 +9,15 @@ local collect_fallback = parser.collect_fallback
-- "dummies" -- "dummies"
-- see http://www.lua.org/source/5.3/llex.h.html#RESERVED -- see http://www.lua.org/source/5.3/llex.h.html#RESERVED
-- keywords
local TK_AND, TK_BREAK, local TK_AND, TK_BREAK,
TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION, TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT, TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE, TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
-- operators
TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE, TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE,
TK_SHL, TK_SHR, TK_SHL, TK_SHR,
-- misc
TK_DBCOLON, TK_EOS, TK_DBCOLON, TK_EOS,
TK_FLT, TK_INT, TK_NAME, TK_STRING = TK_FLT, TK_INT, TK_NAME, TK_STRING =
{}, {}, {}, {},
@ -26,7 +29,32 @@ local TK_AND, TK_BREAK,
{}, {}, {}, {},
{}, {}, {}, {} {}, {}, {}, {}
local defs = {} local keywords = {
["and"] = TK_AND,
["break"] = TK_BREAK,
["do"] = TK_DO,
["else"] = TK_ELSE,
["elseif"] = TK_ELSEIF,
["end"] = TK_END,
["false"] = TK_FALSE,
["for"] = TK_FOR,
["function"] = TK_FUNCTION,
["goto"] = TK_GOTO,
["if"] = TK_IF,
["in"] = TK_IN,
["local"] = TK_LOCAL,
["nil"] = TK_NIL,
["not"] = TK_NOT,
["or"] = TK_OR,
["repeat"] = TK_REPEAT,
["return"] = TK_RETURN,
["then"] = TK_THEN,
["true"] = TK_TRUE,
["until"] = TK_UNTIL,
["while"] = TK_WHILE,
}
local defs = selfify({})
defs.base = { defs.base = {
[" "] = "whitespace", [" "] = "whitespace",
@ -280,9 +308,46 @@ do local tstring = selfify({})
end end
end end
do local tlongstring = selfify({}) do local tlongstring = {}
defs.longstring = tlongstring defs.longstring = tlongstring
-- TODO do local tllongstring_proper = selfify({[""] = "self", ["]"] = function(state, token) state.longstring_close = 0 return "maybe_end" end})
tllongstring_proper[1] = collect_fallback
do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
tllongstring_proper.maybe_end = tllmaybe_end
-- Rule for "=" while a closing delimiter may be in progress: count one more
-- "=" toward the closing level and remain in the maybe_end state.
tllmaybe_end["="] = function(state, token)
    local seen = state.longstring_close
    state.longstring_close = seen + 1
    return "maybe_end"
end
-- Rule for "]" while a closing delimiter may be in progress.
-- If the number of "=" counted since the previous "]" equals the opening
-- level, the long string is finished: emit TK_STRING plus the collected text
-- and go back to "defs". Otherwise the previous "]" and its "=" run were plain
-- content; they are collected and this "]" starts a new closing candidate.
tllmaybe_end["]"] = function(state, token)
    if state.longstring_close ~= state.longstring_count then
        -- not a real terminator: keep what was tentatively skipped
        collect_fallback(state, "]")
        collect_fallback(state, ("="):rep(state.longstring_close))
        state.longstring_close = 0
        return "maybe_end"
    end

    -- matching level: reset the bookkeeping and emit the string token
    state.longstring_close = nil
    state.longstring_count = nil
    local top = #state
    state[top+1] = TK_STRING
    state[top+2] = table.concat(state[COLLECT])
    state[COLLECT] = nil
    return "defs"
end
-- Hook stored at index -1 of the maybe_end state; it receives the current
-- token and the rule that was looked up for it.
-- When no rule exists for the token, the pending "]" and the "=" characters
-- counted so far were not a closing delimiter, so they are flushed into the
-- collect buffer and the closing counter is cleared.
-- NOTE(review): the token that triggered this path is not collected here and
-- no next state is returned — presumably the parser's fallback handles both;
-- confirm against the parser.
tllmaybe_end[-1] = function(state, token, rule)
if not rule then
collect_fallback(state, "]")
collect_fallback(state, ("="):rep(state.longstring_close))
state.longstring_close = nil
end
end
end
tlongstring.longstring_proper = tllongstring_proper
mknewline(tlongstring, 1, tllongstring_proper)
setmetatable(tlongstring, {__index=tllongstring_proper})
end
end end
defs["'"] = "string_open" defs["'"] = "string_open"
@ -297,6 +362,10 @@ defs.maybe_longstring = setmetatable({
state.longstring_count = state.longstring_count + 1 state.longstring_count = state.longstring_count + 1
return "self" return "self"
end, end,
["["] = function(state, token)
state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
return "longstring"
end,
longstring = defs.longstring longstring = defs.longstring
}), }),
longstring_open = function(state, token) longstring_open = function(state, token)
@ -304,6 +373,8 @@ defs.maybe_longstring = setmetatable({
state.longstring_count = state.longstring_count or 0 + 1 state.longstring_count = state.longstring_count or 0 + 1
return "longstring_count" return "longstring_count"
elseif token == "[" then elseif token == "[" then
state.longstring_count = 0
state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
return "longstring" return "longstring"
end end
end, end,
@ -319,12 +390,38 @@ defs.maybe_longstring = setmetatable({
--defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs}) --defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs})
mknewline(defs, 1) mknewline(defs, 1)
-- The "whitespace" rule maps to "self" — presumably this keeps the tokenizer
-- in the current state (depends on selfify; confirm).
defs.whitespace = "self"
-- The "hexdigit" rule and the "_" character are both routed to the "alpha"
-- rule, i.e. they are handled by defs.alpha like any other name character.
defs.hexdigit = "alpha"
defs["_"] = "alpha"
-- State used while inside a name or keyword: the digit, hexdigit, alpha and
-- "_" entries all lead back to "in_alpha", the end-of-stream token
-- (parser.EOZ) maps to "self", and any other lookup falls back to defs
-- through __index.
defs.in_alpha = setmetatable(selfify({digit = "in_alpha", hexdigit = "in_alpha", alpha = "in_alpha", _ = "in_alpha", [parser.EOZ] = "self"}, "in_alpha"), {__index=defs})
-- Rule for the first character of a name or keyword.
-- Installs a fresh collect buffer on the state, stores the character in it
-- and hands over to the "in_alpha" state for the rest of the word.
function defs.alpha(state, token)
    -- TODO tweak this for CPU/memory tradeoff?
    local buffer = {coalesce=15}
    state[COLLECT] = buffer
    collect_fallback(state, token)
    return "in_alpha"
end
-- Hook stored at index -1 of the in_alpha state.
-- While the token still belongs to the word (rule is alpha/digit/hexdigit, or
-- the token is "_") it is appended to the collect buffer. On any other token
-- the buffered word is finished: a reserved word is emitted as its keyword
-- token alone, anything else as TK_NAME followed by the word itself.
defs.in_alpha[-1] = function(state, token, rule)
    local in_word = rule == "alpha" or rule == "digit" or rule == "hexdigit" or token == "_"
    if in_word then
        collect_fallback(state, token)
        return
    end

    local word = table.concat(state[COLLECT])
    state[COLLECT] = nil

    local reserved = keywords[word]
    if not reserved then
        local top = #state
        state[top+1] = TK_NAME
        state[top+2] = word
        return
    end
    state[#state+1] = reserved
end
setmetatable(defs, {__index=defs.base}) setmetatable(defs, {__index=defs.base})
function defs.string_open(state, token) function defs.string_open(state, token)
if not state.in_string then if not state.in_string then
state[#state+1] = TK_STRING state[#state+1] = TK_STRING
state[COLLECT] = {coalesce=50} -- TODO tweak this for CPU/memory tradeoff? state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
state.in_string = token state.in_string = token
return "string" return "string"
end end

View File

@ -24,6 +24,8 @@ local DATA = {}
local GEN = {} local GEN = {}
-- key for DATA OFFSET -- key for DATA OFFSET
local OFFDATA = {} local OFFDATA = {}
-- key for End of Stream
local EOZ = {}
local optimize_lookups = {} local optimize_lookups = {}
for i=0, 255 do for i=0, 255 do
@ -39,6 +41,9 @@ local function get_next_common(state, in_pos, token)
if state[STATE] then if state[STATE] then
local st = state[STATE] local st = state[STATE]
local rule = st[token] local rule = st[token]
if not rule and token == EOZ then
return in_pos, state
end
do -- pre-hooks do -- pre-hooks
local pos = -1 local pos = -1
local hook = st[pos] local hook = st[pos]
@ -83,7 +88,9 @@ local function get_next_common(state, in_pos, token)
end end
local function get_next_table(state, in_pos) local function get_next_table(state, in_pos)
if state[DATA] == nil or #state[DATA] == 0 then return in_pos, state end -- TODO end-of-stream handling if state[DATA] == nil or #state[DATA] == 0 then
return get_next_common(state, in_pos, EOZ)
end
in_pos = in_pos + 1 in_pos = in_pos + 1
local token = state[DATA][in_pos - state[OFFDATA]] local token = state[DATA][in_pos - state[OFFDATA]]
if token == nil then if token == nil then
@ -95,7 +102,13 @@ local function get_next_table(state, in_pos)
end end
local function get_next_string(state, in_pos) local function get_next_string(state, in_pos)
if state[DATA] == nil or #state[DATA] == 0 then return in_pos, state end -- TODO end-of-stream handling if state[DATA] == nil or #state[DATA] == 0 then
if state[STATE] == nil then
return in_pos, state
else
return get_next_common(state, in_pos, EOZ)
end
end
in_pos = in_pos + 1 in_pos = in_pos + 1
local token = optimize_lookups[string.byte(state[DATA], in_pos - state[OFFDATA], in_pos - state[OFFDATA])] local token = optimize_lookups[string.byte(state[DATA], in_pos - state[OFFDATA], in_pos - state[OFFDATA])]
if token == nil then if token == nil then
@ -142,6 +155,7 @@ local COLLECT = {}
return { return {
STATE = STATE, STATE = STATE,
COLLECT = COLLECT, COLLECT = COLLECT,
EOZ = EOZ,
stream = stream, stream = stream,
parse = parse, parse = parse,
-- common utility function -- common utility function
@ -154,7 +168,7 @@ return {
if not rule then if not rule then
local t = state[COLLECT] local t = state[COLLECT]
t[#t+1] = token t[#t+1] = token
if t.coalesce and #t > t.coalesce then if t.coalesce and #t >= t.coalesce then
t[1] = table.concat(t) t[1] = table.concat(t)
for i=2, #t do t[i] = nil end for i=2, #t do t[i] = nil end
end end

102
test.lua
View File

@ -24,6 +24,14 @@ local function case()
return caseno return caseno
end end
do -- basic check
    -- case() is called like in every other test case, although the returned
    -- value is not printed here
    local case = case()
    local defs = {}
    local calls = 0
    -- data source that reports end of input on its first call and must not
    -- be asked again afterwards
    local function source()
        assert(calls == 0, "should be called only once")
        calls = calls + 1
        return nil
    end
    local state = parser.parse(defs, source)
    assert(state)
end -- basic check
do -- trim left spaces do -- trim left spaces
local defs = {} local defs = {}
defs.self = defs defs.self = defs
@ -82,6 +90,7 @@ do -- lua tokens
else else
assert(state[1] == luatokens.tokens.TK_STRING) assert(state[1] == luatokens.tokens.TK_STRING)
assert(state[2] == "hello world") assert(state[2] == "hello world")
assert(state.line == 1 or not state.line)
end end
end -- lua tokens end -- lua tokens
@ -101,6 +110,7 @@ do -- more lua tokens
else else
assert(state[1] == luatokens.tokens.TK_STRING) assert(state[1] == luatokens.tokens.TK_STRING)
assert(state[2] == "\7\8\12\10\13\9\11\92\34\39\65\65\10") assert(state[2] == "\7\8\12\10\13\9\11\92\34\39\65\65\10")
assert(state.line == 2)
end end
end -- lua tokens end -- lua tokens
@ -119,6 +129,7 @@ do -- even more lua tokens
else else
assert(state[1] == luatokens.tokens.TK_STRING) assert(state[1] == luatokens.tokens.TK_STRING)
assert(state[2] == "A") assert(state[2] == "A")
assert(state.line == 1 or not state.line)
end end
end -- lua tokens end -- lua tokens
@ -157,6 +168,7 @@ do -- even more lua tokens
assert(table.remove(state, 1) == "\252\132\128\128\128\128") assert(table.remove(state, 1) == "\252\132\128\128\128\128")
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING) assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "\253\191\191\191\191\191") assert(table.remove(state, 1) == "\253\191\191\191\191\191")
assert(state.line == 1 or not state.line)
end end
end -- lua tokens end -- lua tokens
@ -176,6 +188,7 @@ do -- simple lua tokens
assert(table.remove(state, 1) == "[") assert(table.remove(state, 1) == "[")
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING) assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "") assert(table.remove(state, 1) == "")
assert(state.line == 1 or not state.line)
end end
end -- lua tokens end -- lua tokens
@ -194,8 +207,9 @@ do -- simple long string
else else
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING) assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "") assert(table.remove(state, 1) == "")
assert(state.line == 1 or not state.line)
end end
end -- lua tokens end -- long string
do -- long string with depth 1 do -- long string with depth 1
local luatokens = require "luatokens" local luatokens = require "luatokens"
@ -212,8 +226,9 @@ do -- long string with depth 1
else else
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING) assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "") assert(table.remove(state, 1) == "")
assert(state.line == 1 or not state.line)
end end
end -- lua tokens end -- long string
do -- long string with "nested" long string do -- long string with "nested" long string
local luatokens = require "luatokens" local luatokens = require "luatokens"
@ -230,5 +245,86 @@ do -- long string with "nested" long string
else else
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING) assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "[[]]") assert(table.remove(state, 1) == "[[]]")
assert(state.line == 1 or not state.line)
end end
end -- lua tokens end -- long string
do -- long string edge cases
    -- Two level-2 long strings back to back: the first contains "]=" (a
    -- closing candidate with too few "="), the second contains a lone "]".
    local luatokens = require "luatokens"
    local tokens = luatokens.defs
    local state, err, etoken, estate = parser.parse(tokens, "[==[]=]==][==[]]==]")
    local case = case()
    if state then
        local TK_STRING = luatokens.tokens.TK_STRING
        assert(table.remove(state, 1) == TK_STRING)
        assert(table.remove(state, 1) == "]=")
        assert(table.remove(state, 1) == TK_STRING)
        assert(table.remove(state, 1) == "]")
        assert(state.line == 1 or not state.line)
    else
        -- parse failed: dump the error and the failing state for inspection
        print(case, "---- IN TOKENS ----")
        print(case, err, etoken)
        for key, value in pairs(estate) do
            print(case, key, value)
        end
        print(case, "---- OUT TOKENS ----")
    end
end -- long string
do -- keywords
    local luatokens = require "luatokens"
    local tokens = luatokens.defs
    local state, err, etoken, estate = parser.parse(tokens, [[
and break do else elseif end
false for function goto if in
local nil not or repeat return
then true until while]])
    local case = case()
    if state then
        -- names of the token constants, in the same order as the reserved
        -- words appear in the input above
        local expected = {
            "TK_AND", "TK_BREAK", "TK_DO", "TK_ELSE", "TK_ELSEIF", "TK_END",
            "TK_FALSE", "TK_FOR", "TK_FUNCTION", "TK_GOTO", "TK_IF", "TK_IN",
            "TK_LOCAL", "TK_NIL", "TK_NOT", "TK_OR", "TK_REPEAT", "TK_RETURN",
            "TK_THEN", "TK_TRUE", "TK_UNTIL", "TK_WHILE",
        }
        for i = 1, #expected do
            assert(table.remove(state, 1) == luatokens.tokens[expected[i]])
        end
        -- the input spans four lines
        assert(state.line == 4)
    else
        -- parse failed: dump the error and the failing state for inspection
        print(case, "---- IN TOKENS ----")
        print(case, err, etoken)
        for key, value in pairs(estate) do
            print(case, key, value)
        end
        print(case, "---- OUT TOKENS ----")
    end
end -- keywords
do -- tokenize luatokens.lua itself
    -- Smoke test: feed the tokenizer its own source file and report any
    -- failure. Like the other cases, a parse error is printed, not raised.
    local luatokens = require "luatokens"
    -- Fail with the real reason (e.g. "No such file or directory") when the
    -- file cannot be opened, instead of an "attempt to index a nil value".
    local file = assert(io.open("./luatokens.lua", "r"))
    -- "*a" is valid on every Lua version: 5.1 and 5.2 require the "*" and
    -- 5.3+ accept and ignore it. The earlier version check compared _VERSION
    -- with "5.1"/"5.2", but _VERSION has the form "Lua 5.1", so it never
    -- matched and "a" was always used, which 5.1/5.2 reject.
    local luatokens_file = file:read("*a")
    file:close()
    local tokens = luatokens.defs
    local state, err, etoken, estate = parser.parse(tokens, luatokens_file)
    local case = case()
    if not state then
        print(case, "---- IN TOKENS ----")
        print(case, err, etoken)
        for i,v in pairs(estate) do
            print(case, i, v)
        end
        print(case, "---- OUT TOKENS ----")
    end
end -- tokenize luatokens.lua itself