Keywords and long strings
parent d50ad87794
commit 282dbabb7e

 luatokens.lua | 105 +-
 parser.lua    |  20 +-
 test.lua      | 102 +-
 3 files changed
diff --git a/luatokens.lua b/luatokens.lua
--- a/luatokens.lua
+++ b/luatokens.lua
@@ -9,12 +9,15 @@ local collect_fallback = parser.collect_fallback
 -- "dummies"
 -- see http://www.lua.org/source/5.3/llex.h.html#RESERVED
+-- keywords
 local TK_AND, TK_BREAK,
     TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
     TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
     TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
+-- operators
     TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE,
     TK_SHL, TK_SHR,
+-- misc
     TK_DBCOLON, TK_EOS,
     TK_FLT, TK_INT, TK_NAME, TK_STRING =
     {}, {},
@@ -26,7 +29,32 @@ local TK_AND, TK_BREAK,
     {}, {},
     {}, {}, {}, {}
 
-local defs = {}
+local keywords = {
+    ["and"] = TK_AND,
+    ["break"] = TK_BREAK,
+    ["do"] = TK_DO,
+    ["else"] = TK_ELSE,
+    ["elseif"] = TK_ELSEIF,
+    ["end"] = TK_END,
+    ["false"] = TK_FALSE,
+    ["for"] = TK_FOR,
+    ["function"] = TK_FUNCTION,
+    ["goto"] = TK_GOTO,
+    ["if"] = TK_IF,
+    ["in"] = TK_IN,
+    ["local"] = TK_LOCAL,
+    ["nil"] = TK_NIL,
+    ["not"] = TK_NOT,
+    ["or"] = TK_OR,
+    ["repeat"] = TK_REPEAT,
+    ["return"] = TK_RETURN,
+    ["then"] = TK_THEN,
+    ["true"] = TK_TRUE,
+    ["until"] = TK_UNTIL,
+    ["while"] = TK_WHILE,
+}
+
+local defs = selfify({})
 
 defs.base = {
     [" "] = "whitespace",
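The TK_* values above are deliberately fresh empty tables: they compare by identity, so no input string can ever forge one, and the keywords table can map reserved words straight to them. A minimal sketch of that sentinel pattern (an illustration, not part of the commit):

-- Each token type is a unique table; == is identity comparison, so the
-- sentinels never collide with string payloads in the output stream.
local TK_AND, TK_NAME = {}, {}
local keywords = { ["and"] = TK_AND }

-- Classify one identifier the way the in_alpha fallback (below) does.
local function classify(word)
    local tk = keywords[word]
    if tk then return tk end   -- reserved word: keyword sentinel, no payload
    return TK_NAME, word       -- plain identifier: TK_NAME plus its text
end

assert(classify("and") == TK_AND)
local tk, text = classify("foo")
assert(tk == TK_NAME and text == "foo")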
@@ -280,9 +308,46 @@ do local tstring = selfify({})
     end
 end
 
-do local tlongstring = selfify({})
+do local tlongstring = {}
     defs.longstring = tlongstring
-    -- TODO
+    do local tllongstring_proper = selfify({[""] = "self", ["]"] = function(state, token) state.longstring_close = 0 return "maybe_end" end})
+        tllongstring_proper[1] = collect_fallback
+
+        do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
+            tllongstring_proper.maybe_end = tllmaybe_end
+            tllmaybe_end["="] = function(state, token)
+                state.longstring_close = state.longstring_close + 1
+                return "maybe_end"
+            end
+            tllmaybe_end["]"] = function(state, token)
+                if state.longstring_close == state.longstring_count then
+                    state.longstring_close = nil
+                    state.longstring_count = nil
+                    local pos = #state
+                    state[pos+1] = TK_STRING
+                    state[pos+2] = table.concat(state[COLLECT])
+                    state[COLLECT] = nil
+                    return "defs"
+                else
+                    collect_fallback(state, "]")
+                    collect_fallback(state, ("="):rep(state.longstring_close))
+                    state.longstring_close = 0
+                    return "maybe_end"
+                end
+            end
+            tllmaybe_end[-1] = function(state, token, rule)
+                if not rule then
+                    collect_fallback(state, "]")
+                    collect_fallback(state, ("="):rep(state.longstring_close))
+                    state.longstring_close = nil
+                end
+            end
+        end
+
+        tlongstring.longstring_proper = tllongstring_proper
+        mknewline(tlongstring, 1, tllongstring_proper)
+        setmetatable(tlongstring, {__index=tllongstring_proper})
+    end
 end
 
 defs["'"] = "string_open"
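The subtle part of this hunk is the maybe_end loop: a `]` switches from longstring_proper to maybe_end with longstring_close = 0, each `=` bumps the count, and the next `]` either closes the string (counts match) or flushes the swallowed `]=...=` back into the collected text and starts over with the new `]` as the candidate closer. For `[==[]=]==]` (opener depth 2) the first candidate `]=` fails at depth 1 and gets flushed, then the final `]==]` matches, which is why the "long string edge cases" test below expects `]=` as the string's content. A standalone sketch of the same counting idea (not the parser's actual driver):

-- Scan the body of a long string opened with `count` equals signs and
-- return its content, using the same flush-and-retry close counting.
local function scan_long(body, count)
    local out, close = {}, nil     -- close == nil: not inside a candidate closer
    for c in body:gmatch(".") do
        if close == nil then
            if c == "]" then close = 0 else out[#out+1] = c end
        elseif c == "=" then
            close = close + 1
        elseif c == "]" then
            if close == count then return table.concat(out) end
            -- false alarm: flush what was swallowed, reuse this "]" as a
            -- new candidate closer
            out[#out+1] = "]" .. ("="):rep(close)
            close = 0
        else
            out[#out+1] = "]" .. ("="):rep(close) .. c
            close = nil
        end
    end
end

assert(scan_long("]=]==]", 2) == "]=")   -- the body of [==[]=]==]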
@@ -297,6 +362,10 @@ defs.maybe_longstring = setmetatable({
         state.longstring_count = state.longstring_count + 1
         return "self"
     end,
+    ["["] = function(state, token)
+        state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
+        return "longstring"
+    end,
     longstring = defs.longstring
 }),
 longstring_open = function(state, token)
@@ -304,6 +373,8 @@ defs.maybe_longstring = setmetatable({
         state.longstring_count = state.longstring_count or 0 + 1
         return "longstring_count"
     elseif token == "[" then
+        state.longstring_count = 0
+        state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
         return "longstring"
     end
 end,
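One subtlety in longstring_open above: Lua's `+` binds tighter than `or`, so `state.longstring_count or 0 + 1` parses as `state.longstring_count or (0 + 1)`, not `(state.longstring_count or 0) + 1`. Since the field is nil when the opener's first `=` arrives, the expression yields 1, and later `=` signs are counted by the longstring_count state's own `+ 1` rule in the previous hunk, so the behavior works out even though the expression reads like an off-by-one. A two-line check:

-- "+" binds tighter than "or": x or 0 + 1  ==  x or (0 + 1)  ==  x or 1
local x = nil
assert((x or 0 + 1) == 1)   -- nil: counting starts at 1
x = 5
assert((x or 0 + 1) == 5)   -- an existing count is kept, not incremented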
@@ -319,12 +390,38 @@ defs.maybe_longstring = setmetatable({
 --defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs})
 mknewline(defs, 1)
 
 defs.whitespace = "self"
 defs.hexdigit = "alpha"
+defs["_"] = "alpha"
+defs.in_alpha = setmetatable(selfify({digit = "in_alpha", hexdigit = "in_alpha", alpha = "in_alpha", _ = "in_alpha", [parser.EOZ] = "self"}, "in_alpha"), {__index=defs})
+function defs.alpha(state, token)
+    state[COLLECT] = {coalesce=15} -- TODO tweak this for CPU/memory tradeoff?
+    collect_fallback(state, token)
+    return "in_alpha"
+end
+defs.in_alpha[-1] = function(state, token, rule)
+    if rule == "alpha" or rule == "digit" or rule == "hexdigit" or token == "_" then
+        collect_fallback(state, token)
+    else
+        local key = table.concat(state[COLLECT])
+        state[COLLECT] = nil
+        local keyword = keywords[key]
+        if keyword then
+            state[#state+1] = keyword
+        else
+            local pos = #state
+            state[pos+1] = TK_NAME
+            state[pos+2] = key
+        end
+    end
+end
 
 setmetatable(defs, {__index=defs.base})
 
 function defs.string_open(state, token)
     if not state.in_string then
         state[#state+1] = TK_STRING
-        state[COLLECT] = {coalesce=50} -- TODO tweak this for CPU/memory tradeoff?
+        state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
         state.in_string = token
         return "string"
     end
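All of these defs tables follow one convention, which parser.lua's driver consumes: a state is a table mapping the next token to either the name of another state ("alpha", "in_alpha", "self", ...) or a function(state, token) that updates shared state and returns that name, with [-1] as a pre-hook and metatables supplying fallbacks. A toy driver under those assumed semantics (the real one, with hooks and fallback slots, lives in parser.lua):

-- A state maps a character to a next-state name or to a handler function
-- returning that name; "out" stands in for the shared parser state.
local states = {}
states.start = {
    [" "] = "start",                                         -- skip spaces
    a = function(out, c) out[#out+1] = c return "word" end,
}
states.word = {
    a = function(out, c) out[#out+1] = c return "word" end,
    [" "] = function(out) out[#out+1] = "|" return "start" end,
}

local function run(input)
    local out, cur = {}, "start"
    for c in input:gmatch(".") do
        local rule = states[cur][c]
        if type(rule) == "function" then
            cur = rule(out, c)
        else
            cur = rule
        end
    end
    return table.concat(out)
end

assert(run("aa a ") == "aa|a|")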

diff --git a/parser.lua b/parser.lua
--- a/parser.lua
+++ b/parser.lua
@@ -24,6 +24,8 @@ local DATA = {}
 local GEN = {}
 -- key for DATA OFFSET
 local OFFDATA = {}
+-- key for End of Stream
+local EOZ = {}
 
 local optimize_lookups = {}
 for i=0, 255 do
@@ -39,6 +41,9 @@ local function get_next_common(state, in_pos, token)
     if state[STATE] then
         local st = state[STATE]
         local rule = st[token]
+        if not rule and token == EOZ then
+            return in_pos, state
+        end
         do -- pre-hooks
             local pos = -1
             local hook = st[pos]
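The EOZ key uses the same trick as the TK_* sentinels: a unique table can never equal any real input byte, so end-of-stream can flow through the ordinary dispatch path, and a state table may opt into handling it (luatokens' in_alpha does, via [parser.EOZ] = "self"). A small sketch of that dispatch shape, assuming only what the hunks show:

local EOZ = {}

local function dispatch(st, token)
    local rule = st[token]
    if not rule and token == EOZ then
        return nil, "end of stream"   -- graceful stop, as in the hunk above
    end
    return rule
end

local st = { a = "in_alpha" }         -- a state with no EOZ rule of its own
assert(dispatch(st, "a") == "in_alpha")
local rule, why = dispatch(st, EOZ)
assert(rule == nil and why == "end of stream")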
@@ -83,7 +88,9 @@
 end
 
 local function get_next_table(state, in_pos)
-    if state[DATA] == nil or #state[DATA] == 0 then return in_pos, state end -- TODO end-of-stream handling
+    if state[DATA] == nil or #state[DATA] == 0 then
+        return get_next_common(state, in_pos, EOZ)
+    end
     in_pos = in_pos + 1
     local token = state[DATA][in_pos - state[OFFDATA]]
     if token == nil then
@@ -95,7 +102,13 @@
 end
 
 local function get_next_string(state, in_pos)
-    if state[DATA] == nil or #state[DATA] == 0 then return in_pos, state end -- TODO end-of-stream handling
+    if state[DATA] == nil or #state[DATA] == 0 then
+        if state[STATE] == nil then
+            return in_pos, state
+        else
+            return get_next_common(state, in_pos, EOZ)
+        end
+    end
     in_pos = in_pos + 1
     local token = optimize_lookups[string.byte(state[DATA], in_pos - state[OFFDATA], in_pos - state[OFFDATA])]
     if token == nil then
@@ -142,6 +155,7 @@ local COLLECT = {}
 return {
     STATE = STATE,
     COLLECT = COLLECT,
+    EOZ = EOZ,
     stream = stream,
     parse = parse,
     -- common utility function
@@ -154,7 +168,7 @@
         if not rule then
             local t = state[COLLECT]
             t[#t+1] = token
-            if t.coalesce and #t > t.coalesce then
+            if t.coalesce and #t >= t.coalesce then
                 t[1] = table.concat(t)
                 for i=2, #t do t[i] = nil end
             end
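The `>` to `>=` change fixes a small off-by-one in the coalescing collector: with `>`, the buffer only flattened once it already held coalesce + 1 entries; with `>=`, it flattens exactly at the threshold, so a {coalesce=63} buffer never grows past 63 slots. A self-contained version of the same logic:

-- Append a token chunk; once the array reaches its threshold, squash
-- everything into a single string so memory use stays bounded.
local function collect(t, token)
    t[#t+1] = token
    if t.coalesce and #t >= t.coalesce then
        t[1] = table.concat(t)
        for i=2, #t do t[i] = nil end
    end
end

local t = {coalesce=3}
collect(t, "a"); collect(t, "b")
assert(#t == 2)                       -- below threshold: kept as chunks
collect(t, "c")
assert(#t == 1 and t[1] == "abc")     -- at threshold: flattened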

diff --git a/test.lua b/test.lua
--- a/test.lua
+++ b/test.lua
@@ -24,6 +24,14 @@ local function case()
     return caseno
 end
 
+do -- basic check
+    local case = case()
+    local defs = {}
+    local count = 0
+    local state, err = parser.parse(defs, function() assert(count == 0, "should be called only once"); count = count + 1 return nil end)
+    assert(state)
+end -- basic check
+
 do -- trim left spaces
     local defs = {}
     defs.self = defs
@@ -82,6 +90,7 @@ do -- lua tokens
     else
         assert(state[1] == luatokens.tokens.TK_STRING)
         assert(state[2] == "hello world")
+        assert(state.line == 1 or not state.line)
     end
 end -- lua tokens
 
@@ -101,6 +110,7 @@ do -- more lua tokens
     else
         assert(state[1] == luatokens.tokens.TK_STRING)
         assert(state[2] == "\7\8\12\10\13\9\11\92\34\39\65\65\10")
+        assert(state.line == 2)
     end
 end -- lua tokens
 
@@ -119,6 +129,7 @@ do -- even more lua tokens
     else
         assert(state[1] == luatokens.tokens.TK_STRING)
         assert(state[2] == "A")
+        assert(state.line == 1 or not state.line)
     end
 end -- lua tokens
 
@@ -157,6 +168,7 @@ do -- even more lua tokens
         assert(table.remove(state, 1) == "\252\132\128\128\128\128")
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "\253\191\191\191\191\191")
+        assert(state.line == 1 or not state.line)
     end
 end -- lua tokens
 
@@ -176,6 +188,7 @@ do -- simple lua tokens
         assert(table.remove(state, 1) == "[")
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "")
+        assert(state.line == 1 or not state.line)
     end
 end -- lua tokens
 
@@ -194,8 +207,9 @@ do -- simple long string
     else
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "")
+        assert(state.line == 1 or not state.line)
     end
 end -- lua tokens
 end -- long string
 
 do -- long string with depth 1
     local luatokens = require "luatokens"
@@ -212,8 +226,9 @@ do -- long string with depth 1
     else
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "")
+        assert(state.line == 1 or not state.line)
     end
 end -- lua tokens
 end -- long string
 
 do -- long string with "nested" long string
     local luatokens = require "luatokens"
@@ -230,5 +245,86 @@ do -- long string with "nested" long string
     else
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "[[]]")
+        assert(state.line == 1 or not state.line)
     end
 end -- lua tokens
 end -- long string
+
+do -- long string edge cases
+    local luatokens = require "luatokens"
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, "[==[]=]==][==[]]==]")
+    local case = case()
+    if not state then
+        print(case, "---- IN TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    else
+        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+        assert(table.remove(state, 1) == "]=")
+        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+        assert(table.remove(state, 1) == "]")
+        assert(state.line == 1 or not state.line)
+    end
+end -- long string
+
+do -- keywords
+    local luatokens = require "luatokens"
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, [[
+    and break do else elseif end
+    false for function goto if in
+    local nil not or repeat return
+    then true until while]])
+    local case = case()
+    if not state then
+        print(case, "---- IN TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    else
+        assert(table.remove(state, 1) == luatokens.tokens.TK_AND)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_BREAK)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_DO)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_ELSE)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_ELSEIF)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_END)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_FALSE)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_FOR)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_FUNCTION)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_GOTO)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_IF)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_IN)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_LOCAL)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_NIL)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_NOT)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_OR)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_REPEAT)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_RETURN)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_THEN)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_TRUE)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_UNTIL)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_WHILE)
+        assert(state.line == 4)
+    end
+end -- keywords
+
+do -- FUCK
+    local luatokens = require "luatokens"
+    local luatokens_file = io.open("./luatokens.lua", "r"):read((_VERSION == "5.1" or _VERSION == "5.2") and "*a" or "a")
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, luatokens_file)
+    local case = case()
+    if not state then
+        print(case, "---- IN TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    end
+end -- FUCK
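Taken together, the new cases pin down the public surface after this commit: parser.parse(luatokens.defs, source) returns the state on success, with tokens laid out as an array (sentinel, then a payload for valued tokens like TK_STRING and TK_NAME) plus a line counter, or nil, err, etoken, estate on failure. A minimal driver in the same style as the cases above:

local parser = require "parser"
local luatokens = require "luatokens"

-- Tokenize a tiny chunk made entirely of keywords covered by the tests.
local state, err, etoken = parser.parse(luatokens.defs, "while true do end")
if state then
    assert(table.remove(state, 1) == luatokens.tokens.TK_WHILE)
    assert(table.remove(state, 1) == luatokens.tokens.TK_TRUE)
    assert(table.remove(state, 1) == luatokens.tokens.TK_DO)
    assert(table.remove(state, 1) == luatokens.tokens.TK_END)
else
    print("parse failed:", err, etoken)
end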