diff --git a/luatokens.lua b/luatokens.lua
index 58a7d09..226a81a 100644
--- a/luatokens.lua
+++ b/luatokens.lua
@@ -99,17 +99,21 @@ defs.base = {
     ["Z"] = "alpha",
 }
 
-local function linecount(state, token, rule)
-    -- TODO fix
-    if token == "\n" or token == "\r" then
-        state.line = (state.line or 1) + 1
-    end
+local function countline(state, token, rule)
+    state.line = (state.line or 1) + 1
+end
+
+local function mknewline(t, hookn, fallback)
+    fallback = fallback or t
+    t["\n"] = setmetatable({[hookn] = countline, ["\r"] = setmetatable({}, {__index=fallback})}, {__index=fallback})
+    t["\r"] = setmetatable({[hookn] = countline, ["\n"] = setmetatable({}, {__index=fallback})}, {__index=fallback})
+    return t
 end
 
 do local tstring = selfify({})
     defs.string = tstring
     tstring.defs = defs
-    do local tsescapes = setmetatable({
+    do local tsescapes = setmetatable(mknewline({
         ["'"] = "insertraw",
         ['"'] = "insertraw",
         ['\\'] = "insertraw",
@@ -123,11 +127,10 @@ do local tstring = selfify({})
         ["z"] = "skipwhitespace",
         ["u"] = "unicode",
         ["x"] = "hex",
-        ["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
-        ["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
-        [1] = linecount,
-        [2] = function(state, token, rule) if token == "\r" or token == "\n" then collect_fallback(state, "\n") end end,
-    }, {__index = defs.base})
+        --["\n"] = setmetatable({[1] = countline, ["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
+        --["\r"] = setmetatable({[1] = countline, ["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
+        [1] = function(state, token, rule) if token == "\r" or token == "\n" then collect_fallback(state, "\n") end end,
+    }, 1, tstring), {__index = defs.base})
     defs.string.escapes = tsescapes
     tsescapes.string = defs.string
 
@@ -237,15 +240,16 @@ do local tstring = selfify({})
         tsescapes.unicode = tseunicode
     end
 
-    do local tseskipwhitespace = selfify({
+    do local tseskipwhitespace = selfify(mknewline({
         string = defs.string,
         whitespace = "self",
         [""] = "string",
         [1] = collect_fallback,
-        [2] = linecount,
-    })
+    }, 2))
+    --tseskipwhitespace["\n"] = setmetatable({[2] = countline, ["\r"] = setmetatable({}, {__index=tseskipwhitespace})}, {__index=tseskipwhitespace})
+    --tseskipwhitespace["\r"] = setmetatable({[2] = countline, ["\n"] = setmetatable({}, {__index=tseskipwhitespace})}, {__index=tseskipwhitespace})
     local tbase = defs.base
-    local tbasemap = {whitespace = "whitespace", newline = "whitespace"}
+    local tbasemap = {whitespace = "whitespace"}
     setmetatable(tseskipwhitespace, {__index = function(t, k) return tbasemap[tbase[k]] or tstring[k] end})
     tsescapes.skipwhitespace = tseskipwhitespace
     end
@@ -276,21 +280,51 @@ do local tstring = selfify({})
     end
 end
 
-do local tlongstring = {}
+do local tlongstring = selfify({})
+    defs.longstring = tlongstring
     -- TODO
 end
 
defs["'"] = "string_open"
 defs['"'] = "string_open"
 defs["["] = "maybe_longstring"
-defs[1] = linecount
+defs.maybe_longstring = setmetatable({
+    defs = defs,
+    ['['] = "longstring_open",
+    ['='] = "longstring_open",
+    longstring_count = selfify({
+        ["="] = function(state, token)
+            state.longstring_count = state.longstring_count + 1
+            return "self"
+        end,
+        longstring = defs.longstring
+    }),
+    longstring_open = function(state, token)
+        if token == "=" then
+            state.longstring_count = (state.longstring_count or 0) + 1
+            return "longstring_count"
+        elseif token == "[" then
+            return "longstring"
+        end
+    end,
+    [-1] = function(state, token, rule)
+        if rule ~= "longstring_open" then
+            state[#state+1] = "["
+        end
+    end
+}, {__index=defs})
 
-setmetatable(defs, {__index=whitespace})
+-- these are needed for proper line counts
+--defs["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=defs})}, {__index=defs})
+--defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs})
+mknewline(defs, 1)
+
+setmetatable(defs, {__index=defs.base})
 
 function defs.string_open(state, token)
     if not state.in_string then
         state[#state+1] = TK_STRING
-        state[COLLECT] = {}
+        state[COLLECT] = {coalesce=50} -- TODO tweak this for CPU/memory tradeoff?
         state.in_string = token
         return "string"
     end
diff --git a/parser.lua b/parser.lua
index 0cd2853..bfa7dd3 100644
--- a/parser.lua
+++ b/parser.lua
@@ -39,6 +39,17 @@ local function get_next_common(state, in_pos, token)
     if state[STATE] then
         local st = state[STATE]
         local rule = st[token]
+        do -- pre-hooks
+            local pos = -1
+            local hook = st[pos]
+            while hook ~= nil do
+                if hook then
+                    hook(state, token, rule)
+                end
+                pos = pos - 1
+                hook = st[pos]
+            end
+        end
         transition = rule
         if transition == nil then
             transition = st[""]
@@ -143,6 +154,10 @@ return {
         if not rule then
             local t = state[COLLECT]
             t[#t+1] = token
+            if t.coalesce and #t > t.coalesce then
+                t[1] = table.concat(t)
+                for i=2, #t do t[i] = nil end
+            end
         end
     end,
 }
diff --git a/test.lua b/test.lua
index 283b566..8672903 100644
--- a/test.lua
+++ b/test.lua
@@ -159,3 +159,76 @@ do -- even more lua tokens
         assert(table.remove(state, 1) == "\253\191\191\191\191\191")
     end
 end -- lua tokens
+
+do -- simple lua tokens
+    local luatokens = require "luatokens"
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, [[[""]])
+    local case = case()
+    if not state then
+        print(case, "---- IN TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    else
+        assert(table.remove(state, 1) == "[")
+        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+        assert(table.remove(state, 1) == "")
+    end
+end -- simple lua tokens
+
+do -- simple long string
+    local luatokens = require "luatokens"
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, [=[[[]]]=])
+    local case = case()
+    if not state then
+        print(case, "---- IN TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    else
+        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+        assert(table.remove(state, 1) == "")
+    end
+end -- simple long string
+
+do -- long string with depth 1
+    local luatokens = require "luatokens"
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, [==[[=[]=]]==])
+    local case = case()
+    if not state then
+        print(case, "---- IN TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    else
+        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+        assert(table.remove(state, 1) == "")
+    end
+end -- long string with depth 1
+
+do -- long string with "nested" long string
+    local luatokens = require "luatokens"
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, [==[[=[[[]]]=]]==])
+    local case = case()
+    if not state then
+        print(case, "---- IN TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    else
+        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+        assert(table.remove(state, 1) == "[[]]")
+    end
+end -- long string with "nested" long string
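
The coalesce field threaded through luatokens.lua and parser.lua is the subtle part of this change: string contents are collected one token at a time into state[COLLECT], so without a bound the buffer grows by one table slot per input character. The new check in parser.lua folds the buffer into a single string whenever it passes its coalesce threshold. What follows is a minimal standalone sketch of that behavior only; the names push and buf are illustrative and not part of parser.lua's API.

local function push(t, token)
    t[#t+1] = token
    if t.coalesce and #t > t.coalesce then
        -- fold everything collected so far into one string, so the
        -- buffer never holds more than coalesce+1 live slots
        t[1] = table.concat(t)
        for i = 2, #t do t[i] = nil end
    end
end

local buf = {coalesce = 50} -- same default that string_open now uses
for c in ("a fairly long string literal"):gmatch(".") do
    push(buf, c)
end
print(table.concat(buf)) --> a fairly long string literal

A larger coalesce value concatenates less often but keeps more slots live; a smaller one does the opposite, which is the CPU/memory tradeoff the TODO in string_open refers to.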