lua.cratera/parser.lua

203 lines
6.4 KiB
Lua

--[[
parser.lua - table based parsing
Copyright (C) 2019 Soni L.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
--]]
-- Shared __tostring handler: yields the __name field stored in the
-- metatable of the value being converted.
local function ts(self)
    local mt = getmetatable(self)
    return mt.__name
end
-- Creates a unique sentinel table used as a key. Its metatable carries the
-- given name and routes tostring() through ts.
local function new_key(name)
    return setmetatable({}, {__name = name, __tostring = ts})
end

-- key for STATE (the current state/rule table inside the parser state)
local STATE = new_key("STATE")
-- key for DATA (the chunk of input currently being consumed)
local DATA = new_key("DATA")
-- key for GENERATOR (function called to obtain the next chunk of input)
local GEN = new_key("GEN")
-- key for DATA OFFSET (subtracted from the position to index the current chunk)
local OFFDATA = new_key("OFFDATA")
-- key for End of Stream (token fed to the rules once the input is exhausted)
local EOZ = new_key("EOZ")
-- key for number rules (prevent conflict with hooks, which live at integer keys)
local NUMBER = new_key("NUMBER")
-- key for fallback rules (prevent conflict with empty string)
local FALLBACK = new_key("FALLBACK")
-- Precomputed byte value (0..255) -> one-character string table, so that the
-- string reader can turn a byte into a token with a plain table lookup.
local optimize_lookups = {}
do
    local char = string.char
    for byte = 0, 255 do
        optimize_lookups[byte] = char(byte)
    end
end
-- Cache frequently used builtins in locals.
local type = type
local tostring = tostring
local string_byte = string.byte
-- Feeds a single token to the state machine and performs the transition.
--
-- state:  parser state table; state[STATE] holds the current rule table.
-- in_pos: position of the token in the input stream.
-- token:  the token itself (or EOZ at end of stream).
--
-- Returns one of:
--   in_pos, state
--       token is EOZ and the current rule table has no rule for it
--       (no hooks are run and the state is left untouched);
--   in_pos - 1, nil, "unexpected token", token, state, previous_rule_table
--       the transition resolved to nil/false (or there was no current state);
--   in_pos, state, transition
--       normal transition; in_pos is decremented by one when a rule
--       function asked for a retry.
local function get_next_common(state, in_pos, token)
    -- note: must preserve "token" - do not call recursively with a different token
    local current = state[STATE]
    local target, retry
    if current then
        local rule = current[token]
        if not rule and token == EOZ then
            return in_pos, state
        end
        if type(token) == "number" then
            -- number tokens never use their own value as the rule key
            rule = current[NUMBER]
        end

        -- pre-hooks live at keys -1, -2, ...; nil ends the list, false is skipped
        local idx = -1
        while true do
            local hook = current[idx]
            if hook == nil then
                break
            end
            if hook then
                hook(state, token, rule)
            end
            idx = idx - 1
        end

        target = rule
        if target == nil then
            target = current[FALLBACK]
        end

        -- keep resolving until the transition is neither an alias nor a function
        while true do
            local kind = type(target)
            if kind == "string" then
                -- a string names another rule of the same table
                target = current[target]
            elseif kind == "function" then
                -- a function computes the transition and may request a retry
                target, retry = target(state, token)
            elseif kind == "table" and current[target] ~= nil then
                -- a table that is itself a key of the rule table is an alias too
                target = current[target]
            else
                break
            end
        end

        -- post-hooks live at keys 1, 2, ... (cannot use ipairs here because of
        -- Lua 5.1/5.2); nil ends the list, false is skipped
        idx = 1
        while true do
            local hook = current[idx]
            if hook == nil then
                break
            end
            if hook then
                hook(state, token, rule)
            end
            idx = idx + 1
        end

        state[STATE] = target -- may be nil or false
    end
    -- must NOT be merged into the branch above - it may have set the state to
    -- nil or false!
    if not state[STATE] then
        -- unexpected token. stream consumer may attempt to recover,
        -- but we do this mostly to differentiate it from "end of stream" condition.
        return in_pos - 1, nil, "unexpected token", token, state, current
    end
    if retry then
        in_pos = in_pos - 1
    end
    return in_pos, state, target -- TODO is this what we should be returning?
end
-- Iterator step for table (array) input.
--
-- Reads the element following in_pos from the current chunk state[DATA],
-- asking state[GEN] for a new chunk whenever the current one runs out, and
-- hands the element to get_next_common. Once the data is nil or empty it
-- returns (in_pos, state) if there is no current state, and otherwise feeds
-- EOZ to get_next_common.
local function get_next_table(state, in_pos)
    while true do
        local chunk = state[DATA]
        if chunk == nil or #chunk == 0 then
            if state[STATE] == nil then
                return in_pos, state
            end
            return get_next_common(state, in_pos, EOZ)
        end
        local next_pos = in_pos + 1
        local token = chunk[next_pos - state[OFFDATA]]
        if token ~= nil then
            return get_next_common(state, next_pos, token)
        end
        -- current chunk exhausted: remember how far we got, fetch the next
        -- chunk and try again from the same position
        state[OFFDATA] = in_pos
        state[DATA] = state[GEN]()
        in_pos = state[OFFDATA]
    end
end
-- Iterator step for string input.
--
-- Reads the byte following in_pos from the current chunk state[DATA], maps it
-- to a one-character string through optimize_lookups and hands that token to
-- get_next_common. state[GEN] is asked for a new chunk whenever the current
-- one runs out. Once the data is nil or empty it returns (in_pos, state) if
-- there is no current state, and otherwise feeds EOZ to get_next_common.
local function get_next_string(state, in_pos)
    while true do
        local chunk = state[DATA]
        if chunk == nil or #chunk == 0 then
            if state[STATE] == nil then
                return in_pos, state
            end
            return get_next_common(state, in_pos, EOZ)
        end
        local next_pos = in_pos + 1
        local rel = next_pos - state[OFFDATA]
        -- string_byte yields no value past the end of the chunk, so the
        -- lookup then produces nil
        local token = optimize_lookups[string_byte(chunk, rel, rel)]
        if token ~= nil then
            return get_next_common(state, next_pos, token)
        end
        -- current chunk exhausted: remember how far we got, fetch the next
        -- chunk and try again from the same position
        state[OFFDATA] = in_pos
        state[DATA] = state[GEN]()
        in_pos = state[OFFDATA]
    end
end
-- Prepares a parser state and returns the triple expected by a generic for:
-- the step function, the state table and the initial position (0).
--
-- defs:  initial rule table, stored in state[STATE].
-- data:  either the input itself (string or table) or a function that
--        returns the input chunk by chunk; the first chunk is fetched here.
-- state: optional table to use as the parser state (a new one is created
--        when omitted).
local function stream(defs, data, state)
    state = state or {}
    state[STATE] = defs
    if type(data) == "function" then
        state[DATA] = data()
        state[GEN] = data
    else
        state[DATA] = data
        -- no generator: "fetching" another chunk yields nothing
        state[GEN] = function() end
    end
    state[OFFDATA] = 0
    -- the reader is chosen from the type of the first chunk
    local step
    if type(state[DATA]) == "table" then
        step = get_next_table
    else
        step = get_next_string
    end
    return step, state, state[OFFDATA]
end
-- Runs the state machine described by defs over data until it stops.
--
-- Returns the parser state when the stream ends without an error - the
-- caller still needs to check state[STATE] against what it considers a
-- successful state. On a parse error returns nil followed by the error
-- message, the offending token and the parser state.
local function parse(defs, data, state)
    for _, current, info, bad_token, bad_state in stream(defs, data, state) do
        if not current then
            -- parse error
            return nil, info, bad_token, bad_state
        end
        if not info then
            -- parse success (maybe): the step reported no transition
            return current
        end
    end
end
-- not used by any of the above but useful for others:
-- key under which the collect_fallback hook looks up its buffer in the state
local COLLECT = {}

-- common utility function: stores t inside itself under id (default "self")
-- and returns t
local function selfify(t, id)
    local key = id or "self"
    t[key] = t
    return t
end

-- common hook: when the token matched no rule, appends it to the buffer at
-- state[COLLECT]. If the buffer has a "coalesce" field and has grown to at
-- least that many entries, the entries are concatenated into buf[1].
local function collect_fallback(state, token, rule)
    if rule then
        return
    end
    local buf = state[COLLECT]
    buf[#buf + 1] = token
    local limit = buf.coalesce
    if limit and #buf >= limit then
        buf[1] = table.concat(buf)
        for i = 2, #buf do
            buf[i] = nil
        end
    end
end

return {
    -- keys
    STATE = STATE,
    DATA = DATA,
    COLLECT = COLLECT,
    EOZ = EOZ,
    FALLBACK = FALLBACK,
    NUMBER = NUMBER,
    -- entry points
    stream = stream,
    parse = parse,
    -- helpers
    selfify = selfify,
    collect_fallback = collect_fallback,
}