2019-03-31 16:02:57 +02:00
|
|
|
--[[
|
|
|
|
parser.lua - table based parsing
|
|
|
|
Copyright (C) 2019 Soni L.
|
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU Affero General Public License as published by
|
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU Affero General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Affero General Public License
|
|
|
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
--]]
|
|
|
|
|
|
|
|
-- Unique sentinel keys. Each one is a fresh empty table, so it can never
-- collide with a token or with any other key stored in a state table.
--   STATE   - key for the current parser state
--   DATA    - key for the current chunk of input data
--   GEN     - key for the generator that produces further chunks
--   OFFDATA - key for the offset of the current chunk within the stream
--   EOZ     - token used to signal End of Stream
local STATE, DATA, GEN, OFFDATA, EOZ = {}, {}, {}, {}, {}
|
2019-03-31 16:02:57 +02:00
|
|
|
|
2019-04-06 16:25:44 +02:00
|
|
|
-- Byte value (0..255) -> one-character string. Lets the string stepper turn
-- a string.byte() result into a token with a plain table lookup.
local optimize_lookups = {}
do
    local char = string.char
    local byte = 0
    while byte <= 255 do
        optimize_lookups[byte] = char(byte)
        byte = byte + 1
    end
end
|
|
|
|
|
2019-03-31 16:02:57 +02:00
|
|
|
-- Cache frequently used globals in locals.
local type = type
local tostring = tostring
|
|
|
|
|
|
|
|
-- Runs the hooks stored in `st` at indices start, start+step, start+2*step, ...
-- and stops at the first nil entry. Entries that are `false` are skipped
-- without ending the walk. (ipairs is not used: see the Lua 5.1/5.2 note at
-- the post-hook call site.)
local function run_hooks(st, start, step, state, token, rule)
    local idx = start
    local hook = st[idx]
    while hook ~= nil do
        if hook then
            hook(state, token, rule)
        end
        idx = idx + step
        hook = st[idx]
    end
end

-- Feeds one token to the state machine.
--
-- state:  the stream state table; state[STATE] holds the current parser state
-- in_pos: position of `token` in the stream
-- token:  the token to process (EOZ at end of stream)
--
-- Returns one of:
--   in_pos, state                    EOZ reached and the current state has no rule for it
--   in_pos, state, transition        the token was accepted; `transition` is the new state
--   in_pos - 1, nil, "unexpected token", token, state
--                                    there is no (truthy) state to continue with
--
-- note: must preserve "token" - do not call recursively with a different token
local function get_next_common(state, in_pos, token)
    local transition
    local st = state[STATE]
    if st then
        local rule = st[token]
        if token == EOZ and not rule then
            return in_pos, state
        end

        -- pre-hooks live at st[-1], st[-2], ...
        run_hooks(st, -1, -1, state, token, rule)

        -- pick the rule for this token, falling back to the "" rule
        transition = rule
        if transition == nil then
            transition = st[""]
        end

        -- resolve indirections until we have something that is neither
        -- an alias (string key into st) nor a function to call
        while true do
            local kind = type(transition)
            if kind == "string" then
                transition = st[transition]
            elseif kind == "function" then
                transition = transition(state, token)
            else
                break
            end
        end

        -- post-hooks live at st[1], st[2], ...
        -- CANNOT USE ipairs HERE BECAUSE Lua 5.1/5.2
        run_hooks(st, 1, 1, state, token, rule)

        state[STATE] = transition -- may be nil or false
    end

    -- this is deliberately a fresh check and not an "else" of the block above:
    -- that block may have just set the state to nil or false!
    if state[STATE] then
        return in_pos, state, transition -- TODO is this what we should be returning?
    end

    -- unexpected token. stream consumer may attempt to recover,
    -- but we do this mostly to differentiate it from "end of stream" condition.
    return in_pos - 1, nil, "unexpected token", token, state
end
|
|
|
|
|
|
|
|
-- Iterator step for streams whose chunks are tables (sequences of tokens).
--
-- state:  the stream state table
-- in_pos: position of the previously processed token
--
-- Returns whatever get_next_common returns for the next token, or
-- `in_pos, state` when the data has ended and there is no current state.
local function get_next_table(state, in_pos)
    local data = state[DATA]
    if data == nil or #data == 0 then
        -- a missing or empty chunk marks the end of the stream
        if state[STATE] == nil then
            -- No current state (e.g. the consumer kept iterating after an
            -- "unexpected token" error): report a plain end of stream, exactly
            -- like get_next_string does, instead of a second error for EOZ.
            return in_pos, state
        end
        return get_next_common(state, in_pos, EOZ)
    end

    in_pos = in_pos + 1
    local token = data[in_pos - state[OFFDATA]]
    if token == nil then
        -- current chunk is exhausted: remember where it ended, fetch the
        -- next chunk and retry from the same stream position
        state[OFFDATA] = in_pos - 1
        state[DATA] = state[GEN]()
        return get_next_table(state, state[OFFDATA])
    end
    return get_next_common(state, in_pos, token)
end
|
|
|
|
|
|
|
|
-- Iterator step for streams whose chunks are strings; each byte of the
-- chunk becomes a one-character token.
--
-- state:  the stream state table
-- in_pos: position of the previously processed token
--
-- Returns whatever get_next_common returns for the next token, or
-- `in_pos, state` when the data has ended and there is no current state.
local function get_next_string(state, in_pos)
    local chunk = state[DATA]
    if chunk == nil or #chunk == 0 then
        -- a missing or empty chunk marks the end of the stream
        if state[STATE] ~= nil then
            return get_next_common(state, in_pos, EOZ)
        end
        return in_pos, state
    end

    local pos = in_pos + 1
    local idx = pos - state[OFFDATA]
    -- string.byte yields no value past the end of the chunk, so the lookup
    -- below produces nil in that case
    local token = optimize_lookups[string.byte(chunk, idx, idx)]
    if token ~= nil then
        return get_next_common(state, pos, token)
    end

    -- current chunk is exhausted: remember where it ended, fetch the next
    -- chunk and retry from the same stream position
    state[OFFDATA] = pos - 1
    state[DATA] = state[GEN]()
    return get_next_string(state, state[OFFDATA])
end
|
|
|
|
|
|
|
|
-- Builds an iterator triplet (step function, state, initial position) that
-- feeds `data` through the state machine described by `defs`.
--
-- defs: the initial parser state
-- data: either a string, or a function that is called to obtain each chunk
--       (the type of the first chunk selects the string or table stepper)
--
-- Usable directly in a generic for: `for pos, state, ... in stream(defs, data) do`
local function stream(defs, data)
    local state = {
        [STATE] = defs,
        [OFFDATA] = 0,
    }

    if type(data) == "string" then
        -- a single string: the generator yields nothing further
        state[DATA] = data
        state[GEN] = function() end
        return get_next_string, state, state[OFFDATA]
    end

    -- otherwise `data` is called to produce the chunks
    local first_chunk = data()
    state[DATA] = first_chunk
    state[GEN] = data
    if type(first_chunk) == "string" then
        return get_next_string, state, state[OFFDATA]
    end
    return get_next_table, state, state[OFFDATA]
end
|
|
|
|
|
|
|
|
-- Runs `data` through the state machine `defs` until the stream ends or an
-- error occurs.
--
-- Returns the state table on (possible) success - the caller needs to check
-- state[STATE] against what it considers a successful state - or
-- `nil, message, token, state` on a parse error.
local function parse(defs, data)
    local step, invariant, pos = stream(defs, data)
    while true do
        local next_pos, state, transemsg, etoken, estate = step(invariant, pos)
        if next_pos == nil then
            -- same termination rule as a generic for loop
            return
        end
        pos = next_pos

        if not state then
            -- parse error
            return nil, transemsg, etoken, estate
        end
        if not transemsg then
            -- parse success (maybe)
            return state
        end
    end
end
|
|
|
|
|
2019-04-06 16:25:44 +02:00
|
|
|
-- not used by any of the above but useful for others

-- key under which collect_fallback looks up its buffer in the state table
local COLLECT = {}

-- common utility function: stores a reference to `t` inside `t` itself,
-- under key `id` (default "self"), and returns `t`.
local function selfify(t, id)
    local key = id or "self"
    t[key] = t
    return t
end

-- common hook: when the token matched no rule, appends it to the buffer at
-- state[COLLECT]. If the buffer has a `coalesce` field and has grown to at
-- least that many entries, the entries are joined into a single string
-- stored at index 1.
local function collect_fallback(state, token, rule)
    if rule then
        return
    end
    local buf = state[COLLECT]
    buf[#buf + 1] = token
    local limit = buf.coalesce
    if limit and #buf >= limit then
        local joined = table.concat(buf)
        for i = #buf, 2, -1 do
            buf[i] = nil
        end
        buf[1] = joined
    end
end

return {
    STATE = STATE,
    COLLECT = COLLECT,
    EOZ = EOZ,
    stream = stream,
    parse = parse,
    selfify = selfify,
    collect_fallback = collect_fallback,
}
|