120 lines
3.8 KiB
Lua
120 lines
3.8 KiB
Lua
|
--[[
    parser.lua - table based parsing
    Copyright (C) 2019 Soni L.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program. If not, see <https://www.gnu.org/licenses/>.
--]]
|
||
|
|
||
|
-- Private keys for the parser's state table. Each one is a fresh empty table
-- that is only ever used for its identity, so it is distinct from every
-- string key and from every other key.
--   STATE   - the current state (the transition table being consulted)
--   DATA    - the chunk of input currently being read
--   GEN     - the function that is called to obtain the next chunk
--   OFFDATA - absolute position at which the current chunk starts (the index
--             into the chunk is computed as position - OFFDATA)
local STATE, DATA, GEN, OFFDATA = {}, {}, {}, {}

-- Keep local references to the global functions used by this module.
local type, tostring = type, tostring
|
||
|
|
||
|
-- Advance the state machine by a single token.
--
-- The current state table is read from state[STATE]. The transition for
-- `token` is looked up in it, falling back to the entry stored under the
-- empty-string key "" when the token has no (truthy) entry of its own.
-- The looked-up value is then resolved repeatedly:
--   * a string is an alias: it is replaced by the entry stored under that
--     key in state[STATE];
--   * a function is called as transition(state, token) and replaced by
--     whatever it returns.
-- Resolution stops at the first value that is neither a string nor a
-- function, and that value becomes the new state[STATE].
--
-- Parameters:
--   state  - parser state table (see the STATE key)
--   in_pos - position of `token` in the input
--   token  - the token to consume
--
-- Returns, on success:
--   in_pos, state, transition
-- Returns, when there is no transition (nil or false) for the token:
--   in_pos - 1, nil, "unexpected token", token, state
local function get_next_common(state, in_pos, token)
    -- note: must preserve "token" - do not call recursively with a different token
    local transition
    local current = state[STATE]
    -- A false state is handled like a nil one: there is nothing to index.
    if current then
        transition = current[token]
        if not transition then
            transition = current[""]
        end
        local tytrans = type(transition)
        while tytrans == "string" or tytrans == "function" do
            if tytrans == "string" then
                -- re-read state[STATE]: a transition function called earlier
                -- in this loop may have replaced it
                transition = state[STATE][transition]
            else
                transition = transition(state, token)
            end
            tytrans = type(transition)
        end
        state[STATE] = transition -- may be nil or false
    end
    -- must NOT use elseif here - the above may set state to nil or false!
    if not state[STATE] then
        -- unexpected token. stream consumer may attempt to recover,
        -- but we do this mostly to differentiate it from "end of stream" condition.
        return in_pos - 1, nil, "unexpected token", token, state
    end
    return in_pos, state, transition -- TODO is this what we should be returning?
end
|
||
|
|
||
|
-- Iterator step for input delivered as tables of tokens.
--
-- Reads the token that follows position `in_pos` from the current chunk
-- (state[DATA]) and hands it to get_next_common. When the chunk has no entry
-- at that index, the next chunk is requested from state[GEN], state[OFFDATA]
-- is moved to the position where the new chunk starts, and the read is
-- retried. A chunk that is nil or has length 0 ends the stream: the position
-- and the state are returned unchanged, with no third value.
local function get_next_table(state, in_pos)
    while true do
        local chunk = state[DATA]
        if chunk == nil or #chunk == 0 then
            return in_pos, state
        end
        local pos = in_pos + 1
        local token = chunk[pos - state[OFFDATA]]
        if token ~= nil then
            return get_next_common(state, pos, token)
        end
        -- current chunk is used up: fetch the next one and retry from the
        -- same position
        state[OFFDATA] = pos - 1
        state[DATA] = state[GEN]()
        in_pos = state[OFFDATA]
    end
end
|
||
|
|
||
|
-- Iterator step for input delivered as strings; each token is one byte-sized
-- substring obtained with :sub(i, i).
--
-- Reads the character that follows position `in_pos` from the current chunk
-- (state[DATA]) and hands it to get_next_common. When :sub yields "" (the
-- index is past the end of the chunk), the next chunk is requested from
-- state[GEN], state[OFFDATA] is moved to the position where the new chunk
-- starts, and the read is retried. A chunk that is nil or has length 0 ends
-- the stream: the position and the state are returned unchanged, with no
-- third value.
local function get_next_string(state, in_pos)
    while true do
        local chunk = state[DATA]
        if chunk == nil or #chunk == 0 then
            return in_pos, state
        end
        local pos = in_pos + 1
        local idx = pos - state[OFFDATA]
        local token = chunk:sub(idx, idx)
        if token ~= "" then
            return get_next_common(state, pos, token)
        end
        -- current chunk is used up: fetch the next one and retry from the
        -- same position
        state[OFFDATA] = pos - 1
        state[DATA] = state[GEN]()
        in_pos = state[OFFDATA]
    end
end
|
||
|
|
||
|
-- Build the iterator triple (step function, state, initial position) that
-- walks `data` through the state machine described by `defs`; the triple can
-- be used directly in a generic `for`.
--
-- defs - the initial state, stored under state[STATE]
-- data - either a string holding the whole input, or a function that is
--        called to obtain each chunk of input. The type of the first chunk
--        decides whether chunks are read as strings or as tables.
local function stream(defs, data)
    local state = { [STATE] = defs, [OFFDATA] = 0 }
    local step
    if type(data) ~= "string" then
        -- chunked input: pull the first chunk now to learn its type
        local first = data()
        state[DATA] = first
        state[GEN] = data
        if type(first) == "string" then
            step = get_next_string
        else
            step = get_next_table
        end
    else
        -- whole input supplied up front; the generator never yields more
        state[DATA] = data
        state[GEN] = function() end
        step = get_next_string
    end
    return step, state, state[OFFDATA]
end
|
||
|
|
||
|
-- Run the state machine `defs` over all of `data` (see stream).
--
-- Returns the final state table once the input is exhausted; the caller
-- still has to compare state[STATE] against what it considers a successful
-- state. On a parse error returns nil, the error message, the offending
-- token and the state table.
local function parse(defs, data)
    local step, invariant, pos = stream(defs, data)
    while true do
        local new_pos, state, transemsg, etoken, estate = step(invariant, pos)
        if new_pos == nil then
            -- iterator finished without a verdict: return nothing
            return
        end
        pos = new_pos
        if not state then
            -- parse error
            return nil, transemsg, etoken, estate
        end
        if not transemsg then
            -- parse success (maybe): no transition was reported, which is
            -- how the step functions signal end of input
            return state
        end
    end
end
|
||
|
|
||
|
-- Module interface: the STATE key (for inspecting state[STATE] on a returned
-- state table) and the two entry points.
local parser = { parse = parse, stream = stream, STATE = STATE }
return parser
|