It's working better now...
This commit is contained in:
parent
5a4b41bd47
commit
a6372171c0
|
@ -3,6 +3,9 @@
|
|||
-- we need some stuff from here
|
||||
local parser = require "parser"
|
||||
local selfify = parser.selfify
|
||||
local EOF = parser.EOF
|
||||
local COLLECT = parser.COLLECT
|
||||
local collect_fallback = parser.collect_fallback
|
||||
|
||||
-- "dummies"
|
||||
local TK_STRING = {}
|
||||
|
@ -106,25 +109,25 @@ do local tstring = selfify({})
|
|||
["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
|
||||
["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
|
||||
[1] = linecount,
|
||||
[2] = print
|
||||
}, {__index = tokens.base})
|
||||
tokens.string.escapes = tsescapes
|
||||
tsescapes.string = tokens.string
|
||||
|
||||
-- Handler stored on `tsescapes`: records `token` unchanged, then returns the
-- string "string".
--   state: table; the token is appended to its array part and/or handed to
--          collect_fallback (see the note below).
--   token: the value to record as-is.
-- NOTE(review): this span comes from a diff view that has lost its +/- markers.
-- `state[#state+1] = token` and `collect_fallback(state, token)` look like the
-- before/after versions of a single statement rather than two statements meant
-- to run together -- confirm against the actual file.
-- NOTE(review): the returned "string" is presumably the name of the next
-- parser state -- confirm against the parser's dispatch code.
function tsescapes.insertraw(state, token)
|
||||
state[#state+1] = token
|
||||
collect_fallback(state, token)
|
||||
return "string"
|
||||
end
|
||||
|
||||
-- The do-block keeps `map` private to tsescapes.insertmap.
do
|
||||
-- Single-letter escape names mapped to the control character each one denotes.
local map = { ["a"] = "\a", ["b"] = "\b", ["f"] = "\f", ["n"] = "\n", ["r"] = "\r", ["t"] = "\t", ["v"] = "\v" }
|
||||
-- Handler stored on `tsescapes`: records map[token] (the translated character)
-- instead of the token itself, then returns the string "string".
-- If `token` is not a key of `map`, map[token] is nil; assigning nil to
-- state[#state+1] adds nothing.
-- NOTE(review): this span comes from a diff view that has lost its +/- markers.
-- `state[#state+1] = map[token]` and `collect_fallback(state, map[token])` look
-- like the before/after versions of a single statement -- confirm against the
-- actual file.
function tsescapes.insertmap(state, token)
|
||||
state[#state+1] = map[token]
|
||||
collect_fallback(state, map[token])
|
||||
return "string"
|
||||
end
|
||||
end
|
||||
|
||||
function tsescapes.digit(state, token)
|
||||
print(state, token)
|
||||
local digit = string.find("1234567890", token, 1, true)
|
||||
local num = state.in_digit
|
||||
if digit then
|
||||
|
@ -138,21 +141,21 @@ do local tstring = selfify({})
|
|||
if num > 255 then
|
||||
return nil
|
||||
end
|
||||
state[#state+1] = string.char(num)
|
||||
collect_fallback(state, string.char(num))
|
||||
state.in_digit = nil
|
||||
state.c = nil
|
||||
return "string"
|
||||
end
|
||||
tsescapes.digitc = setmetatable(selfify({[""] = tsescapes.digit, digitc = "self", string = tstring}), {__index=tstring})
|
||||
|
||||
tsescapes.hex = setmetatable(selfify({string = tokens.string}), {__index=tokens.base})
|
||||
tsescapes.hex = setmetatable(selfify({string = tokens.string, digit = "hexdigit"}), {__index=tokens.base})
|
||||
function tsescapes.hex.hexdigit(state, token)
|
||||
local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
|
||||
assert(digit, "this should never be called for non-hex-digits")
|
||||
local num = state.in_hex
|
||||
if num then
|
||||
num = num * 16 + digit % 16
|
||||
state[#state+1] = string.char(num)
|
||||
collect_fallback(state, string.char(num))
|
||||
state.in_hex = nil
|
||||
return "string"
|
||||
else
|
||||
|
@ -165,7 +168,7 @@ do local tstring = selfify({})
|
|||
string = tokens.string,
|
||||
whitespace = "self",
|
||||
[""] = "string",
|
||||
[1] = parser.insert_fallback,
|
||||
[1] = collect_fallback,
|
||||
[2] = linecount,
|
||||
})
|
||||
local tbase = tokens.base
|
||||
|
@ -185,17 +188,13 @@ do local tstring = selfify({})
|
|||
|
||||
tstring[""] = "self"
|
||||
|
||||
tstring[1] = parser.insert_fallback
|
||||
tstring[1] = collect_fallback
|
||||
|
||||
function tstring.close(state, token)
|
||||
if state.in_string == token then
|
||||
local i = state.string_start
|
||||
state.in_string = nil
|
||||
state.string_start = nil
|
||||
state[i+1] = table.concat(state, '', i+1)
|
||||
for j=i+2, #state do
|
||||
state[j]=nil
|
||||
end
|
||||
state[#state+1] = table.concat(state[COLLECT])
|
||||
state[COLLECT] = nil
|
||||
return "tokens"
|
||||
else
|
||||
state[#state+1] = token
|
||||
|
@ -206,14 +205,15 @@ end
|
|||
|
||||
tokens["'"] = "string_open"
|
||||
tokens['"'] = "string_open"
|
||||
tokens[1] = linecount
|
||||
|
||||
setmetatable(tokens, {__index=whitespace})
|
||||
|
||||
function tokens.string_open(state, token)
|
||||
if not state.in_string then
|
||||
state[#state+1] = TK_STRING
|
||||
state[COLLECT] = {}
|
||||
state.in_string = token
|
||||
state.string_start = #state
|
||||
return "string"
|
||||
end
|
||||
assert("this shouldn't happen")
|
||||
|
|
35
parser.lua
35
parser.lua
|
@ -25,6 +25,11 @@ local GEN = {}
|
|||
-- key for DATA OFFSET
|
||||
local OFFDATA = {}
|
||||
|
||||
-- Precomputed table mapping every byte value 0..255 to the one-character
-- string string.char produces for it, so a byte code can be turned into a
-- string with a table index.
-- Index 0 is populated too, so the table is keyed 0..255 rather than being a
-- 1-based sequence.
local optimize_lookups = {}
|
||||
for i=0, 255 do
|
||||
optimize_lookups[i] = string.char(i)
|
||||
end
|
||||
|
||||
local type, tostring
|
||||
= type, tostring
|
||||
|
||||
|
@ -81,7 +86,7 @@ end
|
|||
local function get_next_string(state, in_pos)
|
||||
if state[DATA] == nil or #state[DATA] == 0 then return in_pos, state end
|
||||
in_pos = in_pos + 1
|
||||
local token = state[DATA]:sub(in_pos - state[OFFDATA], in_pos - state[OFFDATA])
|
||||
local token = optimize_lookups[string.byte(state[DATA], in_pos - state[OFFDATA], in_pos - state[OFFDATA])] or ""
|
||||
if token == "" then
|
||||
state[OFFDATA] = in_pos - 1
|
||||
state[DATA] = state[GEN]()
|
||||
|
@ -119,21 +124,25 @@ local function parse(defs, data)
|
|||
end
|
||||
end
|
||||
|
||||
-- utility function that's quite common
|
||||
-- Stores a reference to `t` in its own `self` field and returns the same
-- table, so the call can wrap a table constructor inline.
--   t: table to modify in place.
--   returns: `t` itself (no copy is made).
local function selfify(t)
|
||||
t.self = t
|
||||
return t
|
||||
end
|
||||
-- common hook
|
||||
-- Appends `token` to the array part of `state`, but only when `rule` is nil or
-- false; when `rule` is truthy the function does nothing.
--   state: table that receives the token.
--   token: value to append.
--   rule:  presumably the rule matched for this token, if any -- TODO confirm
--          against the code that invokes hooks.
-- Returns nothing.
local function insert_fallback(state, token, rule)
|
||||
if not rule then
|
||||
state[#state+1] = token
|
||||
end
|
||||
end
|
||||
-- not used by any of the above but useful for others
|
||||
|
||||
local COLLECT = {}
|
||||
|
||||
-- Module export table: the values returned to whoever requires this file.
-- NOTE(review): this span comes from a diff view that has lost its +/- markers.
-- The key `selfify` appears twice below (`selfify = selfify,` and the inline
-- function); they look like the before/after versions of one entry, not two
-- entries meant to coexist -- confirm against the actual file.
return {
|
||||
STATE = STATE,
|
||||
COLLECT = COLLECT,
|
||||
stream = stream,
|
||||
parse = parse,
|
||||
selfify = selfify,
|
||||
-- common utility function
|
||||
-- Sets t.self = t and returns the same table `t`.
selfify = function(t)
|
||||
t.self = t
|
||||
return t
|
||||
end,
|
||||
-- common hook
|
||||
-- Appends `token` to the table stored at state[COLLECT], but only when `rule`
-- is nil or false; does nothing when `rule` is truthy.
-- There is no nil check on state[COLLECT]: if no table is stored there, the
-- `#t` below raises an error.
collect_fallback = function(state, token, rule)
|
||||
if not rule then
|
||||
local t = state[COLLECT]
|
||||
t[#t+1] = token
|
||||
end
|
||||
end,
|
||||
}
|
||||
|
|
6
test.lua
6
test.lua
|
@ -100,6 +100,12 @@ do -- more lua tokens
|
|||
else
|
||||
for i,v in ipairs(state) do
|
||||
print(case, i, v)
|
||||
if v == luatokens.TK_STRING then
|
||||
in_string = true
|
||||
elseif in_string then
|
||||
print(case, v:gsub(".", function(v) return "\\"..string.byte(v) end))
|
||||
in_string = false
|
||||
end
|
||||
end
|
||||
end
|
||||
print(case, "---- OUT TOKENS ----")
|
||||
|
|
Loading…
Reference in New Issue