It's working better now...
This commit is contained in:
parent
5a4b41bd47
commit
a6372171c0
|
@ -3,6 +3,9 @@
|
||||||
-- we need some stuff from here
|
-- we need some stuff from here
|
||||||
local parser = require "parser"
|
local parser = require "parser"
|
||||||
local selfify = parser.selfify
|
local selfify = parser.selfify
|
||||||
|
local EOF = parser.EOF
|
||||||
|
local COLLECT = parser.COLLECT
|
||||||
|
local collect_fallback = parser.collect_fallback
|
||||||
|
|
||||||
-- "dummies"
|
-- "dummies"
|
||||||
local TK_STRING = {}
|
local TK_STRING = {}
|
||||||
|
@ -106,25 +109,25 @@ do local tstring = selfify({})
|
||||||
["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
|
["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
|
||||||
["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
|
["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
|
||||||
[1] = linecount,
|
[1] = linecount,
|
||||||
[2] = print
|
|
||||||
}, {__index = tokens.base})
|
}, {__index = tokens.base})
|
||||||
tokens.string.escapes = tsescapes
|
tokens.string.escapes = tsescapes
|
||||||
tsescapes.string = tokens.string
|
tsescapes.string = tokens.string
|
||||||
|
|
||||||
function tsescapes.insertraw(state, token)
|
function tsescapes.insertraw(state, token)
|
||||||
state[#state+1] = token
|
collect_fallback(state, token)
|
||||||
return "string"
|
return "string"
|
||||||
end
|
end
|
||||||
|
|
||||||
do
|
do
|
||||||
local map = { ["a"] = "\a", ["b"] = "\b", ["f"] = "\f", ["n"] = "\n", ["r"] = "\r", ["t"] = "\t", ["v"] = "\v" }
|
local map = { ["a"] = "\a", ["b"] = "\b", ["f"] = "\f", ["n"] = "\n", ["r"] = "\r", ["t"] = "\t", ["v"] = "\v" }
|
||||||
function tsescapes.insertmap(state, token)
|
function tsescapes.insertmap(state, token)
|
||||||
state[#state+1] = map[token]
|
collect_fallback(state, map[token])
|
||||||
return "string"
|
return "string"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
function tsescapes.digit(state, token)
|
function tsescapes.digit(state, token)
|
||||||
|
print(state, token)
|
||||||
local digit = string.find("1234567890", token, 1, true)
|
local digit = string.find("1234567890", token, 1, true)
|
||||||
local num = state.in_digit
|
local num = state.in_digit
|
||||||
if digit then
|
if digit then
|
||||||
|
@ -138,21 +141,21 @@ do local tstring = selfify({})
|
||||||
if num > 255 then
|
if num > 255 then
|
||||||
return nil
|
return nil
|
||||||
end
|
end
|
||||||
state[#state+1] = string.char(num)
|
collect_fallback(state, string.char(num))
|
||||||
state.in_digit = nil
|
state.in_digit = nil
|
||||||
state.c = nil
|
state.c = nil
|
||||||
return "string"
|
return "string"
|
||||||
end
|
end
|
||||||
tsescapes.digitc = setmetatable(selfify({[""] = tsescapes.digit, digitc = "self", string = tstring}), {__index=tstring})
|
tsescapes.digitc = setmetatable(selfify({[""] = tsescapes.digit, digitc = "self", string = tstring}), {__index=tstring})
|
||||||
|
|
||||||
tsescapes.hex = setmetatable(selfify({string = tokens.string}), {__index=tokens.base})
|
tsescapes.hex = setmetatable(selfify({string = tokens.string, digit = "hexdigit"}), {__index=tokens.base})
|
||||||
function tsescapes.hex.hexdigit(state, token)
|
function tsescapes.hex.hexdigit(state, token)
|
||||||
local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
|
local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
|
||||||
assert(digit, "this should never be called for non-hex-digits")
|
assert(digit, "this should never be called for non-hex-digits")
|
||||||
local num = state.in_hex
|
local num = state.in_hex
|
||||||
if num then
|
if num then
|
||||||
num = num * 16 + digit % 16
|
num = num * 16 + digit % 16
|
||||||
state[#state+1] = string.char(num)
|
collect_fallback(state, string.char(num))
|
||||||
state.in_hex = nil
|
state.in_hex = nil
|
||||||
return "string"
|
return "string"
|
||||||
else
|
else
|
||||||
|
@ -165,7 +168,7 @@ do local tstring = selfify({})
|
||||||
string = tokens.string,
|
string = tokens.string,
|
||||||
whitespace = "self",
|
whitespace = "self",
|
||||||
[""] = "string",
|
[""] = "string",
|
||||||
[1] = parser.insert_fallback,
|
[1] = collect_fallback,
|
||||||
[2] = linecount,
|
[2] = linecount,
|
||||||
})
|
})
|
||||||
local tbase = tokens.base
|
local tbase = tokens.base
|
||||||
|
@ -185,17 +188,13 @@ do local tstring = selfify({})
|
||||||
|
|
||||||
tstring[""] = "self"
|
tstring[""] = "self"
|
||||||
|
|
||||||
tstring[1] = parser.insert_fallback
|
tstring[1] = collect_fallback
|
||||||
|
|
||||||
function tstring.close(state, token)
|
function tstring.close(state, token)
|
||||||
if state.in_string == token then
|
if state.in_string == token then
|
||||||
local i = state.string_start
|
|
||||||
state.in_string = nil
|
state.in_string = nil
|
||||||
state.string_start = nil
|
state[#state+1] = table.concat(state[COLLECT])
|
||||||
state[i+1] = table.concat(state, '', i+1)
|
state[COLLECT] = nil
|
||||||
for j=i+2, #state do
|
|
||||||
state[j]=nil
|
|
||||||
end
|
|
||||||
return "tokens"
|
return "tokens"
|
||||||
else
|
else
|
||||||
state[#state+1] = token
|
state[#state+1] = token
|
||||||
|
@ -206,14 +205,15 @@ end
|
||||||
|
|
||||||
tokens["'"] = "string_open"
|
tokens["'"] = "string_open"
|
||||||
tokens['"'] = "string_open"
|
tokens['"'] = "string_open"
|
||||||
|
tokens[1] = linecount
|
||||||
|
|
||||||
setmetatable(tokens, {__index=whitespace})
|
setmetatable(tokens, {__index=whitespace})
|
||||||
|
|
||||||
function tokens.string_open(state, token)
|
function tokens.string_open(state, token)
|
||||||
if not state.in_string then
|
if not state.in_string then
|
||||||
state[#state+1] = TK_STRING
|
state[#state+1] = TK_STRING
|
||||||
|
state[COLLECT] = {}
|
||||||
state.in_string = token
|
state.in_string = token
|
||||||
state.string_start = #state
|
|
||||||
return "string"
|
return "string"
|
||||||
end
|
end
|
||||||
assert("this shouldn't happen")
|
assert("this shouldn't happen")
|
||||||
|
|
35
parser.lua
35
parser.lua
|
@ -25,6 +25,11 @@ local GEN = {}
|
||||||
-- key for DATA OFFSET
|
-- key for DATA OFFSET
|
||||||
local OFFDATA = {}
|
local OFFDATA = {}
|
||||||
|
|
||||||
|
local optimize_lookups = {}
|
||||||
|
for i=0, 255 do
|
||||||
|
optimize_lookups[i] = string.char(i)
|
||||||
|
end
|
||||||
|
|
||||||
local type, tostring
|
local type, tostring
|
||||||
= type, tostring
|
= type, tostring
|
||||||
|
|
||||||
|
@ -81,7 +86,7 @@ end
|
||||||
local function get_next_string(state, in_pos)
|
local function get_next_string(state, in_pos)
|
||||||
if state[DATA] == nil or #state[DATA] == 0 then return in_pos, state end
|
if state[DATA] == nil or #state[DATA] == 0 then return in_pos, state end
|
||||||
in_pos = in_pos + 1
|
in_pos = in_pos + 1
|
||||||
local token = state[DATA]:sub(in_pos - state[OFFDATA], in_pos - state[OFFDATA])
|
local token = optimize_lookups[string.byte(state[DATA], in_pos - state[OFFDATA], in_pos - state[OFFDATA])] or ""
|
||||||
if token == "" then
|
if token == "" then
|
||||||
state[OFFDATA] = in_pos - 1
|
state[OFFDATA] = in_pos - 1
|
||||||
state[DATA] = state[GEN]()
|
state[DATA] = state[GEN]()
|
||||||
|
@ -119,21 +124,25 @@ local function parse(defs, data)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
-- utility function that's quite common
|
-- not used by any of the above but useful for others
|
||||||
local function selfify(t)
|
|
||||||
t.self = t
|
local COLLECT = {}
|
||||||
return t
|
|
||||||
end
|
|
||||||
-- common hook
|
|
||||||
local function insert_fallback(state, token, rule)
|
|
||||||
if not rule then
|
|
||||||
state[#state+1] = token
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
STATE = STATE,
|
STATE = STATE,
|
||||||
|
COLLECT = COLLECT,
|
||||||
stream = stream,
|
stream = stream,
|
||||||
parse = parse,
|
parse = parse,
|
||||||
selfify = selfify,
|
-- common utility function
|
||||||
|
selfify = function(t)
|
||||||
|
t.self = t
|
||||||
|
return t
|
||||||
|
end,
|
||||||
|
-- common hook
|
||||||
|
collect_fallback = function(state, token, rule)
|
||||||
|
if not rule then
|
||||||
|
local t = state[COLLECT]
|
||||||
|
t[#t+1] = token
|
||||||
|
end
|
||||||
|
end,
|
||||||
}
|
}
|
||||||
|
|
6
test.lua
6
test.lua
|
@ -100,6 +100,12 @@ do -- more lua tokens
|
||||||
else
|
else
|
||||||
for i,v in ipairs(state) do
|
for i,v in ipairs(state) do
|
||||||
print(case, i, v)
|
print(case, i, v)
|
||||||
|
if v == luatokens.TK_STRING then
|
||||||
|
in_string = true
|
||||||
|
elseif in_string then
|
||||||
|
print(case, v:gsub(".", function(v) return "\\"..string.byte(v) end))
|
||||||
|
in_string = false
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
print(case, "---- OUT TOKENS ----")
|
print(case, "---- OUT TOKENS ----")
|
||||||
|
|
Loading…
Reference in New Issue