Lots of todos still
This commit is contained in:
		
							parent
							
								
									0118cdcb80
								
							
						
					
					
						commit
						d50ad87794
					
				
					 3 changed files with 141 additions and 19 deletions
				
			
		| 
						 | 
				
			
			@ -99,17 +99,21 @@ defs.base = {
 | 
			
		|||
    ["Z"] = "alpha",
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
local function linecount(state, token, rule)
 | 
			
		||||
    -- TODO fix
 | 
			
		||||
    if token == "\n" or token == "\r" then
 | 
			
		||||
        state.line = (state.line or 1) + 1
 | 
			
		||||
    end
 | 
			
		||||
-- Unconditionally advance the line counter on `state`.
-- `state.line` is lazily initialized: an unset counter is treated as
-- line 1, so the first newline hook moves it to 2.
local function countline(state, token, rule)
    local current = state.line or 1
    state.line = current + 1
end
 | 
			
		||||
 | 
			
		||||
-- Install newline-handling entries into state table `t`.
-- For each of "\n" and "\r" it creates a sub-state that fires the
-- `countline` hook at slot `hookn` and swallows the opposite newline
-- character of a CRLF/LFCR pair (that inner sub-state counts nothing,
-- so the pair is counted once). All lookups fall back to `fallback`
-- (default: `t` itself). Returns `t` for call-chaining.
local function mknewline(t, hookn, fallback)
    fallback = fallback or t
    local function newline_state(other)
        local pair_skip = setmetatable({}, {__index = fallback})
        return setmetatable({[hookn] = countline, [other] = pair_skip}, {__index = fallback})
    end
    t["\n"] = newline_state("\r")
    t["\r"] = newline_state("\n")
    return t
end
 | 
			
		||||
 | 
			
		||||
do local tstring = selfify({})
 | 
			
		||||
    defs.string = tstring
 | 
			
		||||
    tstring.defs = defs
 | 
			
		||||
    do local tsescapes = setmetatable({
 | 
			
		||||
    do local tsescapes = setmetatable(mknewline({
 | 
			
		||||
            ["'"] = "insertraw",
 | 
			
		||||
            ['"'] = "insertraw",
 | 
			
		||||
            ['\\'] = "insertraw",
 | 
			
		||||
| 
						 | 
				
			
			@ -123,11 +127,10 @@ do local tstring = selfify({})
 | 
			
		|||
            ["z"] = "skipwhitespace",
 | 
			
		||||
            ["u"] = "unicode",
 | 
			
		||||
            ["x"] = "hex",
 | 
			
		||||
            ["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
 | 
			
		||||
            ["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
 | 
			
		||||
            [1] = linecount,
 | 
			
		||||
            [2] = function(state, token, rule) if token == "\r" or token == "\n" then collect_fallback(state, "\n") end end,
 | 
			
		||||
        }, {__index = defs.base})
 | 
			
		||||
            --["\n"] = setmetatable({[1] = countline, ["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
 | 
			
		||||
            --["\r"] = setmetatable({[1] = countline, ["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
 | 
			
		||||
            [1] = function(state, token, rule) if token == "\r" or token == "\n" then collect_fallback(state, "\n") end end,
 | 
			
		||||
        }, 1, tstring), {__index = defs.base})
 | 
			
		||||
        defs.string.escapes = tsescapes
 | 
			
		||||
        tsescapes.string = defs.string
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -237,15 +240,16 @@ do local tstring = selfify({})
 | 
			
		|||
            tsescapes.unicode = tseunicode
 | 
			
		||||
        end
 | 
			
		||||
 | 
			
		||||
        do local tseskipwhitespace = selfify({
 | 
			
		||||
        do local tseskipwhitespace = selfify(mknewline({
 | 
			
		||||
                string = defs.string,
 | 
			
		||||
                whitespace = "self",
 | 
			
		||||
                [""] = "string",
 | 
			
		||||
                [1] = collect_fallback,
 | 
			
		||||
                [2] = linecount,
 | 
			
		||||
            })
 | 
			
		||||
            }, 2))
 | 
			
		||||
            --tseskipwhitespace["\n"] = setmetatable({[2] = countline, ["\r"] = setmetatable({}, {__index=tseskipwhitespace})}, {__index=tseskipwhitespace})
 | 
			
		||||
            --tseskipwhitespace["\r"] = setmetatable({[2] = countline, ["\n"] = setmetatable({}, {__index=tseskipwhitespace})}, {__index=tseskipwhitespace})
 | 
			
		||||
            local tbase = defs.base
 | 
			
		||||
            local tbasemap = {whitespace = "whitespace", newline = "whitespace"}
 | 
			
		||||
            local tbasemap = {whitespace = "whitespace"}
 | 
			
		||||
            setmetatable(tseskipwhitespace, {__index = function(t, k) return tbasemap[tbase[k]] or tstring[k] end})
 | 
			
		||||
            tsescapes.skipwhitespace =  tseskipwhitespace
 | 
			
		||||
        end
 | 
			
		||||
| 
						 | 
				
			
			@ -276,21 +280,51 @@ do local tstring = selfify({})
 | 
			
		|||
    end
 | 
			
		||||
end
 | 
			
		||||
 | 
			
		||||
do local tlongstring = selfify({})
    -- Placeholder state table for long strings ("[[...]]").
    -- Registered now so maybe_longstring can reference it, but the
    -- actual transitions are not implemented yet.
    defs.longstring = tlongstring
    -- TODO
end
 | 
			
		||||
 | 
			
		||||
-- Top-level dispatch entries for the tokenizer state table.
-- Either quote character opens a short string; "[" may begin a long
-- string ("[[", "[=[", ...) so it routes through maybe_longstring.
defs["'"] = "string_open"
defs['"'] = "string_open"
defs["["] = "maybe_longstring"
-- Pre-hook slot 1: count lines while in the top-level state.
defs[1] = linecount
 | 
			
		||||
-- State entered after seeing "[" at the top level: decides whether
-- this is a long-string opener ("[[", "[=[", "[==[", ...) or just a
-- plain "[" token. Falls back to `defs` for anything else.
defs.maybe_longstring = setmetatable({
    defs = defs,
    ['['] = "longstring_open",
    ['='] = "longstring_open",
    -- Counts the "="s of an opener like "[===["; loops on itself for
    -- each additional "=", then hands off to the longstring state.
    longstring_count = selfify({
        ["="] = function(state, token)
            state.longstring_count = state.longstring_count + 1
            return "self"
        end,
        longstring = defs.longstring
    }),
    longstring_open = function(state, token)
        if token == "=" then
            -- BUGFIX: was `state.longstring_count or 0 + 1`, which Lua
            -- parses as `x or (0 + 1)` because `+` binds tighter than
            -- `or` — a stale count from a previous long string was kept
            -- instead of starting fresh at 1.
            -- NOTE(review): assumes the count is cleared when a long
            -- string closes; if not, it should be reset to 1 here —
            -- confirm against the (unimplemented) longstring state.
            state.longstring_count = (state.longstring_count or 0) + 1
            return "longstring_count"
        elseif token == "[" then
            return "longstring"
        end
    end,
    -- Pre-hook: if we are leaving this state without opening a long
    -- string, re-emit the "[" that brought us here as a plain token.
    [-1] = function(state, token, rule)
        if rule ~= "longstring_open" then
            state[#state+1] = "["
        end
    end
}, {__index=defs})
 | 
			
		||||
 | 
			
		||||
setmetatable(defs, {__index=whitespace})
 | 
			
		||||
-- these are needed for proper line counts
 | 
			
		||||
--defs["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=defs})}, {__index=defs})
 | 
			
		||||
--defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs})
 | 
			
		||||
mknewline(defs, 1)
 | 
			
		||||
 | 
			
		||||
setmetatable(defs, {__index=defs.base})
 | 
			
		||||
 | 
			
		||||
function defs.string_open(state, token)
 | 
			
		||||
    if not state.in_string then
 | 
			
		||||
        state[#state+1] = TK_STRING
 | 
			
		||||
        state[COLLECT] = {}
 | 
			
		||||
        state[COLLECT] = {coalesce=50} -- TODO tweak this for CPU/memory tradeoff?
 | 
			
		||||
        state.in_string = token
 | 
			
		||||
        return "string"
 | 
			
		||||
    end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										15
									
								
								parser.lua
									
										
									
									
									
								
							
							
						
						
									
										15
									
								
								parser.lua
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -39,6 +39,17 @@ local function get_next_common(state, in_pos, token)
 | 
			
		|||
    if state[STATE] then
 | 
			
		||||
        local st = state[STATE]
 | 
			
		||||
        local rule = st[token]
 | 
			
		||||
        do -- pre-hooks
 | 
			
		||||
            local pos = -1
 | 
			
		||||
            local hook = st[pos]
 | 
			
		||||
            while hook ~= nil do
 | 
			
		||||
                if hook then
 | 
			
		||||
                    hook(state, token, rule)
 | 
			
		||||
                end
 | 
			
		||||
                pos = pos - 1
 | 
			
		||||
                hook = st[pos]
 | 
			
		||||
            end
 | 
			
		||||
        end
 | 
			
		||||
        transition = rule
 | 
			
		||||
        if transition == nil then
 | 
			
		||||
            transition = st[""]
 | 
			
		||||
| 
						 | 
				
			
			@ -143,6 +154,10 @@ return {
 | 
			
		|||
        if not rule then
 | 
			
		||||
            local t = state[COLLECT]
 | 
			
		||||
            t[#t+1] = token
 | 
			
		||||
            if t.coalesce and #t > t.coalesce then
 | 
			
		||||
                t[1] = table.concat(t)
 | 
			
		||||
                for i=2, #t do t[i] = nil end
 | 
			
		||||
            end
 | 
			
		||||
        end
 | 
			
		||||
    end,
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										73
									
								
								test.lua
									
										
									
									
									
								
							
							
						
						
									
										73
									
								
								test.lua
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -159,3 +159,76 @@ do -- even more lua tokens
 | 
			
		|||
        assert(table.remove(state, 1) == "\253\191\191\191\191\191")
 | 
			
		||||
    end
 | 
			
		||||
end -- lua tokens
 | 
			
		||||
 | 
			
		||||
do -- simple lua tokens
    -- Tokenize the input `[""` (a lone "[" followed by an empty short
    -- string) and check the emitted token stream.
    local luatokens = require "luatokens"
    local tokens = luatokens.defs
    local state, err, etoken, estate = parser.parse(tokens, [[[""]])
    local case = case()
    if not state then
        -- Parse failed: dump the error and partial state for debugging.
        print(case, "---- IN  TOKENS ----")
        print(case, err, etoken)
        for i,v in pairs(estate) do
            print(case, i, v)
        end
        print(case, "---- OUT TOKENS ----")
    else
        -- The "[" is not a long-string opener here, so it must come
        -- through as a plain token before the empty-string token pair.
        assert(table.remove(state, 1) == "[")
        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
        assert(table.remove(state, 1) == "")
    end
end -- lua tokens
 | 
			
		||||
 | 
			
		||||
do -- simple long string
    -- Tokenize the input `[[]]` (an empty long string, no "=" depth)
    -- and expect a single empty TK_STRING token.
    local luatokens = require "luatokens"
    local tokens = luatokens.defs
    local state, err, etoken, estate = parser.parse(tokens, [=[[[]]]=])
    local case = case()
    if not state then
        -- Parse failed: dump the error and partial state for debugging.
        print(case, "---- IN  TOKENS ----")
        print(case, err, etoken)
        for i,v in pairs(estate) do
            print(case, i, v)
        end
        print(case, "---- OUT TOKENS ----")
    else
        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
        assert(table.remove(state, 1) == "")
    end
end -- lua tokens
 | 
			
		||||
 | 
			
		||||
do -- long string with depth 1
    -- Tokenize the input `[=[]=]` (an empty long string with one "="
    -- of depth) and expect a single empty TK_STRING token.
    local luatokens = require "luatokens"
    local tokens = luatokens.defs
    local state, err, etoken, estate = parser.parse(tokens, [==[[=[]=]]==])
    local case = case()
    if not state then
        -- Parse failed: dump the error and partial state for debugging.
        print(case, "---- IN  TOKENS ----")
        print(case, err, etoken)
        for i,v in pairs(estate) do
            print(case, i, v)
        end
        print(case, "---- OUT TOKENS ----")
    else
        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
        assert(table.remove(state, 1) == "")
    end
end -- lua tokens
 | 
			
		||||
 | 
			
		||||
do -- long string with "nested" long string
    -- Tokenize the input `[=[[[]]]=]`: a depth-1 long string whose
    -- contents are the literal characters `[[]]`. The inner brackets
    -- must NOT terminate the string (depth mismatch), so the expected
    -- token value is the four-character string "[[]]".
    local luatokens = require "luatokens"
    local tokens = luatokens.defs
    local state, err, etoken, estate = parser.parse(tokens, [==[[=[[[]]]=]]==])
    local case = case()
    if not state then
        -- Parse failed: dump the error and partial state for debugging.
        print(case, "---- IN  TOKENS ----")
        print(case, err, etoken)
        for i,v in pairs(estate) do
            print(case, i, v)
        end
        print(case, "---- OUT TOKENS ----")
    else
        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
        assert(table.remove(state, 1) == "[[]]")
    end
end -- lua tokens
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue