Add copyright notices, everything almost works
Numbers and long comments aren't working
This commit is contained in:
parent
fd48534de3
commit
f56e1bd7e7
238
luatokens.lua
238
luatokens.lua
|
@ -1,4 +1,74 @@
|
|||
-- Lua defs
|
||||
--[[
|
||||
luatokens.lua - pure-Lua Lua tokenizer
|
||||
Copyright (C) 2019 Soni L.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
--]]
|
||||
|
||||
--[[
|
||||
This software is based on Lua 5.1 and Lua 5.3
|
||||
|
||||
Lua 5.1 license:
|
||||
|
||||
/******************************************************************************
|
||||
* Copyright (C) 1994-2012 Lua.org, PUC-Rio. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
******************************************************************************/
|
||||
|
||||
Lua 5.3 license:
|
||||
|
||||
/******************************************************************************
|
||||
* Copyright (C) 1994-2018 Lua.org, PUC-Rio.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
******************************************************************************/
|
||||
--]]
|
||||
|
||||
-- we need some stuff from here
|
||||
local parser = require "parser"
|
||||
|
@ -54,6 +124,11 @@ local keywords = {
|
|||
["while"] = TK_WHILE,
|
||||
}
|
||||
|
||||
-- Inverse lookup of `keywords`: maps each token value stored in `keywords`
-- back to the keyword string it was registered under.
local reverse_keywords = {}
for spelling, token_id in pairs(keywords) do
    reverse_keywords[token_id] = spelling
end
|
||||
|
||||
local defs = selfify({})
|
||||
|
||||
defs.base = {
|
||||
|
@ -315,6 +390,7 @@ do local tlongstring = {}
|
|||
|
||||
do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
|
||||
tllongstring_proper.maybe_end = tllmaybe_end
|
||||
tllmaybe_end.longstring_proper = tllongstring_proper
|
||||
tllmaybe_end["="] = function(state, token)
|
||||
state.longstring_close = state.longstring_close + 1
|
||||
return "maybe_end"
|
||||
|
@ -335,6 +411,8 @@ do local tlongstring = {}
|
|||
return "maybe_end"
|
||||
end
|
||||
end
|
||||
tllmaybe_end[""] = "longstring_proper"
|
||||
tllmaybe_end[1] = collect_fallback
|
||||
tllmaybe_end[-1] = function(state, token, rule)
|
||||
if not rule then
|
||||
collect_fallback(state, "]")
|
||||
|
@ -390,6 +468,157 @@ defs.maybe_longstring = setmetatable({
|
|||
--defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs})
|
||||
mknewline(defs, 1)
|
||||
|
||||
-- thankfully comments are easy
|
||||
defs["-"] = "maybe_comment"
|
||||
do local tmaybe_comment = setmetatable({["-"] = "comment"}, {__index=defs})
|
||||
defs.maybe_comment = tmaybe_comment
|
||||
-- Hook stored under key -1 of the "maybe_comment" table (presumably run by
-- the parser on every transition out of this state -- confirm in parser.lua).
-- Unless the rule being taken is "comment", the "-" that led into this state
-- is appended to the token list as an ordinary minus token.
tmaybe_comment[-1] = function(state, token, rule)
    if rule == "comment" then
        return
    end
    state[#state + 1] = "-"
end
|
||||
do local tmcomment = {comment_proper = selfify({})}
|
||||
tmaybe_comment.comment = tmcomment
|
||||
tmcomment[""] = "comment_proper"
|
||||
tmcomment["["] = "maybe_longcomment"
|
||||
mknewline(tmcomment, 1, defs)
|
||||
mknewline(tmcomment.comment_proper, 1, defs)
|
||||
tmcomment.comment_proper[""] = "self"
|
||||
do local tlongcomment = {}
|
||||
tmcomment.longcomment = tlongcomment
|
||||
do local tllongcomment_proper = selfify({[""] = "self", ["]"] = function(state, token) state.longcomment_close = 0 return "maybe_end" end})
|
||||
-- "maybe_end" state of a long comment. It is entered when a "]" is seen in
-- the comment body; it counts the "=" signs that follow and, on the next "]",
-- checks whether the closing bracket has the same level as the opening one.
-- The table carries a `defs` entry so the "defs" transition returned on a
-- successful close can be resolved, and a `longcomment_proper` entry so the
-- fallback transition can return to the comment body.
do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
    tllongcomment_proper.maybe_end = tllmaybe_end
    tllmaybe_end.longcomment_proper = tllongcomment_proper
    tllmaybe_end["="] = function(state, token)
        state.longcomment_close = state.longcomment_close + 1
        return "maybe_end"
    end
    tllmaybe_end["]"] = function(state, token)
        if state.longcomment_close == state.longcomment_count then
            -- closing level matches the opening level: the comment is over
            state.longcomment_close = nil
            state.longcomment_count = nil
            return "defs"
        else
            -- level mismatch; this "]" may itself begin a closing bracket
            state.longcomment_close = 0
            return "maybe_end"
        end
    end
    -- any other character: we are still inside the comment body
    tllmaybe_end[""] = "longcomment_proper"
    tllmaybe_end[-1] = function(state, token, rule)
        if not rule then
            state.longcomment_close = nil
        end
    end
end
|
||||
|
||||
tlongcomment.longcomment_proper = tllongcomment_proper
|
||||
mknewline(tlongcomment, 1, tllongcomment_proper)
|
||||
setmetatable(tlongcomment, {__index=tllongcomment_proper})
|
||||
end
|
||||
end
|
||||
|
||||
-- State reached through the "[" rule of the line-comment table (i.e. after
-- "--["). It decides between a long comment ("--[[", "--[=[", ...) and an
-- ordinary line comment that merely starts with "[": keys not found here are
-- looked up in `tmcomment` through the metatable.
tmcomment.maybe_longcomment = setmetatable({
    comment = tmcomment,
    ['['] = "longcomment_open",
    ['='] = "longcomment_open",
    -- counts the remaining "=" signs of the opening bracket
    longcomment_count = setmetatable(selfify({
        ["="] = function(state, token)
            state.longcomment_count = state.longcomment_count + 1
            return "longcomment_count"
        end,
        ["["] = "longcomment",
        longcomment = tmcomment.longcomment,
    }, "longcomment_count"), {__index=tmcomment}),
    longcomment_open = function(state, token)
        if token == "=" then
            -- First "=" of the opening bracket. Always start from 1: the
            -- previous `count or 0 + 1` parsed as `count or (0 + 1)` and so
            -- kept a stale count left behind by an aborted "--[==x" opener.
            state.longcomment_count = 1
            return "longcomment_count"
        elseif token == "[" then
            -- "--[[": level-0 long comment
            state.longcomment_count = 0
            return "longcomment"
        end
    end,
}, {__index=tmcomment})
|
||||
end
|
||||
end
|
||||
|
||||
local STATE = parser.STATE
|
||||
|
||||
-- Shared prototype for operators that may be one or two characters long
-- ("=" / "==", "<" / "<<" / "<=", ...).
--
-- Calling it as defs.multitokens(first, second1, result1, second2, result2, ...)
-- installs defs[first] as a state table that inherits from this prototype;
-- each `secondN` character is routed to the "second" rule and mapped to
-- `resultN` in the state's `multitoken` table.
defs.multitokens = setmetatable({
    -- Hook under key -1: when the following token is not one of the
    -- registered second characters, emit the first character on its own.
    -- state[STATE] is presumably the state table currently active in the
    -- parser -- confirm against parser.lua.
    [-1] = function(state, token, rule)
        local current = state[STATE]
        if current.multitoken[token] then
            return
        end
        state[#state+1] = current.first
    end,
    -- Emit the combined token registered for this second character.
    second = function(state, token)
        local combined = state[STATE].multitoken[token]
        state[#state+1] = combined
        return "self" -- actually goes into defs
    end
}, {
    __index=defs,
    __call=function(t, first, ...)
        defs[first] = setmetatable({
            first = first,
            multitoken = {}
        }, {__index=t})
        local entry = defs[first]
        -- walk the (second, result) pairs; stop at the first missing `second`
        for i = 1, select("#", ...), 2 do
            local second, result = select(i, ...)
            if not second then
                break
            end
            entry[second] = "second"
            entry.multitoken[second] = result
        end
    end
})
|
||||
|
||||
defs.multitokens("=", "=", TK_EQ)
|
||||
defs.multitokens("/", "/", TK_IDIV)
|
||||
defs.multitokens("<", "<", TK_SHL, "=", TK_LE)
|
||||
defs.multitokens(">", ">", TK_SHR, "=", TK_GE)
|
||||
defs.multitokens("~", "=", TK_NE)
|
||||
defs.multitokens(":", ":", TK_DBCOLON)
|
||||
|
||||
do
    -- State after "..": a third "." produces TK_DOTS; any other token means
    -- the two dots were a concatenation operator.
    local after_two_dots = setmetatable({
        [-1] = function(state, token, rule)
            if token == "." then
                return
            end
            state[#state+1] = TK_CONCAT
        end,
        ["."] = function(state, token)
            state[#state+1] = TK_DOTS
            return "self" -- actually goes into defs
        end
    }, {__index=defs})

    -- State after a single ".": another "." moves to the state above; a
    -- "digit" rule is not implemented yet; anything else emits a plain ".".
    defs["."] = setmetatable({
        [-1] = function(state, token, rule)
            if token == "." then
                return
            end
            if rule == "digit" then
                error("NYI") -- TODO digit handling
            end
            state[#state+1] = "."
        end,
        ["."] = after_two_dots
    }, {__index=defs})
end
|
||||
|
||||
function defs.digit(state, token)
|
||||
-- TODO
|
||||
end
|
||||
|
||||
defs.in_digit = {
|
||||
-- TODO
|
||||
}
|
||||
|
||||
-- Rule handler for single-character tokens: appends the character itself to
-- the token list and returns the "self" transition.
defs.simpletoken = function(state, token)
    local next_slot = #state + 1
    state[next_slot] = token
    return "self"
end
|
||||
|
||||
do
    -- Every character of this string is registered as a "simpletoken" rule.
    local simple_chars = "+*%^#&|(){}];,"
    for index = 1, #simple_chars do
        defs[simple_chars:sub(index, index)] = "simpletoken"
    end
end
|
||||
|
||||
defs.whitespace = "self"
|
||||
defs.hexdigit = "alpha"
|
||||
defs["_"] = "alpha"
|
||||
|
@ -440,4 +669,11 @@ return {
|
|||
TK_DBCOLON = TK_DBCOLON, TK_EOS = TK_EOS,
|
||||
TK_FLT = TK_FLT, TK_INT = TK_INT, TK_NAME = TK_NAME, TK_STRING = TK_STRING
|
||||
},
|
||||
reverse_keywords = reverse_keywords,
|
||||
reverse_tokens = {
|
||||
[TK_IDIV] = "//", [TK_CONCAT] = "..", [TK_DOTS] = "...", [TK_EQ] = "==", [TK_GE] = ">=", [TK_LE] = "<=", [TK_NE] = "~=",
|
||||
[TK_SHL] = "<<", [TK_SHR] = ">>",
|
||||
[TK_DBCOLON] = "::", [TK_EOS] = "<eof>",
|
||||
[TK_FLT] = "<float>", [TK_INT] = "<integer>", [TK_NAME] = "<identifier>", [TK_STRING] = "<string>"
|
||||
},
|
||||
}
|
||||
|
|
96
test.lua
96
test.lua
|
@ -90,6 +90,7 @@ do -- lua tokens
|
|||
else
|
||||
assert(state[1] == luatokens.tokens.TK_STRING)
|
||||
assert(state[2] == "hello world")
|
||||
assert(state[3] == nil)
|
||||
assert(state.line == 1 or not state.line)
|
||||
end
|
||||
end -- lua tokens
|
||||
|
@ -110,6 +111,7 @@ do -- more lua tokens
|
|||
else
|
||||
assert(state[1] == luatokens.tokens.TK_STRING)
|
||||
assert(state[2] == "\7\8\12\10\13\9\11\92\34\39\65\65\10")
|
||||
assert(state[3] == nil)
|
||||
assert(state.line == 2)
|
||||
end
|
||||
end -- lua tokens
|
||||
|
@ -129,6 +131,7 @@ do -- even more lua tokens
|
|||
else
|
||||
assert(state[1] == luatokens.tokens.TK_STRING)
|
||||
assert(state[2] == "A")
|
||||
assert(state[3] == nil)
|
||||
assert(state.line == 1 or not state.line)
|
||||
end
|
||||
end -- lua tokens
|
||||
|
@ -168,6 +171,7 @@ do -- even more lua tokens
|
|||
assert(table.remove(state, 1) == "\252\132\128\128\128\128")
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
|
||||
assert(table.remove(state, 1) == "\253\191\191\191\191\191")
|
||||
assert(table.remove(state, 1) == nil)
|
||||
assert(state.line == 1 or not state.line)
|
||||
end
|
||||
end -- lua tokens
|
||||
|
@ -188,6 +192,7 @@ do -- simple lua tokens
|
|||
assert(table.remove(state, 1) == "[")
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
|
||||
assert(table.remove(state, 1) == "")
|
||||
assert(table.remove(state, 1) == nil)
|
||||
assert(state.line == 1 or not state.line)
|
||||
end
|
||||
end -- lua tokens
|
||||
|
@ -207,6 +212,7 @@ do -- simple long string
|
|||
else
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
|
||||
assert(table.remove(state, 1) == "")
|
||||
assert(table.remove(state, 1) == nil)
|
||||
assert(state.line == 1 or not state.line)
|
||||
end
|
||||
end -- long string
|
||||
|
@ -226,6 +232,7 @@ do -- long string with depth 1
|
|||
else
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
|
||||
assert(table.remove(state, 1) == "")
|
||||
assert(table.remove(state, 1) == nil)
|
||||
assert(state.line == 1 or not state.line)
|
||||
end
|
||||
end -- long string
|
||||
|
@ -245,6 +252,7 @@ do -- long string with "nested" long string
|
|||
else
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
|
||||
assert(table.remove(state, 1) == "[[]]")
|
||||
assert(table.remove(state, 1) == nil)
|
||||
assert(state.line == 1 or not state.line)
|
||||
end
|
||||
end -- long string
|
||||
|
@ -252,7 +260,7 @@ end -- long string
|
|||
do -- long string edge cases
|
||||
local luatokens = require "luatokens"
|
||||
local tokens = luatokens.defs
|
||||
local state, err, etoken, estate = parser.parse(tokens, "[==[]=]==][==[]]==]")
|
||||
local state, err, etoken, estate = parser.parse(tokens, "[==[]=]==][==[]]==][=[] ]=]")
|
||||
local case = case()
|
||||
if not state then
|
||||
print(case, "---- IN TOKENS ----")
|
||||
|
@ -266,6 +274,9 @@ do -- long string edge cases
|
|||
assert(table.remove(state, 1) == "]=")
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
|
||||
assert(table.remove(state, 1) == "]")
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
|
||||
assert(table.remove(state, 1) == "] ")
|
||||
assert(table.remove(state, 1) == nil)
|
||||
assert(state.line == 1 or not state.line)
|
||||
end
|
||||
end -- long string
|
||||
|
@ -309,15 +320,20 @@ do -- keywords
|
|||
assert(table.remove(state, 1) == luatokens.tokens.TK_TRUE)
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_UNTIL)
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_WHILE)
|
||||
assert(table.remove(state, 1) == nil)
|
||||
assert(state.line == 4)
|
||||
end
|
||||
end -- keywords
|
||||
|
||||
do -- FUCK
|
||||
do -- "other tokens"
|
||||
local luatokens = require "luatokens"
|
||||
local luatokens_file = io.open("./luatokens.lua", "r"):read((_VERSION == "Lua 5.1" or _VERSION == "Lua 5.2") and "*a" or "a")
|
||||
local tokens = luatokens.defs
|
||||
local state, err, etoken, estate = parser.parse(tokens, luatokens_file)
|
||||
local state, err, etoken, estate = parser.parse(tokens, [[
|
||||
+ - * / % ^ #
|
||||
& ~ | << >> //
|
||||
== ~= <= >= < > =
|
||||
( ) { } [ ] ::
|
||||
; : , . .. ...]])
|
||||
local case = case()
|
||||
if not state then
|
||||
print(case, "---- IN TOKENS ----")
|
||||
|
@ -326,5 +342,77 @@ do -- FUCK
|
|||
print(case, i, v)
|
||||
end
|
||||
print(case, "---- OUT TOKENS ----")
|
||||
else
|
||||
assert(table.remove(state, 1) == "+")
|
||||
assert(table.remove(state, 1) == "-")
|
||||
assert(table.remove(state, 1) == "*")
|
||||
assert(table.remove(state, 1) == "/")
|
||||
assert(table.remove(state, 1) == "%")
|
||||
assert(table.remove(state, 1) == "^")
|
||||
assert(table.remove(state, 1) == "#")
|
||||
assert(table.remove(state, 1) == "&")
|
||||
assert(table.remove(state, 1) == "~")
|
||||
assert(table.remove(state, 1) == "|")
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_SHL)
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_SHR)
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_IDIV)
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_EQ)
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_NE)
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_LE)
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_GE)
|
||||
assert(table.remove(state, 1) == "<")
|
||||
assert(table.remove(state, 1) == ">")
|
||||
assert(table.remove(state, 1) == "=")
|
||||
assert(table.remove(state, 1) == "(")
|
||||
assert(table.remove(state, 1) == ")")
|
||||
assert(table.remove(state, 1) == "{")
|
||||
assert(table.remove(state, 1) == "}")
|
||||
assert(table.remove(state, 1) == "[")
|
||||
assert(table.remove(state, 1) == "]")
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_DBCOLON)
|
||||
assert(table.remove(state, 1) == ";")
|
||||
assert(table.remove(state, 1) == ":")
|
||||
assert(table.remove(state, 1) == ",")
|
||||
assert(table.remove(state, 1) == ".")
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_CONCAT)
|
||||
assert(table.remove(state, 1) == luatokens.tokens.TK_DOTS)
|
||||
assert(table.remove(state, 1) == nil)
|
||||
assert(state.line == 5)
|
||||
end
|
||||
end -- "other tokens"
|
||||
|
||||
do -- long comments
    -- A two-line long comment must produce no tokens and leave the line
    -- counter at 2.
    local luatokens = require "luatokens"
    local tokens = luatokens.defs
    local state, err, etoken, estate = parser.parse(tokens, [==[--[[
--]]]==])
    local case = case()
    if state then
        assert(table.remove(state, 1) == nil)
        assert(state.line == 2)
    else
        -- parse failed: dump the error and the partial state for debugging
        print(case, "---- IN TOKENS ----")
        print(case, err, etoken)
        for key, value in pairs(estate) do
            print(case, key, value)
        end
        print(case, "---- OUT TOKENS ----")
    end
end -- long comments
|
||||
|
||||
-- Disabled on purpose (`while false`): tokenizes luatokens.lua itself, feeding
-- the parser 8192-byte chunks, and dumps the partial state on failure.
while false do -- FUCK
    local luatokens = require "luatokens"
    -- fail with io.open's own message instead of indexing nil below
    local luatokens_file = assert(io.open("./luatokens.lua", "r"))
    local tokens = luatokens.defs
    local state, err, etoken, estate = parser.parse(tokens, function() return luatokens_file:read(8192) end)
    -- parse has returned, so the reader is no longer needed
    luatokens_file:close()
    local case = case()
    if not state then
        print(case, "---- IN TOKENS ----")
        print(case, err, etoken)
        for i,v in pairs(estate) do
            -- show keywords/operators by their spelling where possible
            v = luatokens.reverse_keywords[v] or luatokens.reverse_tokens[v] or v
            print(case, i, v)
        end
        print(case, "---- OUT TOKENS ----")
    end
end -- FUCK
|
||||
|
|
Loading…
Reference in New Issue