Add copyright notices, everything almost works

Numbers and long comments aren't working
SoniEx2 2019-04-08 13:57:28 -03:00
parent fd48534de3
commit f56e1bd7e7
2 changed files with 329 additions and 5 deletions

View File

@@ -1,4 +1,74 @@
-- Lua defs
--[[
luatokens.lua - pure-Lua Lua tokenizer
Copyright (C) 2019 Soni L.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
--]]
--[[
This software is based on Lua 5.1 and Lua 5.3
Lua 5.1 license:
/******************************************************************************
* Copyright (C) 1994-2012 Lua.org, PUC-Rio. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
******************************************************************************/
Lua 5.3 license:
/******************************************************************************
* Copyright (C) 1994-2018 Lua.org, PUC-Rio.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
******************************************************************************/
--]]
-- we need parser.STATE from here (the tests also use parser.parse)
local parser = require "parser"
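The parser module required here is not part of this diff, so the sketch below is a guess at the driver contract these transition tables rely on, not the actual parser.lua: a rule may be a state table, a state name, or a function returning either; [""] is the fallback rule, [-1] a state-exit hook, and parser.STATE the key under which the driver stores the current state table.

-- Hypothetical sketch only; names and details are assumptions.
local STATE = {}  -- unique key for the current state table (stands in for parser.STATE)

local function parse(defs, input, state)
    state = state or {}
    local current = defs
    for i = 1, #input do
        local c = input:sub(i, i)
        state[STATE] = current
        local rule = current[c]                        -- may be nil
        local hook = current[-1]
        if hook then hook(state, c, rule) end          -- state-exit hook sees the raw rule
        if rule == nil then rule = current[""] end     -- fallback rule
        -- resolve names in the current state, so "self" loops and aliases like
        -- defs.hexdigit = "alpha" chain; functions may return further names
        local next_state = rule
        while type(next_state) ~= "table" do
            if type(next_state) == "string" then
                next_state = current[next_state]
            elseif type(next_state) == "function" then
                next_state = next_state(state, c)
            else
                return nil, "unexpected character", c, state
            end
        end
        current = next_state
    end
    return state
end

The real driver also accepts a chunk-reader function in place of the input string, as the disabled test at the bottom of the second file shows.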
@@ -54,6 +124,11 @@ local keywords = {
["while"] = TK_WHILE,
}
local reverse_keywords = {}
for k,v in pairs(keywords) do
reverse_keywords[v] = k
end
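-- reverse_keywords maps token objects back to their keyword strings, e.g.
-- reverse_keywords[TK_WHILE] == "while"; the tests use it (together with
-- reverse_tokens, defined at the bottom of this file) to pretty-print token
-- streams on failure.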
local defs = selfify({})
defs.base = {
@@ -315,6 +390,7 @@ do local tlongstring = {}
do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
tllongstring_proper.maybe_end = tllmaybe_end
tllmaybe_end.longstring_proper = tllongstring_proper
tllmaybe_end["="] = function(state, token)
state.longstring_close = state.longstring_close + 1
return "maybe_end"
@@ -335,6 +411,8 @@ do local tlongstring = {}
return "maybe_end"
end
end
tllmaybe_end[""] = "longstring_proper"
tllmaybe_end[1] = collect_fallback
tllmaybe_end[-1] = function(state, token, rule)
if not rule then
collect_fallback(state, "]")
@@ -390,6 +468,157 @@ defs.maybe_longstring = setmetatable({
--defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs})
mknewline(defs, 1)
-- thankfully comments are easy
defs["-"] = "maybe_comment"
do local tmaybe_comment = setmetatable({["-"] = "comment"}, {__index=defs})
defs.maybe_comment = tmaybe_comment
tmaybe_comment[-1] = function(state, token, rule)
if rule ~= "comment" then
state[#state+1] = "-"
end
end
do local tmcomment = {comment_proper = selfify({})}
tmaybe_comment.comment = tmcomment
tmcomment[""] = "comment_proper"
tmcomment["["] = "maybe_longcomment"
mknewline(tmcomment, 1, defs)
mknewline(tmcomment.comment_proper, 1, defs)
tmcomment.comment_proper[""] = "self"
do local tlongcomment = {}
tmcomment.longcomment = tlongcomment
do local tllongcomment_proper = selfify({[""] = "self", ["]"] = function(state, token) state.longcomment_close = 0 return "maybe_end" end})
do local tllmaybe_end = selfify({defs = defs}, "maybe_end") -- "defs" must be resolvable here for the "]" handler below
tllongcomment_proper.maybe_end = tllmaybe_end
tllmaybe_end.longcomment_proper = tllongcomment_proper
tllmaybe_end["="] = function(state, token)
state.longcomment_close = state.longcomment_close + 1
return "maybe_end"
end
tllmaybe_end["]"] = function(state, token)
if state.longcomment_close == state.longcomment_count then
state.longcomment_close = nil
state.longcomment_count = nil
return "defs"
else
state.longcomment_close = 0
return "maybe_end"
end
end
tllmaybe_end[""] = "longcomment_proper"
tllmaybe_end[-1] = function(state, token, rule)
if not rule then
state.longcomment_close = nil
end
end
end
tlongcomment.longcomment_proper = tllongcomment_proper
mknewline(tlongcomment, 1, tllongcomment_proper)
setmetatable(tlongcomment, {__index=tllongcomment_proper})
end
end
tmcomment.maybe_longcomment = setmetatable({
comment = tmcomment,
['['] = "longcomment_open",
['='] = "longcomment_open",
longcomment_count = setmetatable(selfify({
["="] = function(state, token)
state.longcomment_count = state.longcomment_count + 1
return "longcomment_count"
end,
["["] = "longcomment",
longcomment = tmcomment.longcomment,
}, "longcomment_count"), {__index=tmcomment}),
longcomment_open = function(state, token)
if token == "=" then
state.longcomment_count = (state.longcomment_count or 0) + 1
return "longcomment_count"
elseif token == "[" then
state.longcomment_count = 0
return "longstring"
end
end,
}, {__index=tmcomment})
end
end
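-- To make the bracket bookkeeping above concrete, here is how "--[==[ x ]==]"
-- walks these states (a hand trace, assuming the driver sketched near the top
-- of this file):
--   "-" "-"   defs -> maybe_comment -> comment
--   "["       comment -> maybe_longcomment
--   "="       longcomment_open: longcomment_count = 1
--   "="       longcomment_count: count bumped to 2
--   "["       -> longcomment (the body; comment text is simply discarded)
--   "]"       longcomment_close = 0, -> maybe_end
--   "=" "="   maybe_end: close bumped to 2
--   "]"       close == count, so both reset and we land back in defs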
local STATE = parser.STATE
defs.multitokens = setmetatable({
[-1] = function(state, token, rule)
if not state[STATE].multitoken[token] then
state[#state+1] = state[STATE].first
end
end,
second = function(state, token)
state[#state+1] = state[STATE].multitoken[token]
return "self" -- actually goes into defs
end
}, {
__index=defs,
__call=function(t, first, ...)
local function helper(t, second, result, ...)
if not second then return end
t[second] = "second"
t.multitoken[second] = result
return helper(t, ...)
end
defs[first] = setmetatable({
first = first,
multitoken = {}
}, {__index=t})
return helper(defs[first], ...)
end
})
defs.multitokens("=", "=", TK_EQ)
defs.multitokens("/", "/", TK_IDIV)
defs.multitokens("<", "<", TK_SHL, "=", TK_LE)
defs.multitokens(">", ">", TK_SHR, "=", TK_GE)
defs.multitokens("~", "=", TK_NE)
defs.multitokens(":", ":", TK_DBCOLON)
defs["."] = setmetatable({
[-1] = function(state, token, rule)
if token ~= "." then
if rule ~= "digit" then
state[#state+1] = "."
else
error("NYI") -- TODO digit handling
end
end
end,
["."] = setmetatable({
[-1] = function(state, token, rule)
if token ~= "." then
state[#state+1] = TK_CONCAT
end
end,
["."] = function(state, token)
state[#state+1] = TK_DOTS
return "self" -- actually goes into defs
end
}, {__index=defs})
}, {__index=defs})
function defs.digit(state, token)
-- TODO
end
defs.in_digit = {
-- TODO
}
function defs.simpletoken(state, token)
state[#state+1] = token
return "self"
end
for token in string.gmatch("+*%^#&|(){}];,", ".") do
defs[token] = "simpletoken"
end
defs.whitespace = "self"
defs.hexdigit = "alpha"
defs["_"] = "alpha"
@@ -440,4 +669,11 @@ return {
TK_DBCOLON = TK_DBCOLON, TK_EOS = TK_EOS,
TK_FLT = TK_FLT, TK_INT = TK_INT, TK_NAME = TK_NAME, TK_STRING = TK_STRING
},
reverse_keywords = reverse_keywords,
reverse_tokens = {
[TK_IDIV] = "//", [TK_CONCAT] = "..", [TK_DOTS] = "...", [TK_EQ] = "==", [TK_GE] = ">=", [TK_LE] = "<=", [TK_NE] = "~=",
[TK_SHL] = "<<", [TK_SHR] = ">>",
[TK_DBCOLON] = "::", [TK_EOS] = "<eof>",
[TK_FLT] = "<float>", [TK_INT] = "<integer>", [TK_NAME] = "<identifier>", [TK_STRING] = "<string>"
},
}
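A minimal end-to-end usage sketch, mirroring the conventions of the test file below (and assuming the driver contract described above):

local parser = require "parser"
local luatokens = require "luatokens"

local state, err, etoken = parser.parse(luatokens.defs, "while true do end\n")
assert(state, err)
assert(state[1] == luatokens.tokens.TK_WHILE)
assert(state[2] == luatokens.tokens.TK_TRUE)
assert(state[3] == luatokens.tokens.TK_DO)
assert(state[4] == luatokens.tokens.TK_END)
assert(state.line == 2)  -- the trailing newline bumps the line counter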

View File

@@ -90,6 +90,7 @@ do -- lua tokens
else
assert(state[1] == luatokens.tokens.TK_STRING)
assert(state[2] == "hello world")
assert(state[3] == nil)
assert(state.line == 1 or not state.line)
end
end -- lua tokens
@@ -110,6 +111,7 @@ do -- more lua tokens
else
assert(state[1] == luatokens.tokens.TK_STRING)
assert(state[2] == "\7\8\12\10\13\9\11\92\34\39\65\65\10")
assert(state[3] == nil)
assert(state.line == 2)
end
end -- lua tokens
@@ -129,6 +131,7 @@ do -- even more lua tokens
else
assert(state[1] == luatokens.tokens.TK_STRING)
assert(state[2] == "A")
assert(state[3] == nil)
assert(state.line == 1 or not state.line)
end
end -- lua tokens
@@ -168,6 +171,7 @@ do -- even more lua tokens
assert(table.remove(state, 1) == "\252\132\128\128\128\128")
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "\253\191\191\191\191\191")
assert(table.remove(state, 1) == nil)
assert(state.line == 1 or not state.line)
end
end -- lua tokens
@@ -188,6 +192,7 @@ do -- simple lua tokens
assert(table.remove(state, 1) == "[")
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "")
assert(table.remove(state, 1) == nil)
assert(state.line == 1 or not state.line)
end
end -- lua tokens
@@ -207,6 +212,7 @@ do -- simple long string
else
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "")
assert(table.remove(state, 1) == nil)
assert(state.line == 1 or not state.line)
end
end -- long string
@@ -226,6 +232,7 @@ do -- long string with depth 1
else
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "")
assert(table.remove(state, 1) == nil)
assert(state.line == 1 or not state.line)
end
end -- long string
@@ -245,6 +252,7 @@ do -- long string with "nested" long string
else
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "[[]]")
assert(table.remove(state, 1) == nil)
assert(state.line == 1 or not state.line)
end
end -- long string
@@ -252,7 +260,7 @@ end -- long string
do -- long string edge cases
local luatokens = require "luatokens"
local tokens = luatokens.defs
local state, err, etoken, estate = parser.parse(tokens, "[==[]=]==][==[]]==]")
local state, err, etoken, estate = parser.parse(tokens, "[==[]=]==][==[]]==][=[] ]=]")
local case = case()
if not state then
print(case, "---- IN TOKENS ----")
@@ -266,6 +274,9 @@ do -- long string edge cases
assert(table.remove(state, 1) == "]=")
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "]")
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "] ")
assert(table.remove(state, 1) == nil)
assert(state.line == 1 or not state.line)
end
end -- long string
@@ -309,15 +320,20 @@ do -- keywords
assert(table.remove(state, 1) == luatokens.tokens.TK_TRUE)
assert(table.remove(state, 1) == luatokens.tokens.TK_UNTIL)
assert(table.remove(state, 1) == luatokens.tokens.TK_WHILE)
assert(table.remove(state, 1) == nil)
assert(state.line == 4)
end
end -- keywords
do -- FUCK
do -- "other tokens"
local luatokens = require "luatokens"
local luatokens_file = io.open("./luatokens.lua", "r"):read((_VERSION == "Lua 5.1" or _VERSION == "Lua 5.2") and "*a" or "a")
local tokens = luatokens.defs
local state, err, etoken, estate = parser.parse(tokens, luatokens_file)
local state, err, etoken, estate = parser.parse(tokens, [[
+ - * / % ^ #
& ~ | << >> //
== ~= <= >= < > =
( ) { } [ ] ::
; : , . .. ...]])
local case = case()
if not state then
print(case, "---- IN TOKENS ----")
@@ -326,5 +342,77 @@ do -- FUCK
print(case, i, v)
end
print(case, "---- OUT TOKENS ----")
else
assert(table.remove(state, 1) == "+")
assert(table.remove(state, 1) == "-")
assert(table.remove(state, 1) == "*")
assert(table.remove(state, 1) == "/")
assert(table.remove(state, 1) == "%")
assert(table.remove(state, 1) == "^")
assert(table.remove(state, 1) == "#")
assert(table.remove(state, 1) == "&")
assert(table.remove(state, 1) == "~")
assert(table.remove(state, 1) == "|")
assert(table.remove(state, 1) == luatokens.tokens.TK_SHL)
assert(table.remove(state, 1) == luatokens.tokens.TK_SHR)
assert(table.remove(state, 1) == luatokens.tokens.TK_IDIV)
assert(table.remove(state, 1) == luatokens.tokens.TK_EQ)
assert(table.remove(state, 1) == luatokens.tokens.TK_NE)
assert(table.remove(state, 1) == luatokens.tokens.TK_LE)
assert(table.remove(state, 1) == luatokens.tokens.TK_GE)
assert(table.remove(state, 1) == "<")
assert(table.remove(state, 1) == ">")
assert(table.remove(state, 1) == "=")
assert(table.remove(state, 1) == "(")
assert(table.remove(state, 1) == ")")
assert(table.remove(state, 1) == "{")
assert(table.remove(state, 1) == "}")
assert(table.remove(state, 1) == "[")
assert(table.remove(state, 1) == "]")
assert(table.remove(state, 1) == luatokens.tokens.TK_DBCOLON)
assert(table.remove(state, 1) == ";")
assert(table.remove(state, 1) == ":")
assert(table.remove(state, 1) == ",")
assert(table.remove(state, 1) == ".")
assert(table.remove(state, 1) == luatokens.tokens.TK_CONCAT)
assert(table.remove(state, 1) == luatokens.tokens.TK_DOTS)
assert(table.remove(state, 1) == nil)
assert(state.line == 5)
end
end -- "other tokens"
do -- long comments
local luatokens = require "luatokens"
local tokens = luatokens.defs
local state, err, etoken, estate = parser.parse(tokens, [==[--[[
--]]]==])
local case = case()
if not state then
print(case, "---- IN TOKENS ----")
print(case, err, etoken)
for i,v in pairs(estate) do
print(case, i, v)
end
print(case, "---- OUT TOKENS ----")
else
assert(table.remove(state, 1) == nil)
assert(state.line == 2)
end
end -- long comments
while false do -- FUCK
local luatokens = require "luatokens"
local luatokens_file = io.open("./luatokens.lua", "r")
local tokens = luatokens.defs
local state, err, etoken, estate = parser.parse(tokens, function() return luatokens_file:read(8192) end)
local case = case()
if not state then
print(case, "---- IN TOKENS ----")
print(case, err, etoken)
for i,v in pairs(estate) do
v = luatokens.reverse_keywords[v] or luatokens.reverse_tokens[v] or v
print(case, i, v)
end
print(case, "---- OUT TOKENS ----")
end
end -- FUCK