diff --git a/luatokens.lua b/luatokens.lua
index 7bf9f68..406fba5 100644
--- a/luatokens.lua
+++ b/luatokens.lua
@@ -1,4 +1,74 @@
--- Lua defs
+--[[
+ luatokens.lua - pure-Lua Lua tokenizer
+ Copyright (C) 2019 Soni L.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+--]]
+
+--[[
+ This software is based on Lua 5.1 and Lua 5.3
+
+ Lua 5.1 license:
+
+/******************************************************************************
+* Copyright (C) 1994-2012 Lua.org, PUC-Rio. All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+******************************************************************************/
+
+ Lua 5.3 license:
+
+/******************************************************************************
+* Copyright (C) 1994-2018 Lua.org, PUC-Rio.
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+******************************************************************************/
+--]]
-- we need some stuff from here
local parser = require "parser"
@@ -54,6 +124,11 @@ local keywords = {
["while"] = TK_WHILE,
}
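+-- token -> keyword text, exported below and used for readable debug output in test.lua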
+local reverse_keywords = {}
+for k,v in pairs(keywords) do
+ reverse_keywords[v] = k
+end
+
local defs = selfify({})
defs.base = {
@@ -315,6 +390,7 @@ do local tlongstring = {}
do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
tllongstring_proper.maybe_end = tllmaybe_end
+ tllmaybe_end.longstring_proper = tllongstring_proper
tllmaybe_end["="] = function(state, token)
state.longstring_close = state.longstring_close + 1
return "maybe_end"
@@ -335,6 +411,8 @@ do local tlongstring = {}
return "maybe_end"
end
end
+ tllmaybe_end[""] = "longstring_proper"
+ tllmaybe_end[1] = collect_fallback
tllmaybe_end[-1] = function(state, token, rule)
if not rule then
collect_fallback(state, "]")
@@ -390,6 +468,157 @@ defs.maybe_longstring = setmetatable({
--defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs})
mknewline(defs, 1)
+-- thankfully comments are easy
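+-- "-" might start a "--" comment; the [-1] fallback below emits a lone "-" when it doesn't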
+defs["-"] = "maybe_comment"
+do local tmaybe_comment = setmetatable({["-"] = "comment"}, {__index=defs})
+ defs.maybe_comment = tmaybe_comment
+ tmaybe_comment[-1] = function(state, token, rule)
+ if rule ~= "comment" then
+ state[#state+1] = "-"
+ end
+ end
+ do local tmcomment = {comment_proper = selfify({})}
+ tmaybe_comment.comment = tmcomment
+ tmcomment[""] = "comment_proper"
+ tmcomment["["] = "maybe_longcomment"
+ mknewline(tmcomment, 1, defs)
+ mknewline(tmcomment.comment_proper, 1, defs)
+ tmcomment.comment_proper[""] = "self"
+ do local tlongcomment = {}
+ tmcomment.longcomment = tlongcomment
+ do local tllongcomment_proper = selfify({[""] = "self", ["]"] = function(state, token) state.longcomment_close = 0 return "maybe_end" end})
+ do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
+ tllongcomment_proper.maybe_end = tllmaybe_end
+ tllmaybe_end.longcomment_proper = tllongcomment_proper
+ tllmaybe_end["="] = function(state, token)
+ state.longcomment_close = state.longcomment_close + 1
+ return "maybe_end"
+ end
+ tllmaybe_end["]"] = function(state, token)
+ if state.longcomment_close == state.longcomment_count then
+ state.longcomment_close = nil
+ state.longcomment_count = nil
+ return "defs"
+ else
+ state.longcomment_close = 0
+ return "maybe_end"
+ end
+ end
+ tllmaybe_end[""] = "longcomment_proper"
+ tllmaybe_end[-1] = function(state, token, rule)
+ if not rule then
+ state.longcomment_close = nil
+ end
+ end
+ end
+
+ tlongcomment.longcomment_proper = tllongcomment_proper
+ mknewline(tlongcomment, 1, tllongcomment_proper)
+ setmetatable(tlongcomment, {__index=tllongcomment_proper})
+ end
+ end
+
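+ -- after "--[": count any "="s, then a second "[" enters the long comment body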
+ tmcomment.maybe_longcomment = setmetatable({
+ comment = tmcomment,
+ ['['] = "longcomment_open",
+ ['='] = "longcomment_open",
+ longcomment_count = setmetatable(selfify({
+ ["="] = function(state, token)
+ state.longcomment_count = state.longcomment_count + 1
+ return "longcomment_count"
+ end,
+ ["["] = "longcomment",
+ longcomment = tmcomment.longcomment,
+ }, "longcomment_count"), {__index=tmcomment}),
+ longcomment_open = function(state, token)
+ if token == "=" then
+ state.longcomment_count = (state.longcomment_count or 0) + 1
+ return "longcomment_count"
+ elseif token == "[" then
+ state.longcomment_count = 0
+ return "longstring"
+ end
+ end,
+ }, {__index=tmcomment})
+ end
+end
+
+local STATE = parser.STATE
+
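+-- two-character operators: multitokens("<", "<", TK_SHL, "=", TK_LE) maps "<<" to TK_SHL
+-- and "<=" to TK_LE; the [-1] fallback emits a lone "<" when no second character matches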
+defs.multitokens = setmetatable({
+ [-1] = function(state, token, rule)
+ if not state[STATE].multitoken[token] then
+ state[#state+1] = state[STATE].first
+ end
+ end,
+ second = function(state, token)
+ state[#state+1] = state[STATE].multitoken[token]
+ return "self" -- actually goes into defs
+ end
+}, {
+ __index=defs,
+ __call=function(t, first, ...)
+ local function helper(t, second, result, ...)
+ if not second then return end
+ t[second] = "second"
+ t.multitoken[second] = result
+ return helper(t, ...)
+ end
+ defs[first] = setmetatable({
+ first = first,
+ multitoken = {}
+ }, {__index=t})
+ return helper(defs[first], ...)
+ end
+})
+
+defs.multitokens("=", "=", TK_EQ)
+defs.multitokens("/", "/", TK_IDIV)
+defs.multitokens("<", "<", TK_SHL, "=", TK_LE)
+defs.multitokens(">", ">", TK_SHR, "=", TK_GE)
+defs.multitokens("~", "=", TK_NE)
+defs.multitokens(":", ":", TK_DBCOLON)
+
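+-- ".", ".." and "..."; "." followed by a digit (a number like .5) is still TODO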
+defs["."] = setmetatable({
+ [-1] = function(state, token, rule)
+ if token ~= "." then
+ if rule ~= "digit" then
+ state[#state+1] = "."
+ else
+ error("NYI") -- TODO digit handling
+ end
+ end
+ end,
+ ["."] = setmetatable({
+ [-1] = function(state, token, rule)
+ if token ~= "." then
+ state[#state+1] = TK_CONCAT
+ end
+ end,
+ ["."] = function(state, token)
+ state[#state+1] = TK_DOTS
+ return "self" -- actually goes into defs
+ end
+ }, {__index=defs})
+}, {__index=defs})
+
+function defs.digit(state, token)
+ -- TODO
+end
+
+defs.in_digit = {
+ -- TODO
+}
+
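+-- single-character tokens that need no lookahead are pushed through as-is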
+function defs.simpletoken(state, token)
+ state[#state+1] = token
+ return "self"
+end
+
+for token in string.gmatch("+*%^#&|(){}];,", ".") do
+ defs[token] = "simpletoken"
+end
+
defs.whitespace = "self"
defs.hexdigit = "alpha"
defs["_"] = "alpha"
@@ -440,4 +669,11 @@ return {
TK_DBCOLON = TK_DBCOLON, TK_EOS = TK_EOS,
TK_FLT = TK_FLT, TK_INT = TK_INT, TK_NAME = TK_NAME, TK_STRING = TK_STRING
},
+ reverse_keywords = reverse_keywords,
+ reverse_tokens = {
+ [TK_IDIV] = "//", [TK_CONCAT] = "..", [TK_DOTS] = "...", [TK_EQ] = "==", [TK_GE] = ">=", [TK_LE] = "<=", [TK_NE] = "~=",
+ [TK_SHL] = "<<", [TK_SHR] = ">>",
+ [TK_DBCOLON] = "::", [TK_EOS] = "",
+ [TK_FLT] = "", [TK_INT] = "", [TK_NAME] = "", [TK_STRING] = ""
+ },
}
diff --git a/test.lua b/test.lua
index f9648eb..a8a830d 100644
--- a/test.lua
+++ b/test.lua
@@ -90,6 +90,7 @@ do -- lua tokens
else
assert(state[1] == luatokens.tokens.TK_STRING)
assert(state[2] == "hello world")
+ assert(state[3] == nil)
assert(state.line == 1 or not state.line)
end
end -- lua tokens
@@ -110,6 +111,7 @@ do -- more lua tokens
else
assert(state[1] == luatokens.tokens.TK_STRING)
assert(state[2] == "\7\8\12\10\13\9\11\92\34\39\65\65\10")
+ assert(state[3] == nil)
assert(state.line == 2)
end
end -- lua tokens
@@ -129,6 +131,7 @@ do -- even more lua tokens
else
assert(state[1] == luatokens.tokens.TK_STRING)
assert(state[2] == "A")
+ assert(state[3] == nil)
assert(state.line == 1 or not state.line)
end
end -- lua tokens
@@ -168,6 +171,7 @@ do -- even more lua tokens
assert(table.remove(state, 1) == "\252\132\128\128\128\128")
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "\253\191\191\191\191\191")
+ assert(table.remove(state, 1) == nil)
assert(state.line == 1 or not state.line)
end
end -- lua tokens
@@ -188,6 +192,7 @@ do -- simple lua tokens
assert(table.remove(state, 1) == "[")
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "")
+ assert(table.remove(state, 1) == nil)
assert(state.line == 1 or not state.line)
end
end -- lua tokens
@@ -207,6 +212,7 @@ do -- simple long string
else
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "")
+ assert(table.remove(state, 1) == nil)
assert(state.line == 1 or not state.line)
end
end -- long string
@@ -226,6 +232,7 @@ do -- long string with depth 1
else
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "")
+ assert(table.remove(state, 1) == nil)
assert(state.line == 1 or not state.line)
end
end -- long string
@@ -245,6 +252,7 @@ do -- long string with "nested" long string
else
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "[[]]")
+ assert(table.remove(state, 1) == nil)
assert(state.line == 1 or not state.line)
end
end -- long string
@@ -252,7 +260,7 @@ end -- long string
do -- long string edge cases
local luatokens = require "luatokens"
local tokens = luatokens.defs
- local state, err, etoken, estate = parser.parse(tokens, "[==[]=]==][==[]]==]")
+ local state, err, etoken, estate = parser.parse(tokens, "[==[]=]==][==[]]==][=[] ]=]")
local case = case()
if not state then
print(case, "---- IN TOKENS ----")
@@ -266,6 +274,9 @@ do -- long string edge cases
assert(table.remove(state, 1) == "]=")
assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
assert(table.remove(state, 1) == "]")
+ assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+ assert(table.remove(state, 1) == "] ")
+ assert(table.remove(state, 1) == nil)
assert(state.line == 1 or not state.line)
end
end -- long string
@@ -309,15 +320,20 @@ do -- keywords
assert(table.remove(state, 1) == luatokens.tokens.TK_TRUE)
assert(table.remove(state, 1) == luatokens.tokens.TK_UNTIL)
assert(table.remove(state, 1) == luatokens.tokens.TK_WHILE)
+ assert(table.remove(state, 1) == nil)
assert(state.line == 4)
end
end -- keywords
-do -- FUCK
+do -- "other tokens"
local luatokens = require "luatokens"
- local luatokens_file = io.open("./luatokens.lua", "r"):read((_VERSION == "Lua 5.1" or _VERSION == "Lua 5.2") and "*a" or "a")
local tokens = luatokens.defs
- local state, err, etoken, estate = parser.parse(tokens, luatokens_file)
+ local state, err, etoken, estate = parser.parse(tokens, [[
+ + - * / % ^ #
+ & ~ | << >> //
+ == ~= <= >= < > =
+ ( ) { } [ ] ::
+ ; : , . .. ...]])
local case = case()
if not state then
print(case, "---- IN TOKENS ----")
@@ -326,5 +342,77 @@ do -- FUCK
print(case, i, v)
end
print(case, "---- OUT TOKENS ----")
+ else
+ assert(table.remove(state, 1) == "+")
+ assert(table.remove(state, 1) == "-")
+ assert(table.remove(state, 1) == "*")
+ assert(table.remove(state, 1) == "/")
+ assert(table.remove(state, 1) == "%")
+ assert(table.remove(state, 1) == "^")
+ assert(table.remove(state, 1) == "#")
+ assert(table.remove(state, 1) == "&")
+ assert(table.remove(state, 1) == "~")
+ assert(table.remove(state, 1) == "|")
+ assert(table.remove(state, 1) == luatokens.tokens.TK_SHL)
+ assert(table.remove(state, 1) == luatokens.tokens.TK_SHR)
+ assert(table.remove(state, 1) == luatokens.tokens.TK_IDIV)
+ assert(table.remove(state, 1) == luatokens.tokens.TK_EQ)
+ assert(table.remove(state, 1) == luatokens.tokens.TK_NE)
+ assert(table.remove(state, 1) == luatokens.tokens.TK_LE)
+ assert(table.remove(state, 1) == luatokens.tokens.TK_GE)
+ assert(table.remove(state, 1) == "<")
+ assert(table.remove(state, 1) == ">")
+ assert(table.remove(state, 1) == "=")
+ assert(table.remove(state, 1) == "(")
+ assert(table.remove(state, 1) == ")")
+ assert(table.remove(state, 1) == "{")
+ assert(table.remove(state, 1) == "}")
+ assert(table.remove(state, 1) == "[")
+ assert(table.remove(state, 1) == "]")
+ assert(table.remove(state, 1) == luatokens.tokens.TK_DBCOLON)
+ assert(table.remove(state, 1) == ";")
+ assert(table.remove(state, 1) == ":")
+ assert(table.remove(state, 1) == ",")
+ assert(table.remove(state, 1) == ".")
+ assert(table.remove(state, 1) == luatokens.tokens.TK_CONCAT)
+ assert(table.remove(state, 1) == luatokens.tokens.TK_DOTS)
+ assert(table.remove(state, 1) == nil)
+ assert(state.line == 5)
+ end
+end -- "other tokens"
+
+do -- long comments
+ local luatokens = require "luatokens"
+ local tokens = luatokens.defs
+ local state, err, etoken, estate = parser.parse(tokens, [==[--[[
+ --]]]==])
+ local case = case()
+ if not state then
+ print(case, "---- IN TOKENS ----")
+ print(case, err, etoken)
+ for i,v in pairs(estate) do
+ print(case, i, v)
+ end
+ print(case, "---- OUT TOKENS ----")
+ else
+ assert(table.remove(state, 1) == nil)
+ assert(state.line == 2)
+ end
+end -- long comments
+
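+-- kept disabled for now (luatokens.lua contains numbers, and digit handling is still a TODO)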
+while false do -- FUCK
+ local luatokens = require "luatokens"
+ local luatokens_file = io.open("./luatokens.lua", "r")
+ local tokens = luatokens.defs
+ local state, err, etoken, estate = parser.parse(tokens, function() return luatokens_file:read(8192) end)
+ local case = case()
+ if not state then
+ print(case, "---- IN TOKENS ----")
+ print(case, err, etoken)
+ for i,v in pairs(estate) do
+ v = luatokens.reverse_keywords[v] or luatokens.reverse_tokens[v] or v
+ print(case, i, v)
+ end
+ print(case, "---- OUT TOKENS ----")
end
end -- FUCK