It... kinda works?

This commit is contained in:
SoniEx2 2019-07-30 21:12:16 -03:00
parent af3acfbb80
commit 4b365cdab1
8 changed files with 1321 additions and 150 deletions

View File

@ -70,169 +70,699 @@
******************************************************************************/
--]]
-- a parser.lua-based cratera compiler
-- a few notes:
-- * all "next" should be tables. as well as all "super" (which should be "next").
-- (it wouldn't work properly without this)
-- * when calling into a deeper level, remember to use the second return value "retry"
-- (i.e. set it to true)
-- this is basically just a straight translation of the lparser.c
-- main difference is we don't care about lua_State *L
local parser = require "parser"
local selfify = parser.selfify
local STATE = parser.STATE
local l = require "luatokens".tokens
local assert, type, setmetatable = assert, type, setmetatable
local TK = require "luatokens".TK
local error, assert = error, assert
-- try to avoid making too many locals because Lua has a limit to how many locals you can have
local coroutine = {create = coroutine.create,
resume = coroutine.resume,
yield = coroutine.yield}
local math = {huge = math.huge,
floor = math.floor}
local string = {format = string.format}
local function tostring__name(self)
return getmetatable(self).__name
local luaX = {} -- lexer
local luaK = {} -- code generator
luaK.ret = function() end -- FIXME
-- Advances the lexer state `ls` by one token.
-- If a look-ahead token is pending it becomes the current token and is
-- discharged; otherwise the next token is obtained by yielding from the
-- parser coroutine (whoever resumes the coroutine supplies the token).
-- Tokens listed in `extra_tokens` are followed by a second resumed value
-- that carries their semantic info.
luaX.next = (function()
  local extra_tokens = {[TK.NAME] = true, [TK.INT] = true, [TK.FLT] = true, [TK.STRING] = true}
  return function(ls)
    ls.lastline = ls.linenumber
    if ls.lookahead_token then
      -- use the look-ahead token and discharge it; do NOT read another
      -- token, otherwise the look-ahead would be overwritten right away
      ls.t_token = ls.lookahead_token
      ls.t_seminfo = ls.lookahead_seminfo
      ls.lookahead_token = nil
      return
    end
    local token = coroutine.yield()
    ls.t_token = token
    if extra_tokens[token] then
      ls.t_seminfo = coroutine.yield()
    end
  end
end)()
-- Appends the textual form of the current token (ls.t_token / ls.t_seminfo)
-- to the output buffer: ls[parser.COLLECT] when set, otherwise ls itself.
--   * floats are written as '((num/den)/extra)' using integer-valued parts
--   * ints via '%d', strings via '%q', names verbatim
--   * anything else via tostring(token)
local function save_token(ls)
  local tk = ls.t_token
  local seminfo = ls.t_seminfo
  local c = ls[parser.COLLECT] or ls
  if tk == TK.FLT then -- float tokens use the TK.FLT key (see luaX.next)
    local token = seminfo
    local extra, num, den = 1, token, 1
    assert(token == token and token >= 0, "NYI") -- the tokenizer should never output NaNs or negative values
    if token == math.huge then
      num, den = 1, 0
    else
      -- scale by powers of two until the numerator has no fractional part
      while num ~= math.floor(num) do
        num = num * 2 -- always safe (I think)
        local oldden = den
        den = den * 2
        if den == math.huge then -- subnormals or something?
          extra = oldden
          den = 2
        end
      end
    end
    -- NOTE(review): '%d' may reject very large integer-valued floats on
    -- Lua 5.3+ ("number has no integer representation") -- confirm.
    c[#c+1] = string.format('((%d/%d)/%d)', num, den, extra)
  elseif tk == TK.INT then
    c[#c+1] = string.format('%d', seminfo)
  elseif tk == TK.STRING then
    c[#c+1] = string.format('%q', seminfo)
  elseif tk == TK.NAME then
    c[#c+1] = seminfo
  else
    c[#c+1] = tostring(tk)
  end
end
-- Builds a fresh upvalue descriptor record with default field values.
local function Upvaldesc()
  local desc = {}
  desc.name = nil      -- TString -- upvalue name (for debug information)
  desc.instack = false -- lu_byte -- whether it is in stack (register)
  desc.idx = 0         -- lu_byte -- index of upvalue (in stack or in outer function's list)
  return desc
end
-- Builds a fresh local-variable debug record with default field values.
local function LocVar()
  local var = {}
  var.varname = nil -- TString
  var.startpc = 0   -- int -- first point where variable is active
  var.endpc = 0     -- int -- first point where variable is dead
  return var
end
-- Builds a fresh function prototype record (a GC object in the C original).
-- Every list field gets its own new table on each call.
local function Proto()
  local proto = {}
  proto.numparams = 0     -- lu_byte -- number of fixed parameters
  proto.is_vararg = false -- lu_byte but boolean
  proto.maxstacksize = 0  -- lu_byte -- number of registers needed by this function
  proto.k = {}            -- TValue * -- constants used by the function
  proto.code = {}         -- Instruction * -- opcodes
  proto.p = {}            -- Proto ** -- functions defined inside the function
  proto.lineinfo = {}     -- int * -- map from opcodes to source lines (debug information)
  proto.locvars = {}      -- LocVar * -- information about local variables (debug information)
  proto.uvalues = {}      -- Upvaldesc * -- upvalue information
  return proto
end
-- Builds a fresh per-function compilation state record with default values.
local function FuncState()
  local fs = {}
  fs.f = nil        -- Proto -- current function header
  fs.prev = nil     -- FuncState -- enclosing function
  fs.ls = nil       -- LexState -- lexical state
  fs.bl = nil       -- BlockCnt -- chain of current blocks
  fs.pc = 0         -- int -- next position to code (equivalent to 'ncode')
  fs.lasttarget = 0 -- int -- 'label' of last 'jump label'
  fs.jpc = 0        -- int -- number of pending jumps to 'pc'
  --fs.nk = 0       -- int -- number of elements in 'k'
  --fs.np = 0       -- int -- number of elements in 'p'
  fs.firstlocal = 0 -- int -- index of first local var (in Dyndata array)
  fs.nlocvars = 0   -- short -- number of elements in 'f->locvars'
  fs.nactvar = 0    -- lu_byte -- number of active local variables
  fs.nups = 0       -- lu_byte -- number of upvalues
  fs.freereg = 0    -- lu_byte -- first free register
  return fs
end
-- Builds a fresh label/goto descriptor; all fields start out unset.
local function Labeldesc()
  local label = {}
  label.name = nil    -- TString -- label identifier
  label.pc = nil      -- int -- position in code
  label.line = nil    -- int -- line where it appeared
  label.nactvar = nil -- lu_byte -- local level where it appears in current block
  return label
end
-- Builds the dynamic parser data record: three fresh, empty lists.
local function Dyndata()
  local dyd = {}
  dyd.actvar = {} -- ArrayList of Vardesc (short) -- list of active local variables
  dyd.gt = {}     -- Labellist (ArrayList of Labeldesc) -- list of pending gotos
  dyd.label = {}  -- Labellist (ArrayList of Labeldesc) -- list of active labels
  return dyd
end
-- Builds a fresh parser (LexState) record; both fields start out unset.
local function ParserState()
  local state = {}
  state.fs = nil  -- FuncState *
  state.dyd = nil -- Dyndata *
  return state
end
local gotostatname = {[parser.EOZ] = false}
local gotostatnamemt = {__index=gotostatname, __name="gotostatname", __tostring=tostring__name}
gotostatname[parser.FALLBACK] = function(state, token)
assert(type(token) == "string")
state[#state+1] = "goto"
state[#state+1] = token
return state[STATE].next
-- Syntax error hook. Not implemented yet: it ignores both arguments and
-- always raises the placeholder error "NYI".
luaX.syntaxerror = function(ls, msg)
  error("NYI")
end
local gotostat = {[parser.EOZ] = false}
local gotostatmt = {__index=gotostat, __name="gotostat", __tostring=tostring__name}
gotostat[l.TK_NAME] = function(state, token)
return setmetatable({next = state[STATE].next}, gotostatnamemt)
-- maximum number of local variables per function (must be smaller
-- than 250, due to the bytecode format)
local MAXVARS = 200
-- hasmultret TODO
-- eqstr TODO
-- prototypes for recursive non-terminal functions
local statement, expr
-- Reports a semantic error through luaX.syntaxerror.
-- The current token is cleared first so that the final message does not
-- carry a "near <token>" suffix.
local function semerror(ls, msg)
  ls.t_token = nil
  luaX.syntaxerror(ls, msg)
end
local singlevar = {[parser.EOZ] = false}
local singlevarmt = {__index=singlevar, __name="singlevar", __tostring=tostring__name}
singlevar[parser.FALLBACK] = function(state, token)
assert(type(token) == "string")
state[#state+1] = token
return state[STATE].next
-- Raises a syntax error of the form "<token> expected".
local function error_expected(ls, token)
  local message = string.format("%s expected", tostring(token))
  luaX.syntaxerror(ls, message)
end
local primaryexp = {[parser.EOZ] = false}
local primaryexpmt = {__name="primaryexp", __tostring=tostring__name}
primaryexp['('] = function(state, token) end
primaryexp[l.TK_NAME] = function(state, token)
return setmetatable({next=state[STATE].next}, singlevarmt)
-- errorlimit TODO
-- checklimit TODO
-- If the current token equals `c`, saves it to the output, advances the
-- lexer and returns true; otherwise leaves everything untouched and
-- returns false.
local function testnext(ls, c)
  if ls.t_token ~= c then
    return false
  end
  save_token(ls)
  luaX.next(ls)
  return true
end
local suffixedexp = {}
local suffixedexpmt = {__name="suffixedexp", __tostring=tostring__name}
suffixedexp.next = function() end
local exprstat = {}
local exprstatmt = {__index=exprstat, __name="exprstat", __tostring=tostring__name}
exprstat.next = {}
local statementt = {[parser.EOZ] = false}
local statementmt = {__index=statementt, __name="statement", __tostring=tostring__name}
local function statement(state, token)
local cur = state[STATE]
return setmetatable({next = cur.next}, statementmt), true
end
statementt[";"] = function(state, token)
state[#state+1] = token
return "next"
end
statementt[l.TK_IF] = function(state, token) end
statementt[l.TK_WHILE] = function(state, token) end
statementt[l.TK_DO] = function(state, token) end
statementt[l.TK_FOR] = function(state, token) end
statementt[l.TK_REPEAT] = function(state, token) end
statementt[l.TK_FUNCTION] = function(state, token) end
statementt[l.TK_LOCAL] = function(state, token) end
statementt[l.TK_DBCOLON] = function(state, token) end
statementt[l.TK_RETURN] = function(state, token) end
statementt[l.TK_BREAK] = function(state, token)
state[#state+1] = "break"
return "next"
end
statementt[l.TK_GOTO] = function(state, token)
return setmetatable({next = state[STATE].next}, gotostatmt)
end
statementt[parser.FALLBACK] = function(state, token)
return setmetatable({super = state[STATE].next}, exprstatmt), true
-- Verifies that the current token is `c`; reports "<c> expected" otherwise.
local function check(ls, c)
  if ls.t_token == c then
    return
  end
  error_expected(ls, c)
end
local statlistt = {}
local statlistmt = {__index=statlistt, __name="statlist", __tostring=tostring__name}
local function statlist(state, token)
local cur = state[STATE]
return setmetatable(selfify({super = cur.next, withuntil = cur.withuntil}, "next"), statlistmt), true
-- Requires the current token to be `c`, then saves it to the output and
-- advances the lexer.
local function checknext(ls, c)
  check(ls, c) -- reports "<c> expected" on mismatch
  save_token(ls)
  luaX.next(ls)
end
statlistt[l.TK_ELSE] = function() return "super", true end
statlistt[l.TK_ELSEIF] = function() return "super", true end
statlistt[l.TK_END] = function() return "super", true end
statlistt[parser.EOZ] = function() return "super", true end
statlistt[l.TK_UNTIL] = function() return "withuntil", true end
statlistt[parser.FALLBACK] = statement
local mainfunc = setmetatable({}, {__name="mainfunc", __tostring=tostring__name})
mainfunc.withuntil = "super"
mainfunc[parser.EOZ] = parser.FALLBACK
mainfunc[parser.FALLBACK] = statlist
mainfunc.next = {
[parser.EOZ] = {}
}
-- Raises a syntax error with `msg` unless condition `c` is truthy.
local function check_condition(ls, c, msg)
  if c then
    return
  end
  luaX.syntaxerror(ls, msg)
end
local defs = setmetatable({}, {__name="defs", __tostring=tostring__name})
-- Consumes the closing token `what`. When it is missing, reports either a
-- plain "<what> expected" (opener `who` is on the current line) or a message
-- that also names the opener and the line `where` it appeared.
local function check_match(ls, what, who, where)
  if testnext(ls, what) then
    return
  end
  if where == ls.linenumber then
    error_expected(ls, what)
  else
    local message = string.format("%s expected (to close %s at line %d)", tostring(what), tostring(who), where)
    luaX.syntaxerror(ls, message)
  end
end
-- Requires the current token to be a NAME, saves it to the output, advances
-- the lexer and returns the name's semantic info.
local function str_checkname(ls)
  check(ls, TK.NAME)
  local name = ls.t_seminfo -- must be read before advancing
  save_token(ls)
  luaX.next(ls)
  return name
end
-- Initializes an expression descriptor in place: both jump lists (`t`, `f`)
-- are set to NO_JUMP, `k` to the expression kind and `val` to `i`.
-- NOTE(review): NO_JUMP is not defined in the visible part of the file.
local function init_exp(expdesc, expkind, i)
  local nojump = NO_JUMP
  expdesc.t = nojump
  expdesc.f = nojump
  expdesc.k = expkind
  expdesc.val = i
end
-- Turns `e` into a string-constant expression (kind VK) whose value is the
-- result of luaK.stringK for `s` in the current function.
local function codestring(ls, e, s)
  local kind = VK
  init_exp(e, kind, luaK.stringK(ls.fs, s))
end
-- checkname TODO
-- registerlocalvar TODO
-- new_localvar TODO
-- new_localvarliteral_ TODO
-- new_localvarliteral TODO
-- getlocvar TODO
-- adjustlocalvars TODO
-- removevars TODO
-- searchupvalue TODO
-- newupvalue TODO
-- searchvar TODO
-- markupval TODO
-- singlevaraux TODO
-- singlevar TODO
-- adjust_assign TODO
-- Nesting-level bookkeeping hook called on entry to recursive non-terminals.
-- Intentionally a no-op here; the commented-out lines show the C-call-depth
-- accounting that is not carried over.
local function enterlevel(ls)
-- don't bother
--local L = ls.L
--L.nCcalls = L.nCcalls + 1
--checklimit(ls.fs, L.nCcalls, LUAI_MAXCCALLS, "C levels")
end
-- Counterpart of enterlevel; also a no-op (the depth decrement is left
-- commented out).
local function leavelevel(ls)
--ls.L.nCcalls = ls.L.nCcalls - 1
end
-- closegoto TODO
-- findlabel TODO
-- newlabelentry TODO
-- findgotos TODO
-- movegotosout TODO
-- Opens a new block `bl` inside function state `fs`: records the loop flag,
-- the current numbers of active variables, labels and pending gotos, resets
-- the upvalue marker, and pushes `bl` on top of the block chain.
local function enterblock(fs, bl, isloop)
  local dyd = fs.ls.dyd
  bl.isloop = isloop
  bl.nactvar = fs.nactvar
  bl.firstlabel = #dyd.label
  bl.firstgoto = #dyd.gt
  bl.upval = 0
  -- link into the chain of open blocks
  bl.previous = fs.bl
  fs.bl = bl
  --lua_assert(fs.freereg == fs.nactvar)
end
-- breaklabel TODO
-- undefgoto TODO
-- Closes the innermost block of `fs` (the counterpart of enterblock):
-- emits the upvalue-closing jump when needed, closes pending breaks of a
-- loop, pops the block, removes its local variables and labels, and either
-- moves its pending gotos to the enclosing block or reports the first
-- unresolved one.
local function leaveblock(fs)
  local bl = fs.bl
  local ls = fs.ls
  -- enterblock initializes bl.upval to 0, which is truthy in Lua, so the
  -- flag has to be tested explicitly
  local has_upval = bl.upval and bl.upval ~= 0
  if bl.previous and has_upval then
    -- create a 'jump to here' to close upvalues
    local j = luaK.jump(fs)
    luaK.patchclose(fs, j, bl.nactvar)
    luaK.patchtohere(fs, j)
  end
  if bl.isloop then
    breaklabel(ls) -- close pending breaks
  end
  fs.bl = bl.previous
  removevars(fs, bl.nactvar)
  --lua_assert(bl.nactvar == fs.nactvar)
  fs.freereg = fs.nactvar -- free registers
  -- remove local labels: bl.firstlabel is the label COUNT at block entry,
  -- so the block's own labels start at index firstlabel + 1
  local labels = ls.dyd.label
  for i = #labels, bl.firstlabel + 1, -1 do
    labels[i] = nil
  end
  if bl.previous then
    movegotosout(fs, bl)
  elseif bl.firstgoto < #ls.dyd.gt then
    -- bl.firstgoto is likewise a count; the first pending goto of this
    -- block sits right after it in the 1-based list
    undefgoto(ls, ls.dyd.gt[bl.firstgoto + 1])
  end
end
-- addprototype TODO
-- Codes the instruction that creates a new closure in the parent function.
-- The OP_CLOSURE instruction must use the last available register,
-- so that, if it invokes the GC, the GC knows which registers
-- are in use at that time.
-- NOTE(review): '#fs.f.p - 1' mirrors the 0-based 'np - 1' of the C source;
-- confirm the intended index base for the 1-based prototype list.
local function codeclosure(ls, v)
  local parent = ls.fs.prev
  local pc = luaK.codeABx(parent, OP_CLOSURE, 0, #parent.f.p - 1)
  init_exp(v, VRELOCABLE, pc)
  luaK.exp2nextreg(parent, v) -- fix it at the last register
end
-- Starts compiling a new function: links `fs` into the chain of function
-- states of `ls`, resets its code-generation counters, stamps the prototype
-- with the source name and opens the function's outermost block `bl`.
local function open_func(ls, fs, bl)
  -- link the new function state
  fs.prev = ls.fs
  fs.ls = ls
  ls.fs = fs
  -- reset counters
  fs.pc = 0
  fs.lasttarget = 0
  fs.jpc = NO_JUMP
  fs.freereg = 0
  fs.nactvar = 0
  fs.firstlocal = #ls.dyd.actvar
  fs.bl = nil
  -- prototype header
  local proto = fs.f
  proto.source = ls.source
  proto.maxstacksize = 2 -- registers 0/1 are always valid
  enterblock(fs, bl, false)
end
-- Finishes the function currently being compiled: emits the final return,
-- closes its outermost block and makes the enclosing function current again.
local function close_func(ls)
  local current = ls.fs
  local f = current.f
  luaK.ret(current, 0, 0) -- final return
  leaveblock(current)
  -- don't need to worry about reallocating vectors
  --lua_assert(current.bl == nil)
  ls.fs = current.prev
end
-- Tells whether the current token can end a block: ELSE, ELSEIF, END and
-- end-of-input always do; UNTIL only when `withuntil` is truthy.
local block_follow = (function()
  local closers = {[TK.ELSE] = true, [TK.ELSEIF] = true, [TK.END] = true, [parser.EOZ] = true}
  return function(ls, withuntil)
    local tk = ls.t_token
    if closers[tk] then
      return true
    end
    return withuntil and tk == TK.UNTIL
  end
end)()
-- Parses a list of statements until a token that can end a block is seen.
-- A 'return' statement terminates the list, since it must be the last
-- statement of its block.
local function statlist(ls)
  -- statlist -> { stat [';'] }
  while not block_follow(ls, true) do
    -- token constants live in the TK table (a bare TK_RETURN is an
    -- undefined global and would never match)
    if ls.t_token == TK.RETURN then
      statement(ls)
      return -- 'return' must be last statement
    end
    statement(ls)
  end
end
-- fieldsel TODO
-- Parses a bracketed index expression into `v`.
local function yindex(ls, v)
  -- index -> '[' expr ']'
  save_token(ls) -- the '[' itself goes to the output
  luaX.next(ls)  -- skip the '['
  expr(ls, v)
  luaK.exp2val(ls.fs, v)
  checknext(ls, ']')
end
-- recfield TODO
-- closelistfield TODO
-- lastlistfield TODO
-- listfield TODO
-- field TODO
-- constructor TODO
-- parlist TODO
-- Function body parser. Not implemented yet: always raises "NYI" and does
-- not use any of its arguments.
local function body(ls, e, ismethod, line)
-- body -> '(' parlist ')' block END
-- TODO
error("NYI")
end
-- Parses a comma-separated expression list into `v` and returns how many
-- expressions were read (always at least one). Every expression except the
-- last one is pushed to the next register before the following one is read.
local function explist(ls, v)
  -- explist -> expr { ',' expr }
  expr(ls, v)
  local count = 1
  while testnext(ls, ',') do
    luaK.exp2nextreg(ls.fs, v)
    expr(ls, v)
    count = count + 1
  end
  return count
end
-- Parses the arguments of a function call and codes the call itself.
--   ls   : lexer/parser state
--   f    : expression descriptor of the function being called; it is
--          re-initialized as a VCALL expression on exit
--   line : line number passed to check_match and luaK.fixline
-- Accepts '(' [explist] ')', a table constructor or a single string literal;
-- anything else is reported as "function arguments expected".
-- NOTE(review): VVOID, VCALL, OP_CALL, LUA_MULTRET, hasmultret and
-- constructor are not defined in the visible part of the file.
local function funcargs(ls, f, line)
local fs = ls.fs
local args = {}
local base, nparams
local tk = ls.t_token
if tk == '(' then -- funcargs -> '(' [ explist ] ')'
save_token(ls)
luaX.next(ls)
if ls.t_token == ')' then -- arg list is empty?
args.k = VVOID
else
explist(ls, args)
luaK.setmultret(fs, args)
end
check_match(ls, ')', '(', line)
elseif tk == '{' then -- funcargs -> constructor
constructor(ls, args)
elseif tk == TK.STRING then -- funcargs -> STRING
codestring(ls, args, ls.t_seminfo)
save_token(ls)
luaX.next(ls) -- must use 'seminfo' before 'next'
else
luaX.syntaxerror(ls, "function arguments expected")
end
--lua_assert(f.k == VNONRELOC)
base = f.val -- base register for call
if hasmultret(args.k) then
nparams = LUA_MULTRET -- open call
else
if args.k ~= VVOID then
luaK.exp2nextreg(fs, args) -- close last argument
end
-- arguments occupy the registers after the function's own register
nparams = fs.freereg - (base+1)
end
init_exp(f, VCALL, luaK.codeABC(fs, OP_CALL, base, nparams+1, 2))
luaK.fixline(fs, line)
fs.freereg = base+1 -- the call removes the function and its arguments and
-- leaves (unless changed) one result
end
local suffixedexp -- hm.
;(function() -- avoid issues with 200 locals or w/e
-- Parses a primary expression into `v`.
-- primaryexp -> NAME | '(' expr ')'
local function primaryexp(ls, v)
  local tk = ls.t_token
  if tk == '(' then
    local openline = ls.linenumber
    save_token(ls)
    luaX.next(ls)
    expr(ls, v)
    check_match(ls, ')', '(', openline)
    luaK.dischargevars(ls.fs, v)
    return
  end
  if tk == TK.NAME then
    singlevar(ls, v)
    return
  end
  luaX.syntaxerror(ls, "unexpected symbol")
end
-- Parses a primary expression followed by any number of suffixes
-- (field selection, indexing, method call, plain call) into `v`.
-- Returns as soon as the current token does not start another suffix.
function suffixedexp(ls, v)
  -- suffixedexp ->
  -- primaryexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs }
  local fs = ls.fs
  local line = ls.linenumber
  primaryexp(ls, v)
  while true do
    local tk = ls.t_token
    if tk == '.' then -- fieldsel
      fieldsel(ls, v)
    elseif tk == '[' then -- '[' exp1 ']'
      local index = {}
      luaK.exp2anyregup(fs, v)
      yindex(ls, index)
      luaK.indexed(fs, v, index)
    elseif tk == ':' then -- ':' NAME funcargs
      local method = {}
      save_token(ls)
      luaX.next(ls)
      checkname(ls, method)
      luaK.self(fs, v, method)
      funcargs(ls, v, line)
    elseif tk == '(' or tk == TK.STRING or tk == '{' then -- funcargs
      luaK.exp2nextreg(fs, v)
      funcargs(ls, v, line)
    else
      return
    end
  end
end
-- Parses a simple expression into `v`.
-- Literal tokens (numbers, strings, nil/true/false, '...') are consumed here,
-- so the function saves them and advances the lexer at the end. The
-- constructor, function-body and suffixed-expression branches consume their
-- own tokens and therefore return early; falling through would swallow one
-- extra token.
local function simpleexp(ls, v)
  -- simpleexp -> FLT | INT | STRING | NIL | TRUE | FALSE | ... |
  -- constructor | FUNCTION body | suffixedexp
  local tk = ls.t_token
  if tk == TK.FLT then
    init_exp(v, VKFLT, 0)
    v.val = ls.t_seminfo
  elseif tk == TK.INT then
    init_exp(v, VKINT, 0)
    v.val = ls.t_seminfo
  elseif tk == TK.STRING then
    codestring(ls, v, ls.t_seminfo)
  elseif tk == TK.NIL then
    init_exp(v, VNIL, 0)
  elseif tk == TK.TRUE then
    init_exp(v, VTRUE, 0)
  elseif tk == TK.FALSE then
    init_exp(v, VFALSE, 0)
  elseif tk == TK.DOTS then -- vararg
    local fs = ls.fs
    check_condition(ls, fs.f.is_vararg,
      "cannot use '...' outside a vararg function")
    -- opcode spelled like the other opcodes in this file (OP_CLOSURE, OP_CALL)
    init_exp(v, VVARARG, luaK.codeABC(fs, OP_VARARG, 0, 1, 0))
  elseif tk == '{' then
    constructor(ls, v)
    return
  elseif tk == TK.FUNCTION then
    save_token(ls)
    luaX.next(ls)
    body(ls, v, false, ls.linenumber) -- not a method (0 would be truthy in Lua)
    return
  else
    suffixedexp(ls, v)
    return
  end
  save_token(ls)
  luaX.next(ls)
end
-- Returns `op` when it is a unary operator token ('not', '-', '~', '#');
-- returns nothing otherwise.
local function getunopr(op)
  local is_unary = op == TK.NOT or op == '-' or op == '~' or op == '#'
  if is_unary then
    return op
  end
end
-- order intentionally swapped
local priority = {
['+'] = {left=10, right=10},
['-'] = {left=10, right=10},
['*'] = {left=11, right=11},
['%'] = {left=11, right=11},
['^'] = {left=14, right=13},
['/'] = {left=11, right=11},
[TK.IDIV] = {left=11, right=11},
['&'] = {left=6, right=6},
['|'] = {left=4, right=4},
['~'] = {left=5, right=5},
[TK.SHL] = {left=7, right=7},
[TK.SHR] = {left=7, right=7},
[TK.CONCAT] = {left=9, right=8},
[TK.EQ] = {left=3, right=3},
['<'] = {left=3, right=3},
[TK.LE] = {left=3, right=3},
[TK.NE] = {left=3, right=3},
['>'] = {left=3, right=3},
[TK.GE] = {left=3, right=3},
[TK.AND] = {left=2, right=2},
[TK.OR] = {left=1, right=1},
}
-- order intentionally swapped
-- Returns `op` when it is a binary operator, i.e. has an entry in the
-- `priority` table; returns nothing otherwise.
local function getbinopr(op)
  local entry = priority[op]
  if entry then
    return op
  end
end
local UNARY_PRIORITY = 12
-- subexpr -> (simpleexp | unop subexpr) { binop subexpr }
-- where 'binop' is any binary operator with a priority higher than 'limit'
-- Parses a (sub)expression into `v` using operator-precedence climbing.
--   limit : only binary operators whose left priority is higher than this
--           value are consumed
-- Returns the first binary operator that was NOT consumed (nil when the
-- current token is not a binary operator).
local function subexpr(ls, v, limit)
  enterlevel(ls)
  local uop = getunopr(ls.t_token)
  if uop then
    local line = ls.linenumber
    save_token(ls)
    luaX.next(ls)
    subexpr(ls, v, UNARY_PRIORITY)
    luaK.prefix(ls.fs, uop, v, line)
  else
    simpleexp(ls, v)
  end
  -- expand while operators have priorities higher than 'limit'
  local op = getbinopr(ls.t_token)
  while op and priority[op].left > limit do
    local v2 = {} -- fresh descriptor for the right-hand operand
    local line = ls.linenumber
    save_token(ls)
    luaX.next(ls)
    luaK.infix(ls.fs, op, v)
    -- read sub-expression with higher priority
    local nextop = subexpr(ls, v2, priority[op].right)
    luaK.posfix(ls.fs, op, v, v2, line)
    op = nextop
  end
  leavelevel(ls)
  return op -- return first untreated operator
end
-- Parses a full expression into `v`: a sub-expression with priority limit 0,
-- so every binary operator is consumed. The untreated-operator result of
-- subexpr is discarded.
function expr(ls, v)
subexpr(ls, v, 0)
end
end)()
;(function() -- avoid issues with 200 locals or w/e
-- block TODO
-- check_conflict TODO
-- assignment TODO
-- cond TODO
-- Parses a 'goto NAME' or 'break' statement. Both are recorded as a pending
-- goto entry at jump position `pc`; 'break' uses the label name "break".
local function gotostat(ls, pc)
  local line = ls.linenumber
  local label
  if not testnext(ls, TK.GOTO) then
    save_token(ls)
    luaX.next(ls) -- skip break
    label = "break" -- ?
  else
    label = str_checkname(ls)
  end
  local entry = newlabelentry(ls, ls.dyd.gt, label, line, pc)
  findlabel(ls, entry) -- close it if label already defined
end
-- checkrepeated TODO
-- Consumes consecutive statements that generate no code: empty statements
-- (';') and labels ('::').
local function skipnoopstat(ls)
  while true do
    local tk = ls.t_token
    if tk ~= ';' and tk ~= TK.DBCOLON then
      return
    end
    statement(ls)
  end
end
-- labelstat TODO
-- whilestat TODO
-- repeatstat TODO
-- exp1 TODO
-- forbody TODO
-- fornum TODO
-- forlist TODO
-- forstat TODO
-- test_then_block TODO
-- ifstat TODO
-- localfunc TODO
-- localstat TODO
-- funcname TODO
-- funcstat TODO
-- exprstat TODO
-- Parses the remainder of a 'return' statement (the RETURN token itself has
-- already been consumed by the caller) and codes the return instruction.
-- `first` is the first register to return and `nret` the number of values.
-- NOTE(review): hasmultret, VCALL and LUA_MULTRET are not defined in the
-- visible part of the file.
local function retstat(ls)
local fs = ls.fs
local e = {}
local first, nret
if block_follow(ls, true) or ls.t_token == ';' then
first, nret = 0, 0
else
nret = explist(ls, e)
if hasmultret(e.k) then
luaK.setmultret(fs, e)
if e.k == VCALL and nret == 1 then -- tail call?
-- tail-call conversion is not carried over; the original steps are kept
-- below for reference only
--SET_OPCODE(getinstruction(fs,e), OP_TAILCALL)
--lua_assert(GETARG_A(getinstruction(fs,e)) == fs.nactvar)
end
first = fs.nactvar
nret = LUA_MULTRET
else
if nret == 1 then
first = luaK.exp2anyreg(fs, e)
else
luaK.exp2nextreg(fs, e)
first = fs.nactvar
--lua_assert(nret == fs.freereg - first)
end
end
end
luaK.ret(fs, first, nret)
testnext(ls, ';') -- skip optional semicolon
end
-- Parses one statement, dispatching on the current token. Tokens consumed
-- directly here are saved to the output before the lexer is advanced.
-- After the statement, all temporary registers are released.
function statement(ls)
  local line = ls.linenumber
  enterlevel(ls)
  local tk = ls.t_token
  if tk == ';' then -- stat -> ';' (empty statement)
    save_token(ls)
    luaX.next(ls) -- skip ';'
  elseif tk == TK.IF then -- stat -> ifstat
    ifstat(ls, line)
  elseif tk == TK.WHILE then -- stat -> whilestat
    whilestat(ls, line)
  elseif tk == TK.DO then -- stat -> DO block END
    save_token(ls)
    luaX.next(ls) -- skip DO
    block(ls)
    -- token constants live in the TK table; bare TK_END / TK_DO are
    -- undefined globals
    check_match(ls, TK.END, TK.DO, line)
  elseif tk == TK.FOR then -- stat -> forstat
    forstat(ls, line)
  elseif tk == TK.REPEAT then -- stat -> repeatstat
    repeatstat(ls, line)
  elseif tk == TK.FUNCTION then -- stat -> funcstat
    funcstat(ls, line)
  elseif tk == TK.LOCAL then -- stat -> localstat
    save_token(ls)
    luaX.next(ls) -- skip LOCAL
    if testnext(ls, TK.FUNCTION) then -- local function?
      localfunc(ls)
    else
      localstat(ls)
    end
  elseif tk == TK.DBCOLON then -- stat -> label
    save_token(ls)
    luaX.next(ls) -- skip double colon
    labelstat(ls, str_checkname(ls), line)
  elseif tk == TK.RETURN then -- stat -> retstat
    save_token(ls)
    luaX.next(ls) -- skip RETURN
    retstat(ls)
  elseif tk == TK.BREAK -- stat -> breakstat
      or tk == TK.GOTO then -- stat -> 'goto' NAME
    gotostat(ls, luaK.jump(ls.fs))
  else
    exprstat(ls)
  end
  --lua_assert(ls.fs.f.maxstacksize >= ls.fs.freereg and
  --           ls.fs.freereg >= ls.fs.nactvar)
  ls.fs.freereg = ls.fs.nactvar -- free registers
  leavelevel(ls)
end
end)()
-- Compiles the main chunk: opens the top-level function (always vararg),
-- reads the first token, parses statements until end of input and closes
-- the function again.
local function mainfunc(ls, fs)
  local outermost = {}
  open_func(ls, fs, outermost)
  fs.f.is_vararg = true
  -- we don't worry about these:
  --local v = {}
  --init_exp(v, VLOCAL, 0)
  --newupvalue(fs, ls.envn, &v)
  luaX.next(ls) -- read first token
  statlist(ls)
  check(ls, parser.EOZ)
  close_func(ls)
end
-- Parser entry point (the counterpart of luaY.parser). Prepares the lexer
-- state `ls` and a top-level function state, compiles the main chunk and
-- returns a closure-like table whose `p` field is the main prototype table.
local function worst_cratera_parser(ls) -- luaY.parser
  local proto = {}
  local closure = {p = proto}
  local funcstate = {f = proto}
  ls.h = {}
  proto.source = ls.source
  --lua_assert(iswhite(funcstate.f))
  --ls.buff = {} -- ???
  ls.dyd = {actvar = {}, gt = {}, label = {}} -- ???
  if not ls.linenumber then -- not managed by us
    ls.linenumber = 1
  end
  ls.lastline = 1
  mainfunc(ls, funcstate)
  --lua_assert(!funcstate.prev and funcstate.nups == 1 and !ls.fs)
  --lua_assert(#dyd.actvar == 0 and #dyd.gt == 0 and #dyd.label == 0)
  return closure -- close enough
end
local defs = selfify({})
defs[parser.EOZ] = parser.FALLBACK
defs[parser.FALLBACK] = function(state, token) return mainfunc, true end
-- Token handler that drives the coroutine-based parser.
-- On the first token the parser coroutine is created and resumed once with
-- `state` (which becomes the `ls` argument of worst_cratera_parser); every
-- token, including the first, is then fed to it with another resume.
-- Errors raised inside the coroutine are re-raised through assert.
-- Returns the value yielded/returned by the coroutine when it is truthy,
-- otherwise "self".
defs[parser.FALLBACK] = function(state, token)
local coro = state.coro
if not coro then
coro = coroutine.create(worst_cratera_parser)
state.coro = coro
state.t = {} -- token
-- first resume only starts the parser; it runs until its first yield
assert(coroutine.resume(coro, state))
end
local _, override = assert(coroutine.resume(coro, token))
if override then return override end
return "self"
end
return {
defs = defs,

View File

@ -22,17 +22,30 @@ local parser = require "parser"
local luatokens = require "luatokens"
local compiler = require "compiler"
local CRATERA_SEED = nil -- TODO
local LUA_SIGNATURE = string.dump(function() end):sub(1,1)
local function cratera_load(reader)
local function cratera_load(reader, ...)
local chunkname, mode, env = ...
if type(reader) == "string" and reader:sub(1,1) == LUA_SIGNATURE then
-- bytecode
return (loadstring or load)(reader, ...)
end
local f, s, i = parser.stream(luatokens.defs, reader)
if type(s[parser.DATA]) == "string" and s[parser.DATA]:sub(1,1) == LUA_SIGNATURE then
-- bytecode
local function fn()
fn = reader
return s[parser.DATA]
end
return (loadstring or load)(function() return fn() end, ...)
end
local nl = 1
local otherstate = {}
local otherstate = {source=chunkname} -- FIXME
local f, s, i = parser.stream(compiler.defs, function()
local tokens
repeat
local pos, state, transemsg, etoken, estate = f(s, i)
otherstate.line = state.line
otherstate.linenumber = state.line
i = pos
if not i then return nil end
if not state then error(transemsg) end

432
dirtycompiler.lua Normal file
View File

@ -0,0 +1,432 @@
--[[
This file is part of cratera.lua - pure-Lua Cratera-to-Lua transpiler
Copyright (C) 2019 Soni L.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
--]]
-- slow af but works
-- need to run this first
local is51 = (function() local _ENV = {hasenv = true} return not hasenv end)()
local parser = require "parser"
local selfify = parser.selfify
local STATE = parser.STATE
local luatokens = require "luatokens"
local reverse_keywords, reverse_tokens = luatokens.reverse_keywords, luatokens.reverse_tokens
local TK = luatokens.TK
local error, assert, ipairs, tostring, type = error, assert, ipairs, tostring, type
-- try to avoid making too many locals because Lua has a limit to how many locals you can have
local math = {huge=math.huge, floor=math.floor}
local string = {format=string.format, byte=string.byte, gsub=string.gsub}
local table = {insert=table.insert, remove=table.remove}
-- marker for use with selfify()
local SELF = {}
-- other markers
local FINISH = {}
local START_OF_STMT = {}
local END_OF_STMT = {}
local END_OF_CRATERA = {}
-- implementation of myobj:[mytrait].myfunction(...)
local CRATERA_FUNCTION = "function(myobj, mytrait, myfunction, ...) return myobj[mytrait][myfunction](myobj, ...) end"
local EXTRA_DATA = {[TK.NAME] = true, [TK.INT] = true, [TK.FLT] = true, [TK.STRING] = true, [END_OF_STMT] = true}
-- Tells whether results[i] is the token `tk` rather than the payload of a
-- preceding token that carries extra data (e.g. a string literal's text).
-- (note: it's generally safe, and faster, to do results[i] == tk,
-- if tk is one of the values in the TK.* table.)
local function is_tk(results, i, tk)
  if EXTRA_DATA[results[i-1]] then
    -- results[i] is data attached to the previous entry, not a token
    return false
  end
  return results[i] == tk
end
-- Walks backwards from index `i` over '\n' tokens and returns the index of
-- the first entry that is not a newline token.
local function ignore_newlines(results, i)
  local pos = i
  while is_tk(results, pos, '\n') do
    pos = pos - 1
  end
  return pos
end
-- -- --
local defs = selfify({}, SELF)
local finish = selfify({}, SELF)
-- Output stage: each invocation pops one token from the end of
-- state.results (plus its payload for FLT/INT/STRING/NAME tokens) and
-- appends its Lua source text to `state`. Returns SELF while tokens remain
-- and an empty table once results is exhausted.
finish[parser.EOZ] = function(state, token)
  local results = state.results
  local tk = table.remove(results)
  -- NOTE(review): debug trace of every token on stdout -- consider removing
  print((tk == START_OF_STMT and "START_OF_STMT") or (tk == END_OF_STMT and "END_OF_STMT") or tostring(tk))
  if tk == TK.FLT then
    local token = table.remove(results)
    local extra, num, den = 1, token, 1
    assert(token == token and token >= 0, "NYI") -- the tokenizer should never output NaNs or negative values
    if token == math.huge then -- the tokenizer *can* output math.huge tho
      num, den = 1, 0
    else
      -- scale by powers of two until the numerator has no fractional part
      while num ~= math.floor(num) do
        num = num * 2 -- always safe (I think)
        local oldden = den
        den = den * 2
        if den == math.huge then -- subnormals or something?
          extra = oldden
          den = 2
        end
      end
    end
    table.insert(state, string.format('((%d/%d)/%d)', num, den, extra))
  elseif tk == TK.INT then
    local v = table.remove(results)
    -- The file-local `math` table only carries huge/floor, so
    -- math.mininteger is not available here. The minimum integer is the
    -- only non-zero value equal to its own negation (integer arithmetic
    -- wraps around); with float-only numbers the test is never true.
    if v ~= 0 and v == -v then
      -- corner case ( https://github.com/lua/lua/commit/707b0ba6e2dbfd58cf1167dae0e17975904b18aa )
      table.insert(state, string.format('0x%x', v))
    else
      table.insert(state, string.format('(%d)', v)) -- may be negative (overflow)
    end
  elseif tk == TK.STRING then
    -- lua tends to use a backslash and a newline but we already do newline processing,
    -- so we need to replace the escaped newline ("\\\n") with a newline escape ("\\n").
    -- additionally lua 5.1 doesn't handle control characters other than '\0' and '\r' so we need to escape them as well
    local fmted = string.format('%q', table.remove(results))
    fmted = string.gsub(fmted, '\n', 'n')
    if is51 then
      fmted = string.gsub(fmted, "%c", function(c) return string.format("\\%03d", string.byte(c)) end)
    end
    table.insert(state, fmted)
  elseif tk == TK.NAME then
    table.insert(state, table.remove(results))
  elseif type(tk) == "string" then
    table.insert(state, tk)
  elseif tk then
    -- START_OF_STMT and END_OF_STMT are neither keywords nor tokens; this should error in that case.
    table.insert(state, assert(reverse_keywords[tk] or reverse_tokens[tk]))
  else
    return {}
  end
  return SELF
end
defs[FINISH] = finish
-- End of input: reverses state.results in place (entries 1..results.n) and
-- hands over to the FINISH state, which pops the entries off the end one by
-- one, i.e. in original order.
-- state.results is only created by the FALLBACK handler on the first token,
-- so an empty input arrives here without it; an empty list is installed in
-- that case instead of failing on a nil index.
defs[parser.EOZ] = function(state, token)
  local results = state.results
  if not results then
    results = {n = 0}
    state.results = results
  end
  -- flip results around
  local lo, hi = 1, results.n or 0
  while lo < hi do
    results[lo], results[hi] = results[hi], results[lo]
    lo, hi = lo + 1, hi - 1
  end
  return FINISH
end
defs[parser.FALLBACK] = function(state, token)
local results = state.results or (function() state.results = {} return state.results end)()
do -- handle newlines. this allows error messages to correctly map between lua and cratera
local oldline = state.oldline or 1
local linenumber = state.linenumber or 1
if linenumber > oldline then
local count = linenumber-oldline
local len = (results.n or 0)
for i=1, count do
results[len+i] = '\n'
end
results.n = len + count
if EXTRA_DATA[results[len]] then -- we're in the middle of a token with extra data. fix it up.
results[len], results[results.n] = results[results.n], results[len]
end
end
state.oldline = state.linenumber
end
results.n = (results.n or 0) + 1
results[results.n] = token
if not results.skip then -- don't process string literals as tokens
if token == ':' then
-- figure out whether we're in funcname
local i = results.n - 1 -- skip the ':'
local find_statement = true
--while is_tk(results, i, '\n') do -- ignore newlines
-- i = i - 1
--end
i = ignore_newlines(results, i)
while results[i-1] == TK.NAME do
--while is_tk(results, i-2, '\n') do -- ignore newlines
-- i = i - 1
--end
i = ignore_newlines(results, i-2) + 2
if is_tk(results, i-2, '.') then
-- keep going
i = i - 3
elseif results[i-2] == TK.FUNCTION then -- we're in funcname
find_statement = false -- don't even bother with : handling
break
else
-- found start of statement
find_statement = false
-- mark start
i = i - 1
table.insert(results, i, START_OF_STMT)
results.n = results.n + 1
-- no need to fix existing END_OF_STMT because this code
-- only detects patterns of the form Name {'.' Name} ':',
-- which do not contain subexpressions.
-- mark end
table.insert(results, results.n + 1, END_OF_STMT)
table.insert(results, results.n + 2, i)
results.n = results.n + 2
break
end
end
if find_statement then
while true do
--while is_tk(results, i, '\n') do -- ignore newlines
-- i = i - 1
--end
i = ignore_newlines(results, i)
if is_tk(results, i, ')') then
-- (prefixexp) or (funcargs)
-- find matching '('
local depth = 1
repeat
i = i - 1
if is_tk(results, i, '(') then
depth = depth - 1
elseif is_tk(results, i, ')') then
depth = depth + 1
end
until depth == 0
elseif is_tk(results, i, ']') then
-- [indexing]
-- find matching '['
local depth = 1
repeat
i = i - 1
if is_tk(results, i, '[') then
depth = depth - 1
elseif is_tk(results, i, ']') then
depth = depth + 1
end
until depth == 0
elseif results[i-1] == TK.NAME then
-- Name or '.' Name
i = i - 2
i = ignore_newlines(results, i)
if is_tk(results, i, '.') then
-- skip '.'
i = i - 1
else
-- found start of statement
break
end
elseif is_tk(results, i, '}') then
-- prefixexp '{' table '}'
local newi = i
local depth = 1
repeat
newi = newi - 1
if is_tk(results, newi, '[') then
depth = depth - 1
elseif is_tk(results, newi, ']') then
depth = depth + 1
end
until depth == 0
local checki = ignore_newlines(results, newi-1)
-- do I need these checks?
if is_tk(results, checki, ']') or
is_tk(results, checki, '}') or
is_tk(results, checki, ')') or
results[checki-1] == TK.NAME or
results[checki-1] == TK.STRING then
i = newi
else
-- syntax error?
error("syntax error")
end
elseif results[i-1] == TK.STRING then
-- prefixexp "string"
-- prefixexp 'string'
-- prefixexp [[string]]
local newi = i-1
local checki = ignore_newlines(results, newi-1)
-- do I need these checks?
if is_tk(results, checki, ']') or
is_tk(results, checki, '}') or
is_tk(results, checki, ')') or
results[checki-1] == TK.NAME or
results[checki-1] == TK.STRING then
i = newi
else
-- syntax error?
error("syntax error")
end
else
-- found start of statement
break
end
i = i - 1
end
-- mark start
i = i + 1
table.insert(results, i, START_OF_STMT)
results.n = results.n + 1
-- fix existing END_OF_STMT
for k=i, #results do
if results[k] == END_OF_STMT then
local v = results[k+1]
if v > i then -- this should always be true?
results[k+1] = v + 1
end
end
end
-- mark end
table.insert(results, results.n + 1, END_OF_STMT)
table.insert(results, results.n + 2, i)
results.n = results.n + 2
end
elseif token == '(' or token == '{' or token == TK.STRING then
local i = results.n - 1 -- skip the '(' / '{' / TK_STRING
i = ignore_newlines(results, i)
-- possible patterns:
-- ':' Name '(' -- plain Lua thing, ignore
-- ':' Name '.' Name '(' -- cratera string traits
-- ':' '[' exp ']' '.' Name '(' -- cratera object traits
-- ':' '[' exp ']' '(' -- supported in lua 5.3 cratera patch but no reason to support it here.
if results[i-1] == TK.NAME then
local tk_myfunction = i-1
-- maybe cratera
i = ignore_newlines(results, i-2)
if results[i-1] == END_OF_STMT then
-- lua, but we need to fix it up
-- we could just replace them with dummies, but
local pos = results[i]
table.remove(results, i) -- remove END_OF_STMT's value
table.remove(results, i-1) -- remove END_OF_STMT
table.remove(results, pos) -- remove START_OF_STMT
results.n = results.n - 3 -- adjust length
assert(results[i-3] == ':')
elseif is_tk(results, i, '.') then
-- maybe cratera
local tk_dot = i
local inject_cratera = false
i = ignore_newlines(results, i-1)
if results[i-1] == TK.NAME then
local tk_mytrait = i-1
i = ignore_newlines(results, i-2)
if results[i-1] == END_OF_STMT then
assert(token == '(', "unimplemented")
-- definitely cratera (stmt ':' Name '.' Name '(')
-- convert into '(' stmt ',' String ',' String
-- convert names into strings
results[tk_mytrait] = TK.STRING
inject_cratera = true
end -- else not cratera
elseif is_tk(results, i, ']') then
local tk_right = i
local depth = 1
repeat
i = i - 1
if is_tk(results, i, '[') then
depth = depth - 1
elseif is_tk(results, i, ']') then
depth = depth + 1
end
until depth == 0
local tk_left = i
i = ignore_newlines(results, i-1)
if results[i-1] == END_OF_STMT then
assert(token == '(', "unimplemented")
-- definitely cratera (':' '[' exp ']' '.' Name '(')
-- convert into '(' stmt ',' '(' exp ')' ',' String
-- replace '[' and ']'
results[tk_right] = ')'
results[tk_left] = '('
inject_cratera = true
end -- else not cratera
end
if inject_cratera then
-- convert name into string
results[tk_myfunction] = TK.STRING
-- replace '.' with ','
results[tk_dot] = ','
local pos = results[i]
-- remove END_OF_STMT
table.remove(results, i-1)
table.remove(results, i-1)
results.n = results.n - 2
-- replace ':' with ','
results[ignore_newlines(results, i-2)] = ','
-- replace START_OF_STMT with '(', and '(' with ','
results[pos], results[results.n] = '(', ','
-- inject cratera
table.insert(results, pos, ')')
table.insert(results, pos, CRATERA_FUNCTION)
table.insert(results, pos, '(')
-- check for potential prefixexp and correct for it
if is_tk(results, pos-1, ']') or
is_tk(results, pos-1, '}') or
is_tk(results, pos-1, ')') or
results[pos-2] == TK.NAME or
results[pos-2] == TK.STRING then
table.insert(results, pos, ';')
results.n = results.n + 1
end
results.n = results.n + 3
-- tag it for '(' ')' (no argument) calls
results.n = results.n + 1
results[results.n] = END_OF_CRATERA
end
end -- else not cratera
end
elseif token == '}' then
-- TODO unimplemented
elseif token == ')' then
local i = results.n - 1 -- skip the ')'
i = ignore_newlines(results, i)
if results[i] == END_OF_CRATERA then
-- '(' CRATERA_FUNCTION ')' '(' something ',' END_OF_CRATERA ')'
-- need to fix it up into
-- '(' CRATERA_FUNCTION ')' '(' something ')'
table.remove(results, i-1)
table.remove(results, i-1)
results.n = results.n - 2
else
-- still might need to remove an END_OF_CRATERA somewhere
i = i + 1
local depth = 1
repeat
i = i - 1
if is_tk(results, i, '(') then
depth = depth - 1
elseif is_tk(results, i, ')') then
depth = depth + 1
elseif results[i] == END_OF_CRATERA then
table.remove(results, i)
results.n = results.n - 1
break
elseif not results[i] then
error("syntax error")
end
until depth == 0
end
end
end
results.skip = EXTRA_DATA[token]
return SELF
end
return {defs = defs}

84
dirtycratera.lua Normal file
View File

@ -0,0 +1,84 @@
--[[
cratera.lua - pure-Lua Cratera-to-Lua transpiler
Copyright (C) 2019 Soni L.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
--]]
-- This code is highly experimental and not very good
local parser = require "parser"
local luatokens = require "luatokens"
local compiler = require "dirtycompiler"
-- First byte of a precompiled Lua chunk, obtained by dumping an empty function.
-- cratera_load compares it against the first byte of its input to recognise
-- bytecode that must be handed to the stock loader instead of being transpiled.
local LUA_SIGNATURE = string.dump(function() end):sub(1,1)
--- Load a Cratera chunk by transpiling it to plain Lua and passing it to `load`.
-- Follows the calling convention of `load`: `reader` is a string or a function
-- returning successive pieces of the chunk; the extra arguments are
-- `chunkname, mode, env` and are forwarded verbatim to the underlying loader.
-- Input whose first byte equals LUA_SIGNATURE is treated as precompiled
-- bytecode and is loaded without going through the transpiler.
-- @param reader string or reader function supplying the chunk
-- @param ... chunkname, mode, env (all optional)
-- @return whatever the underlying `load` / `loadstring` call returns
local function cratera_load(reader, ...)
  -- Only the chunk name is needed locally; everything else is forwarded as
  -- `...` so that an absent `env` stays absent (an explicit nil would differ).
  local chunkname = ...
  if type(reader) == "string" and reader:sub(1,1) == LUA_SIGNATURE then
    -- bytecode
    return (loadstring or load)(reader, ...)
  end
  local f, s, i = parser.stream(luatokens.defs, reader)
  if type(s[parser.DATA]) == "string" and s[parser.DATA]:sub(1,1) == LUA_SIGNATURE then
    -- bytecode: replay the piece the tokenizer stream already pulled, then
    -- keep reading from the original reader.
    local replayed = false
    -- Always `load` here: the chunk is supplied by a function, and
    -- `loadstring` (Lua 5.1) only accepts strings.
    return load(function()
      if replayed then
        return reader()
      end
      replayed = true
      return s[parser.DATA]
    end, ...)
  end
  local otherstate = {source=chunkname} -- FIXME
  -- The closure passed below still refers to the tokenizer's f, s, i: the new
  -- locals of the same names only come into scope after this statement.
  local f, s, i = parser.stream(compiler.defs, function()
    local tokens
    repeat
      local pos, state, transemsg = f(s, i)
      -- state may be nil on failure; guard so the real error below is reported
      if state then
        otherstate.linenumber = state.line
      end
      i = pos
      if not i then return nil end
      if not state then error(transemsg) end
      -- drain the tokens collected in the state into a fresh list
      tokens = {}
      for idx, v in ipairs(state) do
        state[idx] = nil
        tokens[idx] = v
      end
    until #tokens > 0 or not transemsg
    return tokens
  end, otherstate)
  -- Pull the next batch of compiled tokens and join them with spaces. The
  -- leading "" makes every piece start with a separator, so consecutive pieces
  -- never glue two tokens together.
  local function next_piece()
    local tokens
    repeat
      local pos, state, transemsg, etoken = f(s, i)
      i = pos
      if not i then return nil end
      if not state then error(transemsg .. " " .. tostring(etoken)) end
      tokens = {""}
      for idx, v in ipairs(state) do
        state[idx] = nil
        tokens[idx+1] = v
      end
    until #tokens > 1 or not transemsg
    return table.concat(tokens, " ")
  end
  local first = true
  return load(function()
    local piece = next_piece()
    if first then
      first = false
      -- drop the separator in front of the very first piece; an empty stream
      -- yields nil, which simply ends the chunk
      return piece and string.sub(piece, 2)
    end
    return piece
  end, ...)
end
return {load = cratera_load}

View File

@ -89,6 +89,7 @@ local TK_AND, TK_BREAK,
TK_SHL, TK_SHR,
-- misc
TK_DBCOLON, TK_EOS,
-- values/constants
TK_FLT, TK_INT, TK_NAME, TK_STRING =
{}, {},
{}, {}, {}, {}, {}, {}, {},
@ -748,13 +749,16 @@ local tokens = {
TK_DBCOLON = TK_DBCOLON, TK_EOS = TK_EOS,
TK_FLT = TK_FLT, TK_INT = TK_INT, TK_NAME = TK_NAME, TK_STRING = TK_STRING
}
-- TK exposes the same token objects as `tokens`, keyed without the three
-- character "TK_" prefix (TK.NAME is tokens.TK_NAME, and so on). Each token
-- also receives a metatable carrying its full name so tostring(token) is
-- readable.
local TK = {}
for name, token in pairs(tokens) do
  local meta = {
    __name = name,
    __tostring = function(self)
      return getmetatable(self).__name
    end,
  }
  setmetatable(token, meta)
  TK[name:sub(4)] = token
end
return {
defs = defs,
tokens = tokens,
TK = TK,
reverse_keywords = reverse_keywords,
reverse_tokens = {
[TK_IDIV] = "//", [TK_CONCAT] = "..", [TK_DOTS] = "...", [TK_EQ] = "==", [TK_GE] = ">=", [TK_LE] = "<=", [TK_NE] = "~=",

View File

@ -176,6 +176,7 @@ local COLLECT = {}
return {
STATE = STATE,
DATA = DATA,
COLLECT = COLLECT,
EOZ = EOZ,
FALLBACK = FALLBACK,

View File

@ -1,17 +1,38 @@
--- Print every argument, then return all of them unchanged, so the call can
-- be wrapped around any expression without altering its value(s).
local function printr(...)
  print(...)
  return ...
end
-- Accumulates the first value of every printr_collect call, in call order.
local collect = {}
--- Same as printr, but additionally appends the first argument to `collect`.
local function printr_collect(...)
  local first = ...
  collect[#collect + 1] = first
  return printr(...)
end
-- used to print what the lua parser (load) is seeing, after cratera has done its thing
loadstring = nil
local realload = load
load = function(target, ...)
if type(target) == "function" then
return realload(function() return printr(target()) end, ...)
return realload(function() return printr_collect(target()) end, ...)
else
return realload(printr(target), ...)
return realload(printr_collect(target), ...)
end
end
local cratera = require "cratera"
local cratera = require "dirtycratera"
assert(printr(cratera.load("")))()
-- first test: does it handle lua code properly?
assert(printr(cratera.load(io.lines("dirtycompiler.lua", "*a")())))()
print("-----------------------------------------------------------------------------------------------------------------------")
print(table.concat(collect))
collect = {}
-- second test: does it handle cratera tests properly?
assert(printr(cratera.load(io.lines("tests.cratera", "*a")())))()
print("-----------------------------------------------------------------------------------------------------------------------")
print(table.concat(collect))

86
tests.cratera Normal file
View File

@ -0,0 +1,86 @@
-- Cratera-specific tests. Run Lua test suite separately.
local t = setmetatable({}, { __tostring=function()return"t"end})
local F = {}
local T = {}
t.t = t
t.tt = t
t[T] = t
t.f = print
t.ff = print
t.g = function(a) print(a[1]) end
t[F] = print
local _f="f"
local _t="t"
-- print("------ t:[k]()")
-- t:f(1) -- plain old lua
-- t:[_f](2) -- simple string key in register
-- t:[string.char(string.byte("f"))](3,32,33) -- string key from function
-- t:["f".."f"](4) -- string key from concatenation
-- t:["f"..string.sub("afun",2,2)](5,52,53) -- concatenation with function result
-- t:[(string.sub("afun",2,2))](6,62,63) -- function result in parentheses
-- t:[(function()return"f"end)()](7) -- closure in key
-- -- be careful with the ambiguous function call!!!
-- ;(function()return t end)():[(function()return"f"end)()](8) -- closure in object and in key
-- t:[F](9) -- object key
-- standard lua tests (compiler/passthrough)
do
print("------ standard lua tests (compiler/passthrough)")
local x
t["t"]:f(1)
end
print("------ t:[k].f()")
t:t.f(1) -- string identifier
t:[_t].f(2) -- string key in register
t:[string.char(string.byte("t"))].f(3,32,33) -- string key from function
t:["t".."t"].f(4) -- string key from concatenation
t:["t"..string.sub("atable",2,2)].f(5,52,53) -- concatenation with function result
t:[(string.sub("atable",2,2))].f(6,62,63) -- function result in parentheses
t:[(function()return"t"end)()].f(7) -- closure in key
do end(function()return t end)():[(function()return"t"end)()].f(8) -- closure in object and in key, with "end" keyword at the start
-- be careful with the ambiguous function call!!!
;(function()return t end)():[(function()return"t"end)()].f(9) -- closure in object and in key, with semicolon at the start
t:[T].f(10) -- object key
_=(t:[_t].f(11)) -- inside ()
t:[_t].g {12} -- table call
t:[_t].f "13" -- string call
entity = {}
inventory = {get=false, set=false, size=false}
-- Build an inventory trait table with a fixed capacity.
-- The returned table provides size(), set(e, i, o) and get(e, i); items are
-- stored under numeric keys of that same table (e[inventory][i]).
-- NOTE(review): e:[inventory].size() is the Cratera trait call; presumably it
-- invokes e[inventory].size with e as the first argument - confirm.
inventory.new=function(size)
-- size() ignores its arguments and reports the captured capacity
local t = {size=function() return size end}
-- store o in slot i of e's inventory; raises when i is outside 1..size
function t.set(e, i, o)
if i <= 0 or i > e:[inventory].size() then error() end
e[inventory][i] = o
end
-- fetch slot i of e's inventory; raises when i is outside 1..size
function t.get(e, i)
if i <= 0 or i > e:[inventory].size() then error() end
return e[inventory][i]
end
return t
end
-- Wrap e's inventory trait in a plain table of closures: each of get/set/size
-- forwards its arguments to the matching e:[inventory] trait call, so the
-- result can be used without having e at hand.
inventory.of=function(e) -- helper for passing standalone inventories around
return {get=function(...)return e:[inventory].get(...)end, set=function(...)return e:[inventory].set(...)end, size=function(...)return e:[inventory].size(...)end}
end
entity[inventory] = inventory.new(5)
entity:[inventory].set(1, "Hello World!")
print(entity:[inventory].get(1))
for i=1, entity:[inventory].size() do
print(i, entity:[inventory].get(i))
end
local myinv = inventory.of(entity)
for i=1, myinv.size() do
print("wrapped", i, myinv.get(i))
end