Commit dbc211f3 authored by bbguimaraes

wip cpp.lua lexer

parent 68421025
#!/usr/bin/env lua
require "lib.strict"
local output <const> = require "lib.output"
local lexer <const> = require "lib.lexer"
local parser <const> = require "lib.parser"
local span <const> = require "lib.span"
local utils <const> = require "lib.utils"
local verbose = false
local lexeis = false
local lexer_only = false
local color = true
local function parse_args(args)
......@@ -16,12 +21,28 @@ local function parse_args(args)
end
if a == "-v" then
verbose = true
elseif a == "-l" or a == "--lexer-only" then
lexeis = true
lexer_only = true
elseif a == "--lexer" then
lexeis = true
elseif a == "--no-color" then
color = false
end
end
end
local function print_lexeis(f, l)
local ret = true
for id, s, line, col in l:iter() do
ret = ret and id ~= lexer.UNKNOWN
print(string.format(
"%d:%d %s %q",
line, col, lexer.to_str(id), utils.escape(s:str())))
end
return ret
end
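-- A rough example of the listing for an input containing the single line
-- `#define N 1` (exact quoting depends on utils.escape):
--
--   1:1 DEFINE "#define"
--   1:8 SPACE " "
--   1:9 IDENTIFIER "N"
--   1:10 SPACE " "
--   1:11 IDENTIFIER "1"
--   1:12 NEW_LINE "\n"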
local function process_file(f)
if verbose then
io.stderr:write(f, "\n\n")
......@@ -30,9 +51,14 @@ local function process_file(f)
local out <const> = verbose
and output.output:new(color)
or output.nop_output:new()
local parser <const> = parser.parser:new(src, out)
if lexeis then
print_lexeis(f, lexer.new(src, out))
if lexer_only then return end
end
local lexer <const> = lexer.new(src, out)
local parser <const> = parser.parser:new(lexer, out)
local ret <const> = assert(xpcall(parser.parse, debug.traceback, parser))
out:dump(parser, io.stderr)
out:dump(src, parser, io.stderr)
io.write(parser.out, "\n")
return ret
end
......
local lexer <const> = require "lib.lexer"
local stack <const> = require "lib.stack"
local utils <const> = require "lib.utils"
......@@ -11,35 +12,48 @@ local parse_parameters
--- Registers a function-like macro.
--- \param name Name of the macro, i.e. the text that is to be replaced.
--- \param repl_b Beginning of the replacement text.
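--- \param l Lexer positioned just after the opening parenthesis of the parameter list.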
local function define(out, t, name, src, b, e, repl_b)
local params <const>, params_e <const> = parse_parameters(src, repl_b)
local params_s <const> = src:sub(repl_b, params_e - 1)
repl_b = src:find("%S", params_e) or (#src + 1)
local repl <const> = src:sub(repl_b)
t[name] = {params, repl, not not utils.find(params, "...")}
local function define(l, out, t, name, src, b, e, repl_b)
local params <const>, is_var <const> = parse_parameters(l, src, repl_b)
l:ignore(lexer.SPACE)
local repl <const> = l:concat_until(lexer.NEW_LINE)
t[name] = {params, repl, is_var}
local params_str <const> = {}
for _, x in ipairs(params) do
table.insert(params_str, x:str())
end
out.add_step(
src:sub(b, e),
name,
"added function-like macro %q"
.. " with parameters %q and replacement %q\n",
name, src:sub(repl_b, params_e), repl)
name, table.concat(params_str, ", "), repl)
end
--- Processes the parameter section of a function-like macro definition.
--- \returns Two values:
--- - A list of parameter name tokens.
--- - Whether the parameter list is variadic, i.e. contains `...`.
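--- E.g. the token stream `a, b, ...)` yields the three parameter tokens and `true`.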
function parse_parameters(src, b)
assert(src:sub(b, b) == "(")
function parse_parameters(l)
local ret <const> = {}
local e = src:find("%)", b)
if not e then
error("invalid argument list: ", src:sub(b))
local is_var = false
while true do
local id, name = l:expect(
lexer.IDENTIFIER, lexer.ELLIPSIS, lexer.CLOSE_PARENS)
if id == lexer.CLOSE_PARENS then
break
end
is_var = is_var or (id == lexer.ELLIPSIS)
table.insert(ret, name)
if l:expect(lexer.COMMA, lexer.CLOSE_PARENS) == lexer.CLOSE_PARENS then
break
end
l:ignore(lexer.SPACE)
end
return utils.split(src:sub(b + 1, e - 1), "[^,]+"), e + 1
return ret, is_var
end
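--- Unregisters a function-like macro.
--- \param name As passed to \ref define.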
local function undef(t, name)
return utils.remove(t, name) ~= nil
local function undef(out, t, name)
if utils.remove(t, name) == nil then
return false
end
out.add_step(name, "removed function-like macro %q", name)
return true
end
local replace0
......@@ -109,7 +123,7 @@ function fn_args_map(name, args, args_str, params, is_var)
check_argc(name, args_str, #params, #args, is_var)
local ret <const> = {}
for i, x in ipairs(args) do
local p <const> = params[i]
local p <const> = params[i]:str()
if p == "..." then
ret["__VA_ARGS__"] = table.concat(args, ", ", i)
break
......
local span <const> = require "lib.span"
local utils <const> = require "lib.utils"
local lexer <const> = {}
lexer.__index = lexer
local UNKNOWN <const> = 0
local NEW_LINE <const> = 1
local SPACE <const> = 2
local DEFINE <const> = 3
local UNDEFINE <const> = 4
local DOUBLE_POUND <const> = 5
local POUND <const> = 6
local OPEN_PARENS <const> = 7
local CLOSE_PARENS <const> = 8
local COMMA <const> = 9
local ELLIPSIS <const> = 10
local IDENTIFIER <const> = 11
local STR = {
[UNKNOWN] = "UNKNOWN",
[NEW_LINE] = "NEW_LINE",
[SPACE] = "SPACE",
[DEFINE] = "DEFINE",
[UNDEFINE] = "UNDEFINE",
[DOUBLE_POUND] = "DOUBLE_POUND",
[POUND] = "POUND",
[OPEN_PARENS] = "OPEN_PARENS",
[CLOSE_PARENS] = "CLOSE_PARENS",
[COMMA] = "COMMA",
[ELLIPSIS] = "ELLIPSIS",
[IDENTIFIER] = "IDENTIFIER",
}
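--- Creates a lexer over the source fragment \p src, wrapped in a \ref span.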
function lexer:new(src)
return setmetatable({
src = span.new(src),
line = 1,
column = 1,
stack = {},
}, self)
end
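--- Returns an iterator for use in a generic `for`, yielding
--- (id, text, line, column) for each token.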
local function iter(l) return l:next() end
function lexer:iter() return iter, self end
local function new_line(self)
self.line = self.line + 1
self.column = 1
end
local function ignore_escaped_new_lines(self)
local src <const> = self.src
while true do
local m = src:match("^\\\n")
if not m then
return
end
new_line(self)
src:sub(#m + 1)
end
end
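--- Advances the source past the matched text \p m and returns the token tuple
--- (id, text, line, column).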
local function consume(self, id, m)
local col <const> = self.column
self.column = self.column + #m
self.src:sub(#m + 1)
return id, m, self.line, col
end
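--- Matches and consumes the next token at the head of the source.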
function lexer:match()
local src <const> = self.src
ignore_escaped_new_lines(self)
local m = src:match("^\n")
if m then
new_line(self)
return consume(self, NEW_LINE, m)
end
m = src:match("^%s+")
if m then return consume(self, SPACE, m) end
m = src:match("^#")
if m then
local mm = src:match("^define%f[^%w]", 2)
if mm then return consume(self, DEFINE, m:sub(1, 1 + #mm)) end
mm = src:match("^undef%f[^%w]", 2)
if mm then return consume(self, UNDEFINE, m:sub(1, 1 + #mm)) end
mm = src:match("^#", 2)
if mm then return consume(self, DOUBLE_POUND, m:sub(1, 2)) end
return consume(self, POUND, m)
end
m = src:match("^%(")
if m then return consume(self, OPEN_PARENS, m) end
m = src:match("^%)")
if m then return consume(self, CLOSE_PARENS, m) end
m = src:match("^,")
if m then return consume(self, COMMA, m) end
m = src:match("^%.%.%.")
if m then return consume(self, ELLIPSIS, m) end
m = src:match("^[%w_]+%f[^%w]")
if m then return consume(self, IDENTIFIER, m) end
local c <const> = span.new(src)
c:sub(1, 1)
return consume(self, UNKNOWN, c)
end
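--- Returns the next token without consuming it.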
function lexer:peek()
if #self.stack ~= 0 then
return table.unpack(self.stack[#self.stack])
end
local ret <const> = {self:next()}
table.insert(self.stack, ret)
return table.unpack(ret)
end
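--- Consumes and returns the next token, or nothing at the end of the input.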
function lexer:next()
if #self.stack ~= 0 then
return table.unpack(table.remove(self.stack))
end
if not self.src:empty() then
return self:match()
end
end
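--- Consumes the next token, raising an error unless its id is one of the
--- arguments.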
function lexer:expect(...)
local id <const>, str <const> = self:next()
if utils.find({...}, id) then
return id, str
end
error(string.format("invalid token %s %q", STR[id], str:str()))
end
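--- Consumes the next token only if its id is one of the arguments.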
function lexer:ignore(...)
if utils.find({...}, (self:peek())) then
table.remove(self.stack)
end
end
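--- Discards tokens until one whose id is among the arguments is found; that
--- token is left unconsumed.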
function lexer:find(...)
local ids <const> = {...}
while true do
local id <const>, str <const> = self:peek()
if utils.find(ids, id) then
return id, str
end
table.remove(self.stack)
end
end
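--- Consumes tokens and concatenates their text until a token whose id is among
--- the arguments is reached; the terminating token is consumed but not
--- included in the result.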
function lexer:concat_until(...)
local ids <const>, ret <const> = {...}, {}
for id, str in self:iter() do
if utils.find(ids, id) then
break
end
table.insert(ret, str:str())
end
return table.concat(ret, "")
end
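--- Usage sketch, assuming the argument is the raw source text (which
--- \ref lexer:new wraps in a \ref span) and that tokens expose `:str()` as in
--- `print_lexeis`:
---
---   local lexer <const> = require "lib.lexer"
---   local l <const> = lexer.new("#define X y\n")
---   for id, s, line, col in l:iter() do
---       print(line, col, lexer.to_str(id), s:str())
---   end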
return {
UNKNOWN = UNKNOWN,
NEW_LINE = NEW_LINE,
SPACE = SPACE,
DEFINE = DEFINE,
UNDEFINE = UNDEFINE,
DOUBLE_POUND = DOUBLE_POUND,
POUND = POUND,
OPEN_PARENS = OPEN_PARENS,
CLOSE_PARENS = CLOSE_PARENS,
COMMA = COMMA,
ELLIPSIS = ELLIPSIS,
IDENTIFIER = IDENTIFIER,
lexer = lexer,
to_str = function(x) return STR[x] end,
new = function(...) return lexer:new(...) end,
}
local fn <const> = require "lib.fn"
local lexer <const> = require "lib.lexer"
local stack <const> = require "lib.stack"
local utils <const> = require "lib.utils"
......@@ -9,21 +10,22 @@ end
--- Registers an object-like macro.
--- \param name Name of the macro, i.e. the text that is to be replaced.
--- \param repl Replacement text of the macro.
local function define(out, t, name, src, b, e, repl_b)
repl_b = src:find("%S", repl_b) or (#src + 1)
local repl <const> = src:sub(repl_b)
local function define(out, t, name, repl, src)
t[name] = repl
out.add_step(
src:sub(b, e),
name,
"added object-like macro %q with replacement text %q\n",
name, repl)
end
--- Unregisters an object-like macro.
--- \param name As passed to \ref define.
local function undef(t, name)
return utils.remove(t, name) ~= nil
local function undef(out, t, name)
if utils.remove(t, name) == nil then
return false
end
out.add_step(name, "removed object-like macro %q\n", name)
return true
end
--- Checks that \p name is an object-like macro and expands it.
......
......@@ -63,28 +63,25 @@ function output:add_step(stack, src, fmt, ...)
table.insert(t, "")
table.insert(t, "")
end
local ret <const> = string.format("%s%s", pre, table.concat(t, "\n"))
-- io.stderr:write(ret)
table.insert(self.steps, ret)
table.insert(self.steps, string.format("%s%s", pre, table.concat(t, "\n")))
end
local function dump_fn_macro(name, t, f)
local _, params <const>, repl <const> = table.unpack(t)
local params <const>, repl <const> = table.unpack(t)
f:write(string.format(" %q:\n", name))
f:write(" parameters:\n")
for _, x in ipairs(params) do
f:write(string.format(" - %q\n", x))
f:write(string.format(" - %q\n", x:str()))
end
f:write(string.format(" replacement: %q\n", repl))
end
local function dump_obj_macro(name, t, f)
local _, repl <const> = table.unpack(t)
local function dump_obj_macro(name, repl, f)
f:write(string.format(" %q:\n", name))
f:write(string.format(" replacement: %q\n", repl))
end
function output:dump(parser, f)
function output:dump(src, parser, f)
f:write("=== macros ===\n")
if next(parser.fn) then
f:write("\nfunction:\n")
......@@ -94,8 +91,8 @@ function output:dump(parser, f)
end
if next(parser.obj) then
f:write("\nobject:\n")
for name, t in pairs(parser.obj) do
dump_obj_macro(name, t, f)
for name, repl in pairs(parser.obj) do
dump_obj_macro(name, repl, f)
end
end
if #self.steps ~= 0 then
......@@ -106,7 +103,7 @@ function output:dump(parser, f)
end
end
f:write("\n")
f:write("=== input ===\n\n", parser.src, "\n")
f:write("=== input ===\n\n", src:str(), "\n")
f:write("=== output ===\n\n")
end
......
local fn <const> = require "lib.fn"
local lexer <const> = require "lib.lexer"
local obj <const> = require "lib.obj"
local span <const> = require "lib.span"
local stack <const> = require "lib.stack"
......@@ -10,9 +11,9 @@ parser.__index = parser
--- Initializes a new parser.
--- \param lexer \ref lexer over the complete source code fragment.
--- \param output Debug output stack/handler. \see lib.output
function parser:new(src, output)
function parser:new(lexer, output)
local ret <const> = {
src = src,
lexer = lexer,
fn = {},
obj = {},
stack = stack.stack:new(),
......@@ -43,7 +44,7 @@ end
--- Removes all C/C++ comments from `src`.
function parser.remove_comments(src)
return src:str():gsub("//.-*\n", ""):gsub("/*.-*/", "")
return (src:str():gsub("//.-*\n", ""):gsub("/*.-*/", ""))
end
--- Creates a new string by replacing the interval `[b,e]` in `src` with `repl`.
......@@ -53,112 +54,67 @@ end
--- Public entry point, processes the entire input and populates `self.out`.
function parser:parse()
local src = span.new(self.remove_comments(self.src))
local out <const> = {}
local i, last <const> = 1, #self.src._s + 1
while not src:empty() do
local dir_src = src:match("^%s*#")
local _, _, b = self.src._s:find("^%s*()#", i)
if dir_src then
src:sub(#dir_src - 1)
local src_ <const>, e <const> = self:line_at(b)
self:parse_directive(src_)
assert(e - b < #src)
src:sub(e - b)
i = e
for id, str in self.lexer:iter() do
if id == lexer.DEFINE then
self:parse_define()
elseif id == lexer.UNDEFINE then
self:parse_undef()
else
b = self.src._s:find("[^\n]", i) or (#self.src._s + 1)
local e <const> = self.src._s:find("()\n%s*()#", b) or #self.src._s
self:process_source(self.src._s:sub(b, e):gsub("\n*$", ""), out)
assert(e + 1 - i < #src)
src:sub(e + 1 - i)
i = e + 1
str = str:str() .. self.lexer:concat_until(lexer.NEW_LINE)
self:process_source(id, str, out)
end
end
self.out = table.concat(out, "\n")
end
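-- Rough sketch of the token-driven flow above, assuming parser:replace (elided
-- below) expands registered macros: for the input
--
--   #define N 1
--   int x = N;
--
-- the DEFINE token sends the first line through parse_define, which registers
-- the object-like macro N; the remaining tokens are concatenated back into
-- "int x = N;" and passed to process_source, leaving self.out as "int x = 1;".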
--- Returns the complete line at position `i` replacing new-line escapes.
function parser:line_at(i)
assert(self.src._s:sub(i, i) ~= "\n")
local ret <const> = {}
while true do
local b, e = self.src._s:find("\n+%s*", i)
if not b then
table.insert(ret, self.src._s:sub(i))
i = e + 1
break
end
b = b - 1
if self.src._s:sub(b, b) == "\\" then
table.insert(ret, self.src._s:sub(i, b - 1))
i = e + 1
else
table.insert(ret, self.src._s:sub(i, b))
i = e + 1
break
end
end
return table.concat(ret, ""), i
end
--- Processes a full directive (`#…`) line.
function parser:parse_directive(src)
assert(src:sub(1, 1) == "#")
self:add_step(src, "processing directive line\n")
local name <const>, pos <const> = src:match("^#%s*(%w+)%s+()")
if not name then
error("unsupported directive: " .. src)
end
if name == "define" then
self:parse_define(src, pos)
elseif name == "undef" then
self:parse_undef(src:sub(pos))
end
assert(not self.lexer:peek())
end
--- Processes the name/replacement portion of a `#define` line.
function parser:parse_define(src, b)
local _, e, name <const> = src:find("^([%w_]+)", b)
if not name then
error(string.format("invalid identifier %q in #define\n", src))
function parser:parse_define()
self:add_step("TODO", "processing #define line\n")
assert(self.lexer:next() == lexer.SPACE)
local id, name = self.lexer:next()
if id ~= lexer.IDENTIFIER then
error(string.format(
"invalid token %s %q", lexer.to_str(id), name:str()))
end
if fn.defined(self.fn, name) or obj.defined(self.obj, name) then
error("macro redefined: " .. name)
end
local repl_b = e + 1
local si <close> = stack.output_item:new(self.output, nil, src, b, e)
if src:sub(repl_b, repl_b) == "(" then
fn.define(self.out_interface, self.fn, name, src, b, e, repl_b)
local ident <const> = name:str()
id, name = self.lexer:next()
local si <close> = stack.output_item:new(self.output, nil, "TODO", 1, 1)
if id == lexer.OPEN_PARENS then
fn.define(self.lexer, self.out_interface, self.fn, ident, "TODO")
elseif id == lexer.SPACE then
obj.define(
self.out_interface, self.obj, ident,
self.lexer:concat_until(lexer.NEW_LINE), "TODO")
elseif id == lexer.NEW_LINE then
obj.define(self.out_interface, self.obj, ident, "", "TODO")
else
obj.define(self.out_interface, self.obj, name, src, b, e, repl_b)
error(string.format(
"invalid token %s %q", lexer.to_str(id), utils.escape(name:str())))
end
end
--- Processes the name portion of an `#undef` line.
function parser:parse_undef(src)
local name <const> = src:match("^[%w_]+")
if not name then
error(string.format("invalid identifier %q in #undef\n", src))
end
if not (obj.undef(self.obj, name) or fn.undef(self.fn, name)) then
error("undefined macro %q in #undef\n", name)
end
function parser:parse_undef()
self:add_step("TODO", "processing #undef line\n")
assert(self.lexer:next() == lexer.SPACE)
local _, name = self.lexer:expect(lexer.IDENTIFIER)
self.lexer:expect(lexer.NEW_LINE)
name = name:str()
if obj.undef(self.out_interface, self.obj, name) then return end
if fn.undef(self.out_interface, self.fn, name) then return end
error(string.format("undefined macro %q in #undef\n", name))
end
--- Processes a source block without preprocessor directives.
function parser:process_source(src, out)
function parser:process_source(id, src, out)
self:add_step(src, "processing source block\n")
local i = 1
while true do
local b <const>, e <const>, l <const> = src:find("([^\n]+)", i)
if not l then
break
end
local si <close> = stack.item:new(self.stack)
table.insert(out, (self:replace(l)))
i = e + 1
end
local si <close> = stack.item:new(self.stack)
table.insert(out, (self:replace(src)))
end
--- Performs a complete, recursive replacement of a source fragment.
......
local utils <const> = require "lib.utils"
local span <const> = {
__len = function(self) return self.e - self.b end,
}
span.__index = span
local function check(s)
assert(s._s)
assert(s.b)
assert(s.e)