diff options
-rw-r--r-- | test/unit/formatc.lua | 236 | ||||
-rw-r--r-- | test/unit/helpers.moon | 97 | ||||
-rw-r--r-- | test/unit/preprocess.moon | 155 | ||||
-rw-r--r-- | test/unit/set.moon | 72 |
4 files changed, 544 insertions, 16 deletions
-- file: test/unit/formatc.lua (new file)
--[[ Copyright (c) 2009 Peter "Corsix" Cawley

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. --]]

-- this C parser was taken from Corsix-TH, I'm sure this could be done much
-- better (i.e.: I think everything I do could be substitutions made with LPeg
-- during parsing), but I've just learned enough basic LPeg to make this
-- work.
-- see: http://lua-users.org/wiki/LpegRecipes

local lpeg = require "lpeg"

local C, P, R, S, V = lpeg.C, lpeg.P, lpeg.R, lpeg.S, lpeg.V
local Carg, Cc, Cp, Ct = lpeg.Carg, lpeg.Cc, lpeg.Cp, lpeg.Ct

-- LPeg grammar that splits C source into a flat list of tokens. Every token
-- is captured as a two element table: { text, type }. The extra match
-- argument (Carg(1)) is a state table with the fields `line` and
-- `line_start`, which the `newline` rule updates for error reporting.
local tokens = P { "tokens";
  -- Comment of form /* ... */
  comment = Ct(P"/*" * C((V"newline" + (1 - P"*/"))^0) * P"*/" * Cc"comment"),

  -- Single line comment
  line_comment = Ct(P"//" * C((1 - V"newline")^0) * Cc"comment_line"),

  -- Single platform independent line break which increments line number
  newline = (P"\r\n" + P"\n\r" + S"\r\n") * (Cp() * Carg(1)) / function(pos, state)
    state.line = state.line + 1
    state.line_start = pos
  end,

  -- Line continuation
  line_extend = Ct(C(P[[\]] * V"newline") * Cc"line_extend"),

  -- Whitespace of any length (includes newlines)
  whitespace = Ct(C((S" \t" + V"newline")^1) * Cc"whitespace"),

  -- Special form of #include with filename followed in angled brackets (matches 3 tokens)
  include = Ct(C(P"#include") * Cc"preprocessor") *
            Ct(C(S" \t"^1) * Cc"whitespace") *
            Ct(C(P"<" * (1 - P">")^1 * P">") * Cc"string"),

  -- Preprocessor instruction. Longer directive names are listed before their
  -- prefixes ("ifdef"/"ifndef" before "if") because LPeg's choice is ordered.
  preprocessor = V"include" +
                 Ct(C(P"#" * P" "^0 * ( P"define" + P"elif" + P"else" + P"endif" + P"#" +
                                        P"error" + P"ifdef" + P"ifndef" + P"if" + P"import" +
                                        P"include" + P"line" + P"pragma" + P"undef" + P"using"
                                      ) * #S" \r\n\t") * Cc"preprocessor"),

  -- Identifier of form [a-zA-Z_][a-zA-Z0-9_]*
  identifier = Ct(C(R("az","AZ","__") * R("09","az","AZ","__")^0) * Cc"identifier"),

  -- Single character in a string
  string_char = R("az","AZ","09") + S"$%^&*()_-+={[}]:;@~#<,>.!?/ \t" + (P"\\" * S[[ntvbrfa\?'"0x]]),

  -- String literal
  string = Ct(C(P"'" * (V"string_char" + P'"')^0 * P"'" +
                P'"' * (V"string_char" + P"'")^0 * P'"') * Cc"string"),

  -- Operator
  operator = Ct(C(P">>=" + P"<<=" + P"..." +
                  P"::" + P"<<" + P">>" + P"<=" + P">=" + P"==" + P"!=" +
                  P"||" + P"&&" + P"++" + P"--" + P"->" + P"+=" + P"-=" +
                  P"*=" + P"/=" + P"|=" + P"&=" + P"^=" + S"+-*/=<>%^|&.?:!~,") * Cc"operator"),

  -- Misc. char (token type is the character itself)
  char = Ct(C(S"[]{}();") / function(x) return x, x end),

  -- Hex, octal or decimal number
  int = Ct(C((P"0x" * R("09","af","AF")^1) + (P"0" * R"07"^0) + R"09"^1) * Cc"integer"),

  -- Floating point number.
  -- The exponent is a SEQUENCE: marker, optional sign, digits. In LPeg `*`
  -- binds tighter than `+`, so writing `S"eE" + S"+-"^-1 * R"09"^1` would be a
  -- choice between a bare "e" and "[+-]digits", and "1+2" would lex as a float.
  f_exponent = S"eE" * S"+-"^-1 * R"09"^1,
  f_terminator = S"fFlL",
  float = Ct(C(
    R"09"^1 * V"f_exponent" * V"f_terminator"^-1 +
    R"09"^0 * P"." * R"09"^1 * V"f_exponent"^-1 * V"f_terminator"^-1 +
    R"09"^1 * P"." * R"09"^0 * V"f_exponent"^-1 * V"f_terminator"^-1
  ) * Cc"float"),

  -- Any token
  token = V"comment" +
          V"line_comment" +
          V"identifier" +
          V"whitespace" +
          V"line_extend" +
          V"preprocessor" +
          V"string" +
          V"char" +
          V"operator" +
          V"float" +
          V"int",

  -- Error for when nothing else matches
  error = (Cp() * C(P(1) ^ -8) * Carg(1)) / function(pos, where, state)
    error(("Tokenising error on line %i, position %i, near '%s'")
      :format(state.line, pos - state.line_start + 1, where))
  end,

  -- Match end of input or throw error
  finish = -P(1) + V"error",

  -- Match stream of tokens into a table
  tokens = Ct(V"token" ^ 0) * V"finish",
}

-- Tokenise a string of C source.
-- Returns a list of { text, type } pairs; raises a Lua error (with line and
-- column information) when a piece of input matches no token rule.
local function TokeniseC(str)
  return tokens:match(str, 1, {line = 1, line_start = 1})
end

-- Turn a list of values into a lookup table: set{"a", "b"}.a == true.
local function set(t)
  local s = {}
  for _, v in ipairs(t) do
    s[v] = true
  end
  return s
end

local C_keywords = set {
  "break", "case", "char", "const", "continue", "default", "do", "double",
  "else", "enum", "extern", "float", "for", "goto", "if", "int", "long",
  "register", "return", "short", "signed", "sizeof", "static", "struct",
  "switch", "typedef", "union", "unsigned", "void", "volatile", "while",
}

-- Very primitive C formatter that tries to put "things" inside braces on one
-- line. This is a step done after preprocessing the C source to ensure that
-- the duplicate line detector can more reliably pick out identical declarations.
--
-- an example:
--   struct mystruct
--   {
--     int a;
--     int b;
--   };
--
-- would become:
--   struct mystruct
--   { int a; int b; };
--
-- The first one will have a lot of false positives (the line '{' for
-- example), the second one is more unique.
--
-- Takes the preprocessed C source as a string and returns the reformatted
-- source as a single string. Tokenising errors are raised as Lua errors.
local function formatc(str)
  local tokens = TokeniseC(str)
  local result = {}
  local block_level = 0
  local allow_one_nl = false
  local end_at_brace = false

  for _, token in ipairs(tokens) do
    local typ = token[2]
    if typ == '{' then
      block_level = block_level + 1
    elseif typ == '}' then
      block_level = block_level - 1

      if block_level == 0 and end_at_brace then
        -- we just closed the body of a function definition (announced by
        -- 'static'); it has no trailing ';', so the '}' itself ends the
        -- statement and gets the newline.
        token[1] = token[1] .. "\n"
        end_at_brace = false
      end
    elseif typ == 'identifier' then
      -- static usually indicates an inline header function, which has no
      -- trailing ';', so we have to add a newline after the '}' ourselves.
      if token[1] == 'static' then
        end_at_brace = true
      end
    elseif typ == 'preprocessor' then
      -- preprocessor directives don't end in ';' but need their newline, so
      -- we're going to allow the next newline to pass.
      allow_one_nl = true
    elseif typ == ';' then
      if block_level == 0 then
        -- if we're not inside a block, we're at the basic statement level,
        -- and ';' indicates we're at the end of a statement, so we end it
        -- with a newline.
        token[1] = ";\n"
        -- the statement that contained 'static' turned out to be a plain
        -- declaration (e.g. `static int x;`). Forget about it, otherwise the
        -- next unrelated top-level '}' would be split from its ';'.
        end_at_brace = false
      end
    elseif typ == 'whitespace' then
      -- replace all whitespace by one space
      local repl = " "

      -- except when allow_one_nl is true and there's a newline in the whitespace
      if allow_one_nl and string.find(token[1], "[\r\n]+") then
        -- in that case we replace all whitespace by one newline
        repl = "\n"
        allow_one_nl = false
      end

      token[1] = string.gsub(token[1], "%s+", repl)
    end
    result[#result + 1] = token[1]
  end

  return table.concat(result)
end

-- uncomment the following lines (and comment the return) for standalone
-- operation (very handy for debugging)
--
-- Arguments: the header to preprocess, and optionally the literal string
-- 'no' to print the preprocessed text without running formatc on it.
local function standalone(...)
  local args = { ... }
  require "moonscript"
  local Preprocess = require("preprocess")
  Preprocess.add_to_include_path('./../../src')

  local input = Preprocess.preprocess_stream(args[1])
  local raw = input:read('*all')
  input:close()

  local formatted
  if #args == 2 and args[2] == 'no' then
    formatted = raw
  else
    formatted = formatc(raw)
  end

  print(formatted)
end
-- standalone(...)
+ +return formatc diff --git a/test/unit/helpers.moon b/test/unit/helpers.moon index 77d491a008..e84c569143 100644 --- a/test/unit/helpers.moon +++ b/test/unit/helpers.moon @@ -1,4 +1,19 @@ ffi = require 'ffi' +lpeg = require 'lpeg' +formatc = require 'test.unit.formatc' +Set = require 'test.unit.set' +Preprocess = require 'test.unit.preprocess' + +-- add some standard header locations +-- TODO(aktau, jszakmeister): optionally pass more header locations via env +Preprocess.add_to_include_path('./src') +Preprocess.add_to_include_path('./.deps/usr/include') +Preprocess.add_to_include_path('./build/config') + +if ffi.abi('32bit') + Preprocess.add_to_include_path('/opt/neovim-deps/32/include') +else + Preprocess.add_to_include_path('/opt/neovim-deps/include') -- load neovim shared library testlib = os.getenv 'NVIM_TEST_LIB' @@ -7,22 +22,72 @@ unless testlib libnvim = ffi.load testlib --- Luajit ffi parser doesn't understand preprocessor directives, so --- this helper function removes common directives before passing it the to ffi. --- It will return a pointer to the library table, emulating 'requires' -cimport = (path) -> - header_file = io.open path, 'rb' - - if not header_file - error "cannot find #{path}" - - header = header_file\read '*a' - header_file.close! - header = string.gsub header, '#include[^\n]*\n', '' - header = string.gsub header, '#ifndef[^\n]*\n', '' - header = string.gsub header, '#define[^\n]*\n', '' - header = string.gsub header, '#endif[^\n]*\n', '' - ffi.cdef header +trim = (s) -> + s\match'^%s*(.*%S)' or '' + +-- a Set that keeps around the lines we've already seen +export cdefs +if cdefs == nil + cdefs = Set! + +export imported +if imported == nil + imported = Set! + +-- some things are just too complex for the LuaJIT C parser to digest. We +-- usually don't need them anyway. 
-- Drops every line of `body` that contains a construct the LuaJIT C parser
-- chokes on and returns the remaining lines joined by "\n".
filter_complex_blocks = (body) ->
  -- plain (non-pattern) substrings that disqualify a line:
  --   "(^)"        Objective-C block syntax, the LuaJIT ffi doesn't understand it
  --   "_ISwupper"  matched verbatim; any line containing it is dropped
  rejected = {"(^)", "_ISwupper"}
  kept = {}
  for line in body\gmatch("[^\r\n]+")
    keep = true
    for marker in *rejected
      if string.find(line, marker, 1, true) ~= nil
        keep = false
        break
    if keep
      table.insert(kept, line)
  table.concat(kept, "\n")

-- use this helper to import C files, you can pass multiple paths at once,
-- this helper will return the C namespace of the nvim library.
-- Paths that were imported before are skipped, and only declarations (lines)
-- that have not been fed to the ffi yet are passed to ffi.cdef.
cimport = (...) ->
  -- first decide which paths are new, only then record them as imported
  fresh = {}
  for path in *{...}
    unless imported\contains(path)
      fresh[#fresh + 1] = path
  for path in *fresh
    imported\add(path)

  if #fresh == 0
    return libnvim

  -- run the headers through the C preprocessor and slurp the result
  stream = Preprocess.preprocess_stream(unpack(fresh))
  source = stream\read("*a")
  stream\close!

  -- one statement per line, minus the lines the ffi cannot parse
  source = filter_complex_blocks(formatc(source))

  -- collect the trimmed lines in an insertion-ordered set
  candidates = Set!
  for line in source\gmatch("[^\r\n]+")
    candidates\add(trim(line))

  -- keep only the lines we have never seen, and remember them for next time
  candidates\diff(cdefs)
  cdefs\union(candidates)

  -- feed the new lines to the LuaJIT ffi in the order in which they appeared
  -- in the preprocessed output; nothing to do when there are none
  unless candidates\size! == 0
    ffi.cdef(table.concat(candidates\to_table!, "\n"))

  libnvim

-- file: test/unit/preprocess.moon (new file)
-- helps managing loading different headers into the LuaJIT ffi.
-- Untested on windows, will probably need quite a bit of adjustment to run
-- there.

ffi = require("ffi")

-- candidate compilers, in order of preference
ccs = {}

env_cc = os.getenv("CC")
if env_cc
  table.insert(ccs, {path: "/usr/bin/env #{env_cc}", type: "gcc"})

if ffi.os == "Windows"
  table.insert(ccs, {path: "cl", type: "msvc"})

table.insert(ccs, {path: "/usr/bin/env cc", type: "gcc"})
table.insert(ccs, {path: "/usr/bin/env gcc", type: "gcc"})
table.insert(ccs, {path: "/usr/bin/env gcc-4.9", type: "gcc"})
table.insert(ccs, {path: "/usr/bin/env gcc-4.8", type: "gcc"})
table.insert(ccs, {path: "/usr/bin/env gcc-4.7", type: "gcc"})
table.insert(ccs, {path: "/usr/bin/env clang", type: "clang"})
table.insert(ccs, {path: "/usr/bin/env icc", type: "gcc"})

-- quote a string for use as a single POSIX shell word; strings made only of
-- "safe" characters are returned untouched.
quote_me = '[^%w%+%-%=%@%_%/]' -- complement (needn't quote)
shell_quote = (str) ->
  if string.find(str, quote_me) or str == ''
    "'" .. string.gsub(str, "'", [['"'"']]) .. "'"
  else
    str

-- resolve path redirections ("dir/.." and "./") so that equal files end up
-- with equal names.
normalize_path = (path) ->
  -- collapse "dir/.." pairs. Removing one pair can expose the next one
  -- ("a/b/../../c"), so repeat until the path stops changing.
  previous = nil
  while previous != path
    previous = path
    path = path\gsub "/([^/%s]+)/%.%.", (dir) ->
      -- "../.." is not a "dir/.." pair; returning nil keeps the match as is
      if dir == ".."
        nil
      else
        ""

  -- single dot (.): only as a whole path component, never the tail of "../"
  path = path\gsub("/%./", "/")
  path = path\gsub("^%./", "")
  path

-- parse Makefile format dependencies (the output of `cc -M`) into a Lua
-- table (a list of normalized header paths, in the order they were listed)
parse_make_deps = (deps) ->
  -- remove line breaks and line concatenators
  deps = deps\gsub("\n", "")\gsub("\\", "")

  -- remove the Makefile "target:" element
  deps = deps\gsub(".+:", "")

  -- remove redundant spaces
  deps = deps\gsub(" +", " ")

  -- split according to token (space in this case) and normalize each path
  [normalize_path(token) for token in deps\gmatch("[^%s]+")]

-- will produce a string that represents a meta C header file that includes
-- all the passed in headers.
-- I.e.:
--
-- headerize({"stdio.h", "math.h"}, true)
-- produces:
-- #include <stdio.h>
-- #include <math.h>
--
-- headerize({"vim.h", "memory.h"}, false)
-- produces:
-- #include "vim.h"
-- #include "memory.h"
headerize = (headers, global) ->
  pre = '"'
  post = pre
  if global
    pre = "<"
    post = ">"

  formatted = ["#include #{pre}#{hdr}#{post}" for hdr in *headers]
  table.concat(formatted, "\n")

class Gcc
  -- preprocessor flags that will hopefully make the compiler produce C
  -- declarations that the LuaJIT ffi understands. This table lives on the
  -- class, so include paths added through any instance are shared.
  @@preprocessor_extra_flags = {
    '-D "aligned(ARGS)="',
    '-D "__attribute__(ARGS)="',
    '-D "__asm(ARGS)="',
    '-D "__asm__(ARGS)="',
    '-D "__inline__="',
    '-D_GNU_SOURCE'
  }

  -- path: the command used to invoke the compiler
  new: (path) =>
    @path = path

  -- appends one `-I "<path>"` flag per argument to the shared flag list
  add_to_include_path: (...) =>
    paths = {...}
    for path in *paths
      directive = '-I ' .. '"' .. path .. '"'
      @@preprocessor_extra_flags[#@@preprocessor_extra_flags + 1] = directive

  -- returns a list of the headers files upon which this file relies
  dependencies: (hdr) =>
    out = io.popen("#{@path} -M #{hdr} 2>&1")
    deps = out\read("*a")
    out\close!

    if deps
      parse_make_deps(deps)
    else
      nil

  -- returns a stream representing a preprocessed form of the passed-in
  -- headers. Don't forget to close the stream by calling the close() method
  -- on it.
  preprocess_stream: (...) =>
    paths = {...}
    -- create pseudo-header
    pseudoheader = headerize(paths, false)
    defines = table.concat(@@preprocessor_extra_flags, ' ')
    -- interpolate the quoted header directly: passing it through gsub as a
    -- replacement string would give '%' in a path a special meaning
    cmd = "echo #{shell_quote(pseudoheader)} | #{@path} #{defines} -std=c99 -P -E -"
    -- lfs = require("lfs")
    -- print("CWD: #{lfs.currentdir!}")
    -- print("CMD: #{cmd}")
    -- io.stderr\write("CWD: #{lfs.currentdir!}\n")
    -- io.stderr\write("CMD: #{cmd}\n")
    io.popen(cmd)

class Clang extends Gcc
class Msvc extends Gcc

type_to_class = {
  "gcc": Gcc,
  "clang": Clang,
  "msvc": Msvc
}

-- true when the shell command could be run and exited successfully
command_succeeds = (cmd) ->
  status = os.execute(cmd)
  -- Lua 5.1 and LuaJIT return the exit status (0 on success), Lua 5.2+
  -- returns true on success
  status == 0 or status == true

-- returns a compiler object for the first candidate in `ccs` that can
-- actually be executed, or nil when `ccs` is empty.
find_best_cc = (ccs) ->
  sink = if ffi.os == "Windows" then "NUL" else "/dev/null"

  -- walk the candidates in order of preference. An io.popen handle is truthy
  -- even when the command does not exist, so the exit status is checked.
  for meta in *ccs
    if command_succeeds("#{meta.path} -v > #{sink} 2>&1")
      return type_to_class[meta.type](meta.path)

  -- nothing could be verified: fall back to the most preferred candidate,
  -- which is what this function historically always returned
  first = ccs[1]
  if first
    return type_to_class[first.type](first.path)
  nil

-- find the best cc. If os.execute causes problems on windows (like popping up
-- a console window) we might consider using something like this:
-- http://scite-ru.googlecode.com/svn/trunk/pack/tools/LuaLib/shell.html#exec
cc = find_best_cc(ccs)

return {
  includes: (hdr) -> cc\dependencies(hdr)
  preprocess_stream: (...) -> cc\preprocess_stream(...)
  add_to_include_path: (...) -> cc\add_to_include_path(...)
}

-- file: test/unit/set.moon (new file)
-- a set class for fast union/diff, can always return a table with the lines
-- in the same relative order in which they were added by calling the
-- to_table method. It does this by keeping two lua tables that mirror each
-- other:
-- 1) index => item
-- 2) item => index
class Set
  -- items: optional table whose values become the initial elements
  new: (items) =>
    @tbl = {}
    @items = {}
    @nelem = 0
    -- highest index ever handed out. It only grows, so a new element can
    -- never land in the hole left behind by a removed one.
    @last_idx = 0
    if type(items) == 'table'
      @union_table(items)

  -- adds the argument Set to this Set
  union: (other) =>
    for e in other\iterator!
      @add(e)

  -- adds the argument table to this Set
  union_table: (t) =>
    for k,v in pairs(t)
      @add(v)

  -- subtracts the argument Set from this Set
  diff: (other) =>
    -- always walk the smaller of the two sets. Clearing existing keys while
    -- traversing with pairs is allowed, so removing in the loop is safe.
    if other\size! > @size!
      -- this set is smaller than the other set
      for e in @iterator!
        if other\contains(e)
          @remove(e)
    else
      -- this set is larger than the other set
      for e in other\iterator!
        if @contains(e)
          @remove(e)

  add: (it) =>
    if not @contains(it)
      -- `#@tbl` is unspecified once remove has created gaps, use the counter
      @last_idx += 1
      @tbl[@last_idx] = it
      @items[it] = @last_idx
      @nelem += 1

  remove: (it) =>
    if @contains(it)
      idx = @items[it]
      @tbl[idx] = nil
      @items[it] = nil
      @nelem -= 1

  -- returns the element's index (truthy) when present, false otherwise
  contains: (it) =>
    @items[it] or false

  size: => @nelem
  raw_tbl: => @tbl
  raw_items: => @items
  -- iterates over the elements (they are the keys of @items), unordered
  iterator: => pairs(@items)

  -- returns a gap-free list of the elements in insertion order
  to_table: =>
    -- there might be gaps in @tbl, so we have to be careful and sort first
    keys = [idx for idx, _ in pairs(@tbl)]
    table.sort(keys)
    copy = [@tbl[idx] for idx in *keys]
    copy

return Set