diff options
Diffstat (limited to 'test/unit/formatc.lua')
-rw-r--r-- | test/unit/formatc.lua | 236 |
1 files changed, 236 insertions, 0 deletions
--[[ Copyright (c) 2009 Peter "Corsix" Cawley

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. --]]

-- this C parser was taken from Corsix-TH, I'm sure this could be done much
-- better (i.e.: I think everything I do could be substitutions made with LPeg
-- during parsing), but I've just learned enough basic LPeg to make this
-- work.
-- see: http://lua-users.org/wiki/LpegRecipes

local lpeg = require "lpeg"

local C, P, R, S, V = lpeg.C, lpeg.P, lpeg.R, lpeg.S, lpeg.V
local Carg, Cc, Cp, Ct = lpeg.Carg, lpeg.Cc, lpeg.Cp, lpeg.Ct

-- LPeg grammar that splits C source text into a flat list of tokens.
-- Every token is captured as a two-element table {text, type}. The grammar
-- expects one extra match argument (read through Carg(1)): a state table with
-- the numeric fields `line` and `line_start`, which the `newline` rule updates
-- and the `error` rule reads to report a position.
local tokens = P { "tokens";
  -- Comment of form /* ... */
  comment = Ct(P"/*" * C((V"newline" + (1 - P"*/"))^0) * P"*/" * Cc"comment"),

  -- Single line comment
  line_comment = Ct(P"//" * C((1 - V"newline")^0) * Cc"comment_line"),

  -- Single platform independent line break which increments line number
  newline = (P"\r\n" + P"\n\r" + S"\r\n") * (Cp() * Carg(1)) / function(pos, state)
    state.line = state.line + 1
    state.line_start = pos
  end,

  -- Line continuation
  line_extend = Ct(C(P[[\]] * V"newline") * Cc"line_extend"),

  -- Whitespace of any length (includes newlines)
  whitespace = Ct(C((S" \t" + V"newline")^1) * Cc"whitespace"),

  -- Special form of #include with filename followed in angled brackets (matches 3 tokens)
  include = Ct(C(P"#include") * Cc"preprocessor") *
            Ct(C(S" \t"^1) * Cc"whitespace") *
            Ct(C(P"<" * (1 - P">")^1 * P">") * Cc"string"),

  -- Preprocessor instruction
  -- (longer names are listed before their prefixes, e.g. "ifdef" before "if",
  -- because LPeg's ordered choice commits to the first alternative that matches)
  preprocessor = V"include" +
                 Ct(C(P"#" * P" "^0 * ( P"define" + P"elif" + P"else" + P"endif" + P"#" +
                                        P"error" + P"ifdef" + P"ifndef" + P"if" + P"import" +
                                        P"include" + P"line" + P"pragma" + P"undef" + P"using"
                                      ) * #S" \r\n\t") * Cc"preprocessor"),

  -- Identifier of form [a-zA-Z_][a-zA-Z0-9_]*
  identifier = Ct(C(R("az","AZ","__") * R("09","az","AZ","__")^0) * Cc"identifier"),

  -- Single character in a string
  -- ('|' and '`' are accepted too; without them a literal such as "a|b"
  -- would end in a tokenising error)
  string_char = R("az","AZ","09") + S"$%^&*()_-+={[}]:;@~#<,>.!?/|` \t" + (P"\\" * S[[ntvbrfa\?'"0x]]),

  -- String literal
  string = Ct(C(P"'" * (V"string_char" + P'"')^0 * P"'" +
                P'"' * (V"string_char" + P"'")^0 * P'"') * Cc"string"),

  -- Operator
  operator = Ct(C(P">>=" + P"<<=" + P"..." +
                  P"::" + P"<<" + P">>" + P"<=" + P">=" + P"==" + P"!=" +
                  P"||" + P"&&" + P"++" + P"--" + P"->" + P"+=" + P"-=" +
                  P"*=" + P"/=" + P"|=" + P"&=" + P"^=" + S"+-*/=<>%^|&.?:!~,") * Cc"operator"),

  -- Misc. char (token type is the character itself)
  char = Ct(C(S"[]{}();") / function(x) return x, x end),

  -- Hex, octal or decimal number
  int = Ct(C((P"0x" * R("09","af","AF")^1) + (P"0" * R"07"^0) + R"09"^1) * Cc"integer"),

  -- Floating point number
  -- The exponent is a sequence: marker, optional sign, digits. It has to be
  -- written with '*' throughout; with '+' after the marker, Lua's operator
  -- precedence turns the rule into "marker alone OR sign and digits", which
  -- cuts "1e5" into the float "1e" followed by the integer "5".
  f_exponent = S"eE" * S"+-"^-1 * R"09"^1,
  f_terminator = S"fFlL",
  float = Ct(C(
    R"09"^1 * V"f_exponent" * V"f_terminator"^-1 +
    R"09"^0 * P"." * R"09"^1 * V"f_exponent"^-1 * V"f_terminator"^-1 +
    R"09"^1 * P"." * R"09"^0 * V"f_exponent"^-1 * V"f_terminator"^-1
  ) * Cc"float"),

  -- Any token
  token = V"comment" +
          V"line_comment" +
          V"identifier" +
          V"whitespace" +
          V"line_extend" +
          V"preprocessor" +
          V"string" +
          V"char" +
          V"operator" +
          V"float" +
          V"int",

  -- Error for when nothing else matches
  -- (captures up to 8 characters of the remaining input for the message)
  error = (Cp() * C(P(1) ^ -8) * Carg(1)) / function(pos, where, state)
    error(("Tokenising error on line %i, position %i, near '%s'")
      :format(state.line, pos - state.line_start + 1, where))
  end,

  -- Match end of input or throw error
  finish = -P(1) + V"error",

  -- Match stream of tokens into a table
  tokens = Ct(V"token" ^ 0) * V"finish",
}

-- Tokenise a string of C source.
-- @param str the C source text
-- @return an array of {text, type} tables, in input order
-- Raises a "Tokenising error ..." error when part of the input matches no
-- token rule.
local function TokeniseC(str)
  -- the third argument is the state table the grammar reads through Carg(1)
  return tokens:match(str, 1, {line = 1, line_start = 1})
end

-- Turn an array into a set.
-- @param t array of values
-- @return a table that maps every value of `t` to true
local function set(t)
  local s = {}
  for _, v in ipairs(t) do
    s[v] = true
  end
  return s
end

-- Set of C keywords.
-- NOTE(review): nothing in the visible part of this file reads this table.
local C_keywords = set {
  "break", "case", "char", "const", "continue", "default", "do", "double",
  "else", "enum", "extern", "float", "for", "goto", "if", "int", "long",
  "register", "return", "short", "signed", "sizeof", "static", "struct",
  "switch", "typedef", "union", "unsigned", "void", "volatile", "while",
}

-- Very primitive C formatter that tries to put "things" inside braces on one
-- line. This is a step done after preprocessing the C source to ensure that
-- the duplicate line detector can more reliably pick out identical declarations.
--
-- an example:
--   struct mystruct
--   {
--     int a;
--     int b;
--   };
--
-- would become:
--   struct mystruct
--   { int a; int b; };
--
-- The first one will have a lot of false positives (the line '{' for
-- example), the second one is more unique.
--
-- @param str the C source text to format
-- @return the formatted source as a single string
-- Errors raised by the tokeniser for input it cannot tokenise are not caught
-- here.
local function formatc(str)
  local tokens = TokeniseC(str)
  local result = {}
  local block_level = 0       -- current '{' ... '}' nesting depth
  local allow_one_nl = false  -- let the next newline through unchanged
  local end_at_brace = false  -- end the statement at the next top-level '}'

  for _, token in ipairs(tokens) do
    local typ = token[2]
    if typ == '{' then
      block_level = block_level + 1
    elseif typ == '}' then
      block_level = block_level - 1

      if block_level == 0 and end_at_brace then
        -- this '}' closes the top-level block that followed a 'static'; such
        -- a definition has no trailing ';', so it is ended with a newline
        -- right here.
        token[1] = token[1] .. "\n"
        end_at_brace = false
      end
    elseif typ == 'identifier' then
      -- static usually indicates an inline header function, which has no
      -- trailing ';', so we have to add a newline after the '}' ourselves.
      if token[1] == 'static' then
        end_at_brace = true
      end
    elseif typ == 'preprocessor' then
      -- preprocessor directives don't end in ';' but need their newline, so
      -- we're going to allow the next newline to pass.
      allow_one_nl = true
    elseif typ == ';' then
      if block_level == 0 then
        -- if we're not inside a block, we're at the basic statement level,
        -- and ';' indicates we're at the end of a statement, so we end it
        -- with a newline.
        token[1] = ";\n"
        -- the statement ended with ';' after all (e.g. 'static int x;'), so
        -- a later, unrelated top-level '}' must not get an extra newline.
        end_at_brace = false
      end
    elseif typ == 'whitespace' then
      -- replace all whitespace by one space
      local repl = " "

      -- except when allow_one_nl is true and there's a newline in the whitespace
      if allow_one_nl and string.find(token[1], "[\r\n]+") then
        -- in that case we replace all whitespace by one newline
        repl = "\n"
        allow_one_nl = false
      end

      -- gsub also returns a match count; the assignment keeps the string only
      token[1] = string.gsub(token[1], "%s+", repl)
    end
    result[#result + 1] = token[1]
  end

  return table.concat(result)
end

-- uncomment the following lines (and comment the return) for standalone
-- operation (very handy for debugging)
--
-- Preprocesses the file named by the first argument and prints the result,
-- run through formatc unless exactly two arguments are given and the second
-- one is 'no'.
-- @param ... the script's command line arguments
local function standalone(...)
  -- read our own varargs; when invoked as `standalone(...)` from the main
  -- chunk these are the script arguments
  local nargs = select("#", ...)
  local path, mode = ...

  require "moonscript"
  -- kept local so that debugging runs do not leak globals
  local Preprocess = require("preprocess")
  Preprocess.add_to_include_path('./../../src')

  local input = Preprocess.preprocess_stream(path)
  local raw = input:read('*all')
  input:close()

  local formatted
  if nargs == 2 and mode == 'no' then
    formatted = raw
  else
    formatted = formatc(raw)
  end

  print(formatted)
end
-- standalone(...)

return formatc