diff options
author | Christopher Ferris <cferris@google.com> | 2019-01-08 14:58:07 -0800 |
---|---|---|
committer | Christopher Ferris <cferris@google.com> | 2019-01-10 15:15:15 -0800 |
commit | 658b16fd9078882730861cc2c29cb44828989e52 (patch) | |
tree | bff861d3acba602010b5f3043a605f34ad13e1d6 /libc/kernel/tools/cpp.py | |
parent | e7964158cedf37b50e42ab1988e67eb2154c74f2 (diff) |
Rewrite removeVarsAndFuncs.
The current version has these bugs:
- Adding a semicolon after a function results in the removal of structures
following the function.
- Function-like macros get removed by accident rather than on purpose.
- It removes extern "C" { completely, which might not be a bug, but doesn't
seem right.
I couldn't easily fix any of these problems because the code depends heavily
on the header being correct.
New unit tests were added for the function to cover all of these cases.
A follow-on CL will include the updated headers.
Bug: 112290385
Test: Passes all new unit tests.
Test: When run on the current kernel headers, the generated headers are
Test: nearly the same; data that was previously missing is now being added.
Change-Id: Ib22a5f2e78873544e8a9d54e385af1156b2a72bb
Diffstat (limited to 'libc/kernel/tools/cpp.py')
-rwxr-xr-x | libc/kernel/tools/cpp.py | 543 |
1 files changed, 423 insertions, 120 deletions
diff --git a/libc/kernel/tools/cpp.py b/libc/kernel/tools/cpp.py index 336a9c835..1ada59e09 100755 --- a/libc/kernel/tools/cpp.py +++ b/libc/kernel/tools/cpp.py @@ -1037,11 +1037,14 @@ class Block(object): if t.id == '{': buf += ' {' result.append(strip_space(buf)) - indent += 2 + # Do not indent if this is extern "C" { + if i < 2 or tokens[i-2].id != 'extern' or tokens[i-1].id != '"C"': + indent += 2 buf = '' newline = True elif t.id == '}': - indent -= 2 + if indent >= 2: + indent -= 2 if not newline: result.append(strip_space(buf)) # Look ahead to determine if it's the end of line. @@ -1221,133 +1224,140 @@ class BlockList(object): function declarations are removed. We only accept typedefs and enum/structs/union declarations. + In addition, remove any macros expanding in the headers. Usually, + these macros are static inline functions, which is why they are + removed. + However, we keep the definitions corresponding to the set of known static inline functions in the set 'keep', which is useful for optimized byteorder swap functions and stuff like that. """ - # NOTE: It's also removing function-like macros, such as __SYSCALL(...) - # in uapi/asm-generic/unistd.h, or KEY_FIELD(...) in linux/bcache.h. - # It could be problematic when we have function-like macros but without - # '}' following them. It will skip all the tokens/blocks until seeing a - # '}' as the function end. Fortunately we don't have such cases in the - # current kernel headers. - - # state = 0 => normal (i.e. LN + spaces) - # state = 1 => typedef/struct encountered, ends with ";" - # state = 2 => var declaration encountered, ends with ";" - # state = 3 => func declaration encountered, ends with "}" + # state = NORMAL => normal (i.e. 
LN + spaces) + # state = OTHER_DECL => typedef/struct encountered, ends with ";" + # state = VAR_DECL => var declaration encountered, ends with ";" + # state = FUNC_DECL => func declaration encountered, ends with "}" + NORMAL = 0 + OTHER_DECL = 1 + VAR_DECL = 2 + FUNC_DECL = 3 - state = 0 + state = NORMAL depth = 0 - blocks2 = [] - skipTokens = False - for b in self.blocks: - if b.isDirective(): - blocks2.append(b) - else: - n = len(b.tokens) - i = 0 - if skipTokens: - first = n - else: - first = 0 - while i < n: - tok = b.tokens[i] - tokid = tok.id - # If we are not looking for the start of a new - # type/var/func, then skip over tokens until - # we find our terminator, managing the depth of - # accolades as we go. - if state > 0: - terminator = False - if tokid == '{': - depth += 1 - elif tokid == '}': - if depth > 0: - depth -= 1 - if (depth == 0) and (state == 3): - terminator = True - elif tokid == ';' and depth == 0: - terminator = True - - if terminator: - # we found the terminator - state = 0 - if skipTokens: - skipTokens = False - first = i + 1 - - i += 1 - continue - - # Is it a new type definition, then start recording it - if tok.id in ['struct', 'typedef', 'enum', 'union', - '__extension__']: - state = 1 + blocksToKeep = [] + blocksInProgress = [] + blocksOfDirectives = [] + ident = "" + state_token = "" + macros = set() + for block in self.blocks: + if block.isDirective(): + # Record all macros. + if block.directive == 'define': + macro_name = block.define_id + paren_index = macro_name.find('(') + if paren_index == -1: + macros.add(macro_name) + else: + macros.add(macro_name[0:paren_index]) + blocksInProgress.append(block) + # If this is in a function/variable declaration, we might need + # to emit the directives alone, so save them separately. 
+ blocksOfDirectives.append(block) + continue + + numTokens = len(block.tokens) + lastTerminatorIndex = 0 + i = 0 + while i < numTokens: + token_id = block.tokens[i].id + terminator = False + if token_id == '{': + depth += 1 + if (i >= 2 and block.tokens[i-2].id == 'extern' and + block.tokens[i-1].id == '"C"'): + # For an extern "C" { pretend as though this is depth 0. + depth -= 1 + elif token_id == '}': + if depth > 0: + depth -= 1 + if depth == 0: + if state == OTHER_DECL: + # Loop through until we hit the ';' + i += 1 + while i < numTokens: + if block.tokens[i].id == ';': + token_id = ';' + break + i += 1 + # If we didn't hit the ';', just consider this the + # terminator any way. + terminator = True + elif depth == 0: + if token_id == ';': + if state == NORMAL: + blocksToKeep.extend(blocksInProgress) + blocksInProgress = [] + blocksOfDirectives = [] + state = FUNC_DECL + terminator = True + elif (state == NORMAL and token_id == '(' and i >= 1 and + block.tokens[i-1].kind == TokenKind.IDENTIFIER and + block.tokens[i-1].id in macros): + # This is a plain macro being expanded in the header + # which needs to be removed. + blocksToKeep.extend(blocksInProgress) + if lastTerminatorIndex < i - 1: + blocksToKeep.append(Block(block.tokens[lastTerminatorIndex:i-1])) + blocksInProgress = [] + blocksOfDirectives = [] + + # Skip until we see the terminating ')' i += 1 - continue - - # Is it a variable or function definition. If so, first - # try to determine which type it is, and also extract - # its name. - # - # We're going to parse the next tokens of the same block - # until we find a semicolon or a left parenthesis. - # - # The semicolon corresponds to a variable definition, - # the left-parenthesis to a function definition. - # - # We also assume that the var/func name is the last - # identifier before the terminator. 
- # - j = i + 1 - ident = "" - while j < n: - tokid = b.tokens[j].id - if tokid == '(': # a function declaration - state = 3 - break - elif tokid == ';': # a variable declaration - state = 2 - break - if b.tokens[j].kind == TokenKind.IDENTIFIER: - ident = b.tokens[j].id - j += 1 - - if j >= n: - # This can only happen when the declaration - # does not end on the current block (e.g. with - # a directive mixed inside it. - # - # We will treat it as malformed because - # it's very hard to recover from this case - # without making our parser much more - # complex. - # - logging.debug("### skip unterminated static '%s'", - ident) - break - - if ident in keep: - logging.debug("### keep var/func '%s': %s", ident, - repr(b.tokens[i:j])) + paren_depth = 1 + while i < numTokens: + if block.tokens[i].id == ')': + paren_depth -= 1 + if paren_depth == 0: + break + elif block.tokens[i].id == '(': + paren_depth += 1 + i += 1 + lastTerminatorIndex = i + 1 + elif (state != FUNC_DECL and token_id == '(' and + state_token != 'typedef'): + blocksToKeep.extend(blocksInProgress) + blocksInProgress = [] + blocksOfDirectives = [] + state = VAR_DECL + elif state == NORMAL and token_id in ['struct', 'typedef', + 'enum', 'union', + '__extension__']: + state = OTHER_DECL + state_token = token_id + elif block.tokens[i].kind == TokenKind.IDENTIFIER: + if state != VAR_DECL or ident == "": + ident = token_id + + if terminator: + if state != VAR_DECL and state != FUNC_DECL or ident in keep: + blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:i+1])) + blocksToKeep.extend(blocksInProgress) else: - # We're going to skip the tokens for this declaration - logging.debug("### skip var/func '%s': %s", ident, - repr(b.tokens[i:j])) - if i > first: - blocks2.append(Block(b.tokens[first:i])) - skipTokens = True - first = n - - i += 1 - - if i > first: - #print "### final '%s'" % repr(b.tokens[first:i]) - blocks2.append(Block(b.tokens[first:i])) - - self.blocks = blocks2 + # Only keep the 
directives found. + blocksToKeep.extend(blocksOfDirectives) + lastTerminatorIndex = i + 1 + blocksInProgress = [] + blocksOfDirectives = [] + state = NORMAL + ident = "" + state_token = "" + i += 1 + if lastTerminatorIndex < numTokens: + blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:numTokens])) + if len(blocksInProgress) > 0: + blocksToKeep.extend(blocksInProgress) + self.blocks = blocksToKeep def replaceTokens(self, replacements): """Replace tokens according to the given dict.""" @@ -1938,6 +1948,299 @@ class OptimizerTests(unittest.TestCase): expected = "" self.assertEqual(self.parse(text), expected) +class FullPathTest(unittest.TestCase): + """Test of the full path parsing.""" + + def parse(self, text, keep=None): + if not keep: + keep = set() + out = utils.StringOutput() + blocks = BlockParser().parse(CppStringTokenizer(text)) + blocks.removeVarsAndFuncs(keep) + blocks.replaceTokens(kernel_token_replacements) + blocks.optimizeAll(None) + blocks.write(out) + return out.get() + + def test_function_removed(self): + text = """\ +static inline __u64 function() +{ +} +""" + expected = "" + self.assertEqual(self.parse(text), expected) + + def test_function_removed_with_struct(self): + text = """\ +static inline struct something* function() +{ +} +""" + expected = "" + self.assertEqual(self.parse(text), expected) + + def test_function_kept(self): + text = """\ +static inline __u64 function() +{ +} +""" + expected = """\ +static inline __u64 function() { +} +""" + self.assertEqual(self.parse(text, set(["function"])), expected) + + def test_var_removed(self): + text = "__u64 variable;" + expected = "" + self.assertEqual(self.parse(text), expected) + + def test_var_kept(self): + text = "__u64 variable;" + expected = "__u64 variable;\n" + self.assertEqual(self.parse(text, set(["variable"])), expected) + + def test_keep_function_typedef(self): + text = "typedef void somefunction_t(void);" + expected = "typedef void somefunction_t(void);\n" + 
self.assertEqual(self.parse(text), expected) + + def test_struct_keep_attribute(self): + text = """\ +struct something_s { + __u32 s1; + __u32 s2; +} __attribute__((packed)); +""" + expected = """\ +struct something_s { + __u32 s1; + __u32 s2; +} __attribute__((packed)); +""" + self.assertEqual(self.parse(text), expected) + + def test_function_keep_attribute_structs(self): + text = """\ +static __inline__ struct some_struct1 * function(struct some_struct2 * e) { +} +""" + expected = """\ +static __inline__ struct some_struct1 * function(struct some_struct2 * e) { +} +""" + self.assertEqual(self.parse(text, set(["function"])), expected) + + def test_struct_after_struct(self): + text = """\ +struct first { +}; + +struct second { + unsigned short s1; +#define SOMETHING 8 + unsigned short s2; +}; +""" + expected = """\ +struct first { +}; +struct second { + unsigned short s1; +#define SOMETHING 8 + unsigned short s2; +}; +""" + self.assertEqual(self.parse(text), expected) + + def test_other_not_removed(self): + text = """\ +typedef union { + __u64 tu1; + __u64 tu2; +} typedef_name; + +union { + __u64 u1; + __u64 u2; +}; + +struct { + __u64 s1; + __u64 s2; +}; + +enum { + ENUM1 = 0, + ENUM2, +}; + +__extension__ typedef __signed__ long long __s64; +""" + expected = """\ +typedef union { + __u64 tu1; + __u64 tu2; +} typedef_name; +union { + __u64 u1; + __u64 u2; +}; +struct { + __u64 s1; + __u64 s2; +}; +enum { + ENUM1 = 0, + ENUM2, +}; +__extension__ typedef __signed__ long long __s64; +""" + + self.assertEqual(self.parse(text), expected) + + def test_semicolon_after_function(self): + text = """\ +static inline __u64 function() +{ +}; + +struct should_see { + __u32 field; +}; +""" + expected = """\ +struct should_see { + __u32 field; +}; +""" + self.assertEqual(self.parse(text), expected) + + def test_define_in_middle_keep(self): + text = """\ +enum { + ENUM0 = 0x10, + ENUM1 = 0x20, +#define SOMETHING SOMETHING_ELSE + ENUM2 = 0x40, +}; +""" + expected = """\ +enum { + 
ENUM0 = 0x10, + ENUM1 = 0x20, +#define SOMETHING SOMETHING_ELSE + ENUM2 = 0x40, +}; +""" + self.assertEqual(self.parse(text), expected) + + def test_define_in_middle_remove(self): + text = """\ +static inline function() { +#define SOMETHING1 SOMETHING_ELSE1 + i = 0; + { + i = 1; + } +#define SOMETHING2 SOMETHING_ELSE2 +} +""" + expected = """\ +#define SOMETHING1 SOMETHING_ELSE1 +#define SOMETHING2 SOMETHING_ELSE2 +""" + self.assertEqual(self.parse(text), expected) + + def test_define_in_middle_force_keep(self): + text = """\ +static inline function() { +#define SOMETHING1 SOMETHING_ELSE1 + i = 0; + { + i = 1; + } +#define SOMETHING2 SOMETHING_ELSE2 +} +""" + expected = """\ +static inline function() { +#define SOMETHING1 SOMETHING_ELSE1 + i = 0; + { + i = 1; + } +#define SOMETHING2 SOMETHING_ELSE2 +} +""" + self.assertEqual(self.parse(text, set(["function"])), expected) + + def test_define_before_remove(self): + text = """\ +#define SHOULD_BE_KEPT NOTHING1 +#define ANOTHER_TO_KEEP NOTHING2 +static inline function() { +#define SOMETHING1 SOMETHING_ELSE1 + i = 0; + { + i = 1; + } +#define SOMETHING2 SOMETHING_ELSE2 +} +""" + expected = """\ +#define SHOULD_BE_KEPT NOTHING1 +#define ANOTHER_TO_KEEP NOTHING2 +#define SOMETHING1 SOMETHING_ELSE1 +#define SOMETHING2 SOMETHING_ELSE2 +""" + self.assertEqual(self.parse(text), expected) + + def test_extern_C(self): + text = """\ +#if defined(__cplusplus) +extern "C" { +#endif + +struct something { +}; + +#if defined(__cplusplus) +} +#endif +""" + expected = """\ +#ifdef __cplusplus +extern "C" { +#endif +struct something { +}; +#ifdef __cplusplus +} +#endif +""" + self.assertEqual(self.parse(text), expected) + + def test_macro_definition_removed(self): + text = """\ +#define MACRO_FUNCTION_NO_PARAMS static inline some_func() {} +MACRO_FUNCTION_NO_PARAMS() + +#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; } +MACRO_FUNCTION_PARAMS(a = 1) + +something that should still be kept +MACRO_FUNCTION_PARAMS(b) +""" + 
expected = """\ +#define MACRO_FUNCTION_NO_PARAMS static inline some_func() { } +#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; } +something that should still be kept +""" + self.assertEqual(self.parse(text), expected) + if __name__ == '__main__': unittest.main() |