#!/usr/bin/env python3

"""
strip_asm.py - Cleanup ASM output for the specified file
"""

import os
import re
import sys
from argparse import ArgumentParser

# A jump instruction (mnemonic starting with "j") whose operand is a ".L" label.
_USED_LABEL_RE = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
# A label declaration at the start of a line, with or without the leading dot.
_ANY_LABEL_DECL_RE = re.compile("^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
# A label declaration at the start of a line that already has the leading dot.
_DOT_LABEL_DECL_RE = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
# Splits a line into alternating separator / word tokens (the group keeps both).
_WORD_SPLIT_RE = re.compile(r"([a-zA-Z0-9_]+)")
# A non-local label definition such as "main:" at the start of a line.
_FN_LABEL_DEF_RE = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:")

# TODO: Add more things we want to remove
_DISCARD_REGEXES = (
    re.compile(r"\s+\..*$"),  # directive
    re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
    re.compile(r"\s*#.*$"),  # comment line
    re.compile(
        r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"
    ),  # global directive
    re.compile(
        r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"
    ),
)
# Patterns that force a line to be kept even if a discard pattern matched it.
# Currently empty; kept as an extension point.
_KEEP_REGEXES = ()


def find_used_labels(asm):
    """
    Return the set of ".L" labels that are the target of a jump instruction.

    Only lines that start (after optional whitespace) with a mnemonic
    beginning with "j" followed by a ".L<name>" operand are considered.
    """
    matches = (_USED_LABEL_RE.match(line) for line in asm.splitlines())
    return {".L%s" % m.group(1) for m in matches if m}


def normalize_labels(asm):
    """
    Rewrite dot-less local labels ("L<name>") so they carry a leading dot.

    If the text declares no labels, or any declared label already starts
    with ".", the text is returned unchanged. Otherwise every declared label
    gets a "." prefix wherever it appears as a whitespace-delimited token.
    """
    decls = set()
    for line in asm.splitlines():
        m = _ANY_LABEL_DECL_RE.match(line)
        if m:
            decls.add(m.group(0))
    if not decls:
        return asm
    # Decide from *all* declarations rather than from an arbitrary set
    # element: with a mix of dotted and dot-less declarations the previous
    # check depended on set iteration order and could rewrite ".L1" into
    # "..L1". A file that already uses dotted labels is left untouched.
    if any(ld.startswith(".") for ld in decls):
        return asm
    for ld in decls:
        pattern = r"(^|\s+)" + re.escape(ld) + r"(?=:|\s)"
        asm = re.sub(pattern, "\\1." + ld, asm)
    return asm


def transform_labels(asm):
    """
    Normalize label spelling and drop ".L" label declarations that no jump
    instruction refers to.

    Every retained line is terminated with a newline in the result.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    kept = []
    for line in asm.splitlines():
        m = _DOT_LABEL_DECL_RE.match(line)
        if m and m.group(0) not in used_decls:
            continue  # declaration of a label nothing jumps to
        kept.append(line + "\n")
    return "".join(kept)


def is_identifier(tk):
    """
    Return True if ``tk`` is non-empty, starts with a letter or "_", and
    every following character is alphanumeric or "_".
    """
    if not tk:
        return False
    first = tk[0]
    if not (first.isalpha() or first == "_"):
        return False
    return all(c.isalnum() or c == "_" for c in tk[1:])


def _strip_platform_underscore(tk):
    """Drop one leading underscore from ``tk`` when the naming rules apply."""
    if tk.startswith("__Z"):
        return tk[1:]
    # A single leading underscore is removed only when followed by a letter
    # other than "Z", so names beginning with "_Z" are left alone.
    if (
        tk.startswith("_")
        and len(tk) > 1
        and tk[1].isalpha()
        and tk[1] != "Z"
    ):
        return tk[1:]
    return tk


def process_identifiers(line):
    """
    process_identifiers - process all identifiers and modify them to have
    consistent names across all platforms; specifically across ELF and MachO.
    For example, MachO inserts an additional underscore at the beginning of
    names. This function removes that.
    """
    return "".join(
        _strip_platform_underscore(tk) if is_identifier(tk) else tk
        for tk in _WORD_SPLIT_RE.split(line)
    )


def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines

    Labels are normalized and unused ".L" labels removed first. Each
    remaining line has "@GOTPCREL" removed, is dropped if it matches a
    discard pattern (unless a keep pattern also matches), and otherwise has
    its identifiers normalized. A blank line is inserted before every label
    definition except when it would be the first line of the output.
    """
    out = []
    for line in transform_labels(asm).splitlines():
        # Remove Mach-O attribute
        line = line.replace("@GOTPCREL", "")
        discarded = any(reg.match(line) is not None for reg in _DISCARD_REGEXES)
        if discarded and not any(
            reg.match(line) is not None for reg in _KEEP_REGEXES
        ):
            continue
        # Every appended entry is non-empty, so a non-empty list means the
        # output already has content and the separator is wanted.
        if out and _FN_LABEL_DEF_RE.match(line):
            out.append("\n")
        out.append(process_identifiers(line) + "\n")
    return "".join(out)


def main():
    """
    Command-line entry point: read the input assembly file, strip it, and
    write the result to the output file.

    Exits with status 1 if the input path is not an existing file.
    """
    parser = ArgumentParser(description="generate a stripped assembly file")
    parser.add_argument(
        "input",
        metavar="input",
        type=str,
        nargs=1,
        help="An input assembly file",
    )
    parser.add_argument(
        "out", metavar="output", type=str, nargs=1, help="The output file"
    )
    # Unrecognized arguments are accepted and ignored, as before.
    args, _unknown_args = parser.parse_known_args()
    input_path = args.input[0]
    output_path = args.out[0]
    if not os.path.isfile(input_path):
        print(
            "ERROR: input file '%s' does not exist" % input_path,
            file=sys.stderr,
        )
        sys.exit(1)
    with open(input_path, "r") as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(output_path, "w") as f:
        f.write(new_contents)


if __name__ == "__main__":
    main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;