164 lines
4.5 KiB
Python
Executable File
164 lines
4.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
"""
|
|
strip_asm.py - Clean up ASM output for the specified file
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
from argparse import ArgumentParser
|
|
|
|
|
|
def find_used_labels(asm):
    """
    Collect the local labels that appear as jump targets.

    Each line of `asm` is checked for a mnemonic starting with "j"
    followed by a ".L<name>" operand. The result is the set of those
    label names, each spelled with its leading ".L".
    """
    jump_target = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    hits = (jump_target.match(text) for text in asm.splitlines())
    return {".L%s" % hit.group(1) for hit in hits if hit}
|
|
|
|
|
|
def normalize_labels(asm):
    """
    Give every local label a leading "." so labels look like ".Lfoo".

    A label declaration is a line starting with "L<name>:" or ".L<name>:".
    For each declared label that lacks the dot, the declaration and every
    whitespace-delimited use of it are rewritten to the dotted spelling.
    Labels that already carry the dot are left alone.

    Returns the (possibly rewritten) assembly text; input without any
    undotted label declarations is returned unchanged.
    """
    label_decl = re.compile(r"^\.?L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")

    # Only undotted declarations need work. Deciding per label (instead of
    # peeking at one arbitrary element of a set) keeps the result
    # deterministic when dotted and undotted labels are mixed.
    undotted = set()
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m and not m.group(0).startswith("."):
            undotted.add(m.group(0))

    if not undotted:
        return asm

    for label in sorted(undotted):
        # The label must start a token (start of text or after whitespace)
        # and end one (":" of a declaration, whitespace, or end of input),
        # so "L1" never matches inside "L10" or ".L1".
        pattern = r"(^|\s+)" + re.escape(label) + r"(?=:|\s|$)"
        asm = re.sub(pattern, r"\g<1>." + label, asm)
    return asm
|
|
|
|
|
|
def transform_labels(asm):
    """
    Normalize label spelling, then drop declarations of unused labels.

    After `normalize_labels` has run, any line that declares a ".L<name>"
    label not reported by `find_used_labels` is removed. All other lines
    are kept, each terminated by a newline.
    """
    asm = normalize_labels(asm)
    referenced = find_used_labels(asm)
    decl_pattern = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")

    def keep(text):
        # Non-declaration lines always stay; declarations stay only if
        # something jumps to them.
        decl = decl_pattern.match(text)
        return decl is None or decl.group(0) in referenced

    return "".join(text + "\n" for text in asm.splitlines() if keep(text))
|
|
|
|
|
|
def is_identifier(tk):
    """
    Return True if `tk` looks like an identifier.

    An identifier is non-empty, starts with a letter or underscore, and
    continues with letters, digits, or underscores only.
    """
    if not tk:
        return False
    head, tail = tk[0], tk[1:]
    if not (head.isalpha() or head == "_"):
        return False
    return all(ch.isalnum() or ch == "_" for ch in tail)
|
|
|
|
|
|
def process_identifiers(line):
    """
    process_identifiers - rewrite the identifiers on `line` so that names
    are consistent across platforms; specifically across ELF and MachO.
    MachO inserts an additional underscore at the beginning of names, and
    this function removes it.

    One leading underscore is dropped from identifiers starting with
    "__Z", and from identifiers starting with "_" followed by a letter
    other than "Z". Everything else on the line is returned untouched.
    """

    def strip_prefix(token):
        if not is_identifier(token):
            return token
        if token.startswith("__Z"):
            return token[1:]
        second = token[1:2]  # empty when the token is a lone "_"
        if token.startswith("_") and second.isalpha() and second != "Z":
            return token[1:]
        return token

    # The capturing group makes re.split keep the word-like tokens in the
    # result, so joining the pieces reproduces the line around them.
    tokens = re.split(r"([a-zA-Z0-9_]+)", line)
    return "".join(strip_prefix(token) for token in tokens)
|
|
|
|
|
|
def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines.

    Labels are first cleaned up with `transform_labels`. Every remaining
    line has "@GOTPCREL" removed, is dropped if it matches one of the
    discard patterns (unless a keep pattern matches it), and otherwise has
    its identifiers rewritten by `process_identifiers`. A blank line is
    emitted before each function-style label except at the very start of
    the output.
    """
    asm = transform_labels(asm)

    # TODO: Add more things we want to remove
    discard_regexes = [
        re.compile(r"\s+\..*$"),  # directive
        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile(r"\s*#.*$"),  # comment line
        re.compile(
            r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"
        ),  # global directive
        re.compile(
            r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"
        ),
    ]
    # Patterns that override a discard decision; currently none.
    keep_regexes: list[re.Pattern] = []
    fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:")

    pieces = []
    for raw in asm.splitlines():
        # Remove the @GOTPCREL suffix from operands
        text = raw.replace("@GOTPCREL", "")

        discarded = any(rx.match(text) is not None for rx in discard_regexes)
        rescued = any(rx.match(text) is not None for rx in keep_regexes)
        if discarded and not rescued:
            continue

        # Separate functions with an empty line, but never lead with one.
        if pieces and fn_label_def.match(text):
            pieces.append("\n")
        pieces.append(process_identifiers(text))
        pieces.append("\n")
    return "".join(pieces)
|
|
|
|
|
|
def main():
    """
    Command-line entry point: read an assembly file, strip it, write it.

    Takes two positional arguments, the input assembly file and the output
    file. If the input is not an existing file, an error is printed and
    the process exits with status 1.
    """
    parser = ArgumentParser(description="generate a stripped assembly file")
    parser.add_argument(
        "input",
        metavar="input",
        type=str,
        nargs=1,
        help="An input assembly file",
    )
    parser.add_argument(
        "out", metavar="output", type=str, nargs=1, help="The output file"
    )
    # parse_known_args tolerates extra, unrecognized arguments.
    args, _ = parser.parse_known_args()
    # nargs=1 yields one-element lists.
    (src_path,) = args.input
    (dst_path,) = args.out

    if not os.path.isfile(src_path):
        print("ERROR: input file '%s' does not exist" % src_path)
        sys.exit(1)

    with open(src_path, "r") as src:
        original = src.read()
    stripped = process_asm(original)
    with open(dst_path, "w") as dst:
        dst.write(stripped)
|
|
|
|
|
|
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
|
|
|
|
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
|
|
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
|
|
# kate: indent-mode python; remove-trailing-spaces modified;
|