Spaces:
Sleeping
Sleeping
Commit
·
3c8598e
1
Parent(s):
135b99f
format
Browse files
main.py
CHANGED
|
@@ -11,26 +11,40 @@ from dist import levenshtein_with_wildcard, print_match_summary
|
|
| 11 |
|
| 12 |
description = frontmatter.load("README.md").content
|
| 13 |
|
|
|
|
| 14 |
def trim(str, n):
|
| 15 |
return "\n".join(str.splitlines()[n:])
|
| 16 |
|
|
|
|
| 17 |
def trim_objdump(str):
|
| 18 |
return trim(str, 7)
|
| 19 |
|
|
|
|
| 20 |
def disassemble_bytes(byte_data, architecture, options):
|
| 21 |
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as temp_bin_file:
|
| 22 |
temp_bin_file.write(byte_data)
|
| 23 |
temp_bin_file_name = temp_bin_file.name
|
| 24 |
|
| 25 |
disassembly = subprocess.run(
|
| 26 |
-
[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
capture_output=True,
|
| 28 |
-
text=True
|
| 29 |
).stdout
|
| 30 |
disassembly = trim_objdump(disassembly)
|
| 31 |
|
| 32 |
return disassembly
|
| 33 |
|
|
|
|
| 34 |
def compile(compiler, flags, source):
|
| 35 |
# Create a temporary file for the C source code
|
| 36 |
with tempfile.NamedTemporaryFile(suffix=".c", delete=False) as temp_c_file:
|
|
@@ -71,9 +85,7 @@ def compile(compiler, flags, source):
|
|
| 71 |
|
| 72 |
# Disassemble the object file
|
| 73 |
disassembly = subprocess.run(
|
| 74 |
-
["objdump", "-dr", temp_o_file_name],
|
| 75 |
-
capture_output=True,
|
| 76 |
-
text=True
|
| 77 |
).stdout
|
| 78 |
disassembly = trim_objdump(disassembly)
|
| 79 |
|
|
@@ -86,23 +98,27 @@ def compile(compiler, flags, source):
|
|
| 86 |
# relocs = trim(relocs, 3)
|
| 87 |
|
| 88 |
json_relocs = subprocess.run(
|
| 89 |
-
[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
capture_output=True,
|
| 91 |
text=True,
|
| 92 |
).stdout
|
| 93 |
json_relocs = json.loads(json_relocs)
|
| 94 |
json_relocs = json_relocs[0]["Relocations"]
|
| 95 |
-
json_relocs = [r["Relocation"] for d in json_relocs for r in d[
|
| 96 |
# Filter out .text
|
| 97 |
json_relocs = [r for r in json_relocs if r["Symbol"]["Name"] != ".text"]
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
if result.returncode == 0:
|
| 102 |
return json_relocs, compiled_bytes, compile_output, disassembly
|
| 103 |
else:
|
| 104 |
return None, None, compile_output, disassembly
|
| 105 |
|
|
|
|
| 106 |
def _reloc_type2size(s):
|
| 107 |
match s:
|
| 108 |
case "R_X86_64_PC32":
|
|
@@ -112,20 +128,31 @@ def _reloc_type2size(s):
|
|
| 112 |
case _:
|
| 113 |
assert False, f"Unknown reloc {s}"
|
| 114 |
|
|
|
|
| 115 |
def _compute_relocs_byte_range(json_relocs):
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
|
| 122 |
target_bytes = bytes.fromhex(target_bytes)
|
| 123 |
-
compiled_relocs, compiled_bytes, compile_output, compiled_disassembly = compile(
|
|
|
|
|
|
|
| 124 |
target_disassembly = disassemble_bytes(target_bytes, disasm_arch, disasm_options)
|
| 125 |
|
| 126 |
if compiled_bytes is not None:
|
| 127 |
|
| 128 |
-
reloc_edit_distance, reloc_operations = print_match_summary(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
print(f"reloc_edit_distance: {reloc_edit_distance}")
|
| 130 |
print(f"reloc operations: {reloc_operations}")
|
| 131 |
|
|
@@ -138,7 +165,7 @@ def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
|
|
| 138 |
compile_output,
|
| 139 |
compiled_disassembly,
|
| 140 |
compiled_relocs,
|
| 141 |
-
target_disassembly
|
| 142 |
)
|
| 143 |
else:
|
| 144 |
return (
|
|
@@ -150,7 +177,7 @@ def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
|
|
| 150 |
compile_output,
|
| 151 |
compiled_disassembly,
|
| 152 |
compiled_relocs,
|
| 153 |
-
target_disassembly
|
| 154 |
)
|
| 155 |
|
| 156 |
|
|
@@ -172,7 +199,7 @@ def run():
|
|
| 172 |
gr.Textbox(label="Compiler", value="g++"),
|
| 173 |
gr.Textbox(label="Compiler Flags", value="-O2"),
|
| 174 |
gr.Textbox(label="Architecture (objdump -m)", value="i386"),
|
| 175 |
-
gr.Textbox(label="Disassembler options (objdump -M)", value="x86-64")
|
| 176 |
],
|
| 177 |
outputs=[
|
| 178 |
gr.Textbox(label="Compiled bytes"),
|
|
|
|
| 11 |
|
| 12 |
description = frontmatter.load("README.md").content
|
| 13 |
|
| 14 |
+
|
| 15 |
def trim(str, n):
|
| 16 |
return "\n".join(str.splitlines()[n:])
|
| 17 |
|
| 18 |
+
|
| 19 |
def trim_objdump(str):
|
| 20 |
return trim(str, 7)
|
| 21 |
|
| 22 |
+
|
| 23 |
def disassemble_bytes(byte_data, architecture, options):
|
| 24 |
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as temp_bin_file:
|
| 25 |
temp_bin_file.write(byte_data)
|
| 26 |
temp_bin_file_name = temp_bin_file.name
|
| 27 |
|
| 28 |
disassembly = subprocess.run(
|
| 29 |
+
[
|
| 30 |
+
"objdump",
|
| 31 |
+
"-D",
|
| 32 |
+
"-b",
|
| 33 |
+
"binary",
|
| 34 |
+
"-m",
|
| 35 |
+
architecture,
|
| 36 |
+
"-M",
|
| 37 |
+
options,
|
| 38 |
+
temp_bin_file_name,
|
| 39 |
+
],
|
| 40 |
capture_output=True,
|
| 41 |
+
text=True,
|
| 42 |
).stdout
|
| 43 |
disassembly = trim_objdump(disassembly)
|
| 44 |
|
| 45 |
return disassembly
|
| 46 |
|
| 47 |
+
|
| 48 |
def compile(compiler, flags, source):
|
| 49 |
# Create a temporary file for the C source code
|
| 50 |
with tempfile.NamedTemporaryFile(suffix=".c", delete=False) as temp_c_file:
|
|
|
|
| 85 |
|
| 86 |
# Disassemble the object file
|
| 87 |
disassembly = subprocess.run(
|
| 88 |
+
["objdump", "-dr", temp_o_file_name], capture_output=True, text=True
|
|
|
|
|
|
|
| 89 |
).stdout
|
| 90 |
disassembly = trim_objdump(disassembly)
|
| 91 |
|
|
|
|
| 98 |
# relocs = trim(relocs, 3)
|
| 99 |
|
| 100 |
json_relocs = subprocess.run(
|
| 101 |
+
[
|
| 102 |
+
"llvm-readobj-19",
|
| 103 |
+
"--elf-output-style=JSON",
|
| 104 |
+
"--relocations",
|
| 105 |
+
temp_o_file_name,
|
| 106 |
+
],
|
| 107 |
capture_output=True,
|
| 108 |
text=True,
|
| 109 |
).stdout
|
| 110 |
json_relocs = json.loads(json_relocs)
|
| 111 |
json_relocs = json_relocs[0]["Relocations"]
|
| 112 |
+
json_relocs = [r["Relocation"] for d in json_relocs for r in d["Relocs"]]
|
| 113 |
# Filter out .text
|
| 114 |
json_relocs = [r for r in json_relocs if r["Symbol"]["Name"] != ".text"]
|
| 115 |
|
|
|
|
|
|
|
| 116 |
if result.returncode == 0:
|
| 117 |
return json_relocs, compiled_bytes, compile_output, disassembly
|
| 118 |
else:
|
| 119 |
return None, None, compile_output, disassembly
|
| 120 |
|
| 121 |
+
|
| 122 |
def _reloc_type2size(s):
|
| 123 |
match s:
|
| 124 |
case "R_X86_64_PC32":
|
|
|
|
| 128 |
case _:
|
| 129 |
assert False, f"Unknown reloc {s}"
|
| 130 |
|
| 131 |
+
|
| 132 |
def _compute_relocs_byte_range(json_relocs):
|
| 133 |
+
relocs_byte_range = [
|
| 134 |
+
range(r["Offset"], r["Offset"] + _reloc_type2size(r["Type"]["Name"]))
|
| 135 |
+
for r in json_relocs
|
| 136 |
+
]
|
| 137 |
+
# Flatten relocs_byte_range
|
| 138 |
+
relocs_byte_range = [i for r in relocs_byte_range for i in r]
|
| 139 |
+
return relocs_byte_range
|
| 140 |
+
|
| 141 |
|
| 142 |
def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
|
| 143 |
target_bytes = bytes.fromhex(target_bytes)
|
| 144 |
+
compiled_relocs, compiled_bytes, compile_output, compiled_disassembly = compile(
|
| 145 |
+
compiler, flags, source
|
| 146 |
+
)
|
| 147 |
target_disassembly = disassemble_bytes(target_bytes, disasm_arch, disasm_options)
|
| 148 |
|
| 149 |
if compiled_bytes is not None:
|
| 150 |
|
| 151 |
+
reloc_edit_distance, reloc_operations = print_match_summary(
|
| 152 |
+
target_bytes,
|
| 153 |
+
compiled_bytes,
|
| 154 |
+
wildcard_offsets_seq2=_compute_relocs_byte_range(compiled_relocs),
|
| 155 |
+
)
|
| 156 |
print(f"reloc_edit_distance: {reloc_edit_distance}")
|
| 157 |
print(f"reloc operations: {reloc_operations}")
|
| 158 |
|
|
|
|
| 165 |
compile_output,
|
| 166 |
compiled_disassembly,
|
| 167 |
compiled_relocs,
|
| 168 |
+
target_disassembly,
|
| 169 |
)
|
| 170 |
else:
|
| 171 |
return (
|
|
|
|
| 177 |
compile_output,
|
| 178 |
compiled_disassembly,
|
| 179 |
compiled_relocs,
|
| 180 |
+
target_disassembly,
|
| 181 |
)
|
| 182 |
|
| 183 |
|
|
|
|
| 199 |
gr.Textbox(label="Compiler", value="g++"),
|
| 200 |
gr.Textbox(label="Compiler Flags", value="-O2"),
|
| 201 |
gr.Textbox(label="Architecture (objdump -m)", value="i386"),
|
| 202 |
+
gr.Textbox(label="Disassembler options (objdump -M)", value="x86-64"),
|
| 203 |
],
|
| 204 |
outputs=[
|
| 205 |
gr.Textbox(label="Compiled bytes"),
|