subdisassem/subdisassem/scripts.py

129 lines
3.2 KiB
Python

from argparse import ArgumentParser
from hashlib import sha1
from pathlib import Path
from sqlalchemy import desc
import logging
from .disassemble import (
X86_intel,
X86,
X86_64,
ARM,
Thumb,
ARM_64,
MIPS_32_eb,
MIPS_64_el,
PPC_64,
Sparc,
SparcV9,
SystemZ,
XCore,
)
from .schema import db_config, Disassembly
def subdisassem_script():
    """Command-line entry point: brute-force disassemble a binary.

    Parses the command line, configures logging, and opens (or creates) a
    SQLite database named ``<binary name>.sqlite`` next to the input file.
    For every imported architecture class and every start offset in
    ``range(--fuzz)``, the file contents are handed to the architecture
    class and one ``Disassembly`` row is stored, unless a row for the same
    (checksum, offset, arch) already exists. Finally, up to three rows
    sharing the highest ``count`` are logged, largest ``size`` first.

    Command-line options:
        -v / --verbose   DEBUG logging with file name and line number.
        -b / --bin-path  Path of the binary to analyse (required).
        -l / --log       Log to ``<binary name>.log`` next to the binary.
        -f / --fuzz      Exclusive upper bound of offsets tried (default 64).
    """
    parser = ArgumentParser(description="")
    parser.add_argument("-v", "--verbose", action="count", help="verbose logging")
    parser.add_argument("-b", "--bin-path", required=True)
    parser.add_argument("-l", "--log", action="store_true", help="log to file")
    # type=int is required: without it a value given on the command line is a
    # str and range(args.fuzz) below raises TypeError.
    parser.add_argument(
        "-f", "--fuzz", type=int, default=64, help="offset bruteforce max"
    )
    args = parser.parse_args()
    args.bin_path = Path(args.bin_path)

    if args.verbose:
        log_config = {
            "level": logging.DEBUG,
            "format": "%(asctime)s %(filename)s:%(lineno)d %(message)s",
        }
    else:
        log_config = {
            "level": logging.INFO,
            "format": "%(asctime)s %(message)s",
        }
    if args.log:
        # The log file lives next to the binary, named after it.
        log_config["filename"] = args.bin_path.parent.joinpath(
            f"{args.bin_path.name}.log"
        )
    logging.basicConfig(**log_config)

    logging.info(args)

    db_path = args.bin_path.parent.joinpath(f"{args.bin_path.name}.sqlite").absolute()
    session = db_config(db_path)
    logging.info(f"results sqlite database created at {db_path}")

    # Reading the whole file into memory until there is a plan for paginating.
    raw_bytes = args.bin_path.read_bytes()

    checksum = sha1(raw_bytes).hexdigest()
    logging.info(f"sha1sum: {checksum}")

    archs = [
        X86_intel,
        X86,
        X86_64,
        ARM,
        Thumb,
        ARM_64,
        MIPS_32_eb,
        MIPS_64_el,
        PPC_64,
        Sparc,
        SparcV9,
        SystemZ,
        XCore,
    ]
    bin_path_str = str(args.bin_path.absolute())

    for arch in archs:
        for offset in range(args.fuzz):
            # Skip combinations already stored so a rerun on the same file
            # only does the missing work.
            exists = (
                session.query(Disassembly)
                .filter(Disassembly.checksum == checksum)
                .filter(Disassembly.offset == offset)
                .filter(Disassembly.arch == arch.__name__)
                .first()
            )
            if exists:
                logging.debug(
                    f"subdiassembly_exists: {[arch.__name__, checksum, offset]}"
                )
                continue

            disassembler = arch(payload=raw_bytes, offset=offset)
            row = Disassembly()
            row.arch = disassembler.arch
            row.checksum = checksum
            row.count = len(disassembler)
            row.size = len(raw_bytes) - offset
            row.offset = offset
            row.opcodes = disassembler.objdump
            row.path = bin_path_str
            session.add(row)
            session.commit()

    # NOTE(review): this ranking looks at every row in the database, not only
    # rows matching the current checksum - confirm that is intended.
    best = session.query(Disassembly).order_by(desc("count")).first()
    if best is None:
        # first() yields None on an empty table (e.g. --fuzz 0 with a fresh
        # database); there is nothing to rank in that case.
        logging.warning("no disassembly results available")
        return

    tops = (
        session.query(Disassembly)
        .filter(Disassembly.count == best.count)
        .order_by(desc("size"))
        .all()
    )
    for top in tops[:3]:
        logging.info(top)