from argparse import ArgumentParser
from hashlib import sha1
from pathlib import Path
import logging

from sqlalchemy import desc

from .disassemble import (
    x86_16,
    x86_32,
    x86_64,
    armv7,
    thumb2,
    aarch64,
    mips32,
    mips64_el,
    ppc64,
    sparc,
    sparcv9,
    systemz,
    xcore,
)
from .schema import db_config, Disassembly

# Disassembler classes tried against the input, in the order they are run.
_ARCHS = (
    x86_16,
    x86_32,
    x86_64,
    armv7,
    thumb2,
    aarch64,
    mips32,
    mips64_el,
    ppc64,
    sparc,
    sparcv9,
    systemz,
    xcore,
)


def _parse_args():
    """Parse the command line and return the namespace with ``bin_path`` as a ``Path``."""
    parser = ArgumentParser(description="")
    parser.add_argument("-v", "--verbose", action="count", help="verbose logging")
    parser.add_argument("-b", "--bin-path", required=True)
    parser.add_argument("-l", "--log", action="store_true", help="log to file")
    parser.add_argument(
        "-f", "--fuzz", type=int, default=1, help="offset bruteforce max"
    )
    args = parser.parse_args()
    args.bin_path = Path(args.bin_path)
    return args


def _configure_logging(args):
    """Configure the root logger from the ``verbose`` and ``log`` options."""
    if args.verbose:
        level = logging.DEBUG
        log_format = "%(asctime)s %(filename)s:%(lineno)d %(message)s"
    else:
        level = logging.INFO
        log_format = "%(asctime)s %(message)s"

    options = {"level": level, "format": log_format}
    if args.log:
        # The log file is written next to the input binary as "<name>.log".
        options["filename"] = args.bin_path.parent.joinpath(
            f"{args.bin_path.name}.log"
        )
    logging.basicConfig(**options)


def subdisassem_script():
    """Disassemble a binary under every known architecture and log the best fits.

    Command-line entry point. For each architecture in ``_ARCHS`` and each
    start offset in ``range(--fuzz)``, the input file is disassembled and one
    ``Disassembly`` row is stored in a SQLite database created next to the
    binary (``<name>.sqlite``). Combinations already stored for the same file
    checksum are skipped, so an interrupted run can be resumed. Finally, the
    best row (highest ``count``) of each architecture is collected and the top
    five overall are logged.
    """
    args = _parse_args()
    _configure_logging(args)
    logging.info(args)

    db_path = args.bin_path.parent.joinpath(f"{args.bin_path.name}.sqlite").absolute()
    session = db_config(db_path)
    logging.info(f"results sqlite database created at {db_path}")

    # reading the whole file into memory until I get an idea for paginating
    raw_bytes = args.bin_path.read_bytes()

    checksum = sha1(raw_bytes).hexdigest()
    logging.info(f"sha1sum: {checksum}")

    for arch in _ARCHS:
        for offset in range(args.fuzz):
            # NOTE(review): the lookup keys on arch.__name__ while the stored
            # value is disassembler.arch -- confirm these are always equal,
            # otherwise already-processed combinations are never recognised.
            exists = (
                session.query(Disassembly)
                .filter(Disassembly.checksum == checksum)
                .filter(Disassembly.offset == offset)
                .filter(Disassembly.arch == arch.__name__)
                .first()
            )
            if exists:
                logging.debug(
                    f"subdiassembly_exists: {[arch.__name__, checksum, offset]}"
                )
                continue

            disassembler = arch(payload=raw_bytes, offset=offset)

            row = Disassembly()
            row.arch = disassembler.arch
            row.checksum = checksum
            row.count = len(disassembler)
            row.mnemonic_rates = str(disassembler.mnemonic_rates[:5])
            row.ret_rates = str(disassembler.ret_rates[:5])
            row.size = len(raw_bytes) - offset
            row.offset = offset
            row.opcodes = disassembler.objdump
            row.path = str(args.bin_path.absolute())

            session.add(row)
            # Commit per row so finished work survives an interrupted run.
            session.commit()

    tops = []
    for arch in _ARCHS:
        top = (
            session.query(Disassembly)
            # Restrict to the current file contents: the database is keyed by
            # file name only, so it may hold rows from an older binary.
            .filter(Disassembly.checksum == checksum)
            .filter(Disassembly.arch == arch.__name__)
            .order_by(desc("count"))
            .first()
        )
        # first() yields None when nothing is stored for this architecture
        # (e.g. --fuzz 0 on a fresh database); such entries cannot be ranked.
        if top is not None:
            tops.append(top)

    # Rank on the stored "count" value, matching the ORDER BY used above.
    tops.sort(key=lambda best: best.count, reverse=True)

    for top in tops[:5]:
        logging.info(top)