Compare commits

...

4 Commits

Author SHA1 Message Date
JoYo e0c42b8406 single arch from rizin 2022-01-25 17:10:02 -05:00
JoYo 2ad8d47d3a engine type stored in db 2022-01-24 13:47:31 -05:00
JoYo 8cb6a2f393 refactor submodule to include additional disassemblers 2022-01-24 13:27:11 -05:00
JoYo c370df827c display return rates for each arch 2022-01-24 13:15:33 -05:00
6 changed files with 226 additions and 59 deletions

View File

@ -1,23 +1,12 @@
FROM ubuntu:22.04 FROM rizin/rizin:latest
ENV DEBIAN_FRONTEND noninteractive
USER root
RUN apt-get update && apt-get install --yes \ RUN apt-get update && apt-get install --yes \
build-essential \
git \
python3-setuptools \ python3-setuptools \
python3-sqlalchemy \ python3-sqlalchemy \
&& apt-get clean && apt-get clean
RUN git clone -b next https://github.com/capstone-engine/capstone.git /capstone/
WORKDIR /capstone/
RUN sh /capstone/make.sh
RUN sh /capstone/make.sh install
COPY setup.py /app/ COPY setup.py /app/
COPY subdisassem /app/subdisassem/ COPY subdisassem /app/subdisassem/
WORKDIR /app/ WORKDIR /app/
RUN python3 setup.py install RUN python3 setup.py install
RUN useradd -m subdisassem
WORKDIR /home/subdisassem
ENV HOME /home/subdisassem

View File

@ -12,6 +12,7 @@ setup(
python_requires=">3", python_requires=">3",
install_requires=[ install_requires=[
"capstone", "capstone",
"rzpipe",
"SQLAlchemy", "SQLAlchemy",
], ],
) )

View File

@ -31,7 +31,6 @@ import logging
class _CapstoneBase: class _CapstoneBase:
def __init__(self, payload: bytes, offset: int = 0): def __init__(self, payload: bytes, offset: int = 0):
self.arch = self.__class__.__name__
self.capstone.skipdata = True self.capstone.skipdata = True
self.capstone.skipdata_setup = ("unknown", None, None) self.capstone.skipdata_setup = ("unknown", None, None)
@ -74,18 +73,42 @@ class _CapstoneBase:
return opcodes return opcodes
@property @property
def rates(self) -> list: def rets(self) -> list:
if hasattr(self, "_rets"):
return self._rets
self._rets = list()
for opcode in self.disassembly:
if "ret" in opcode.mnemonic:
self._rets.append(opcode.mnemonic)
return self._rets
@property
def ret_rates(self) -> list:
rates = dict()
for mnemonic in set(self.rets):
rates[mnemonic] = self.rets.count(mnemonic)
listed = sorted(((value, key) for (key, value) in rates.items()), reverse=True)
return listed
@property
def mnemonic_rates(self) -> list:
mnemonics = list() mnemonics = list()
for opcode in self.disassembly: for opcode in self.disassembly:
mnemonics.append(opcode.mnemonic) mnemonics.append(opcode.mnemonic)
_rates = dict() rates = dict()
for mnemonic in set(mnemonics): for mnemonic in set(mnemonics):
_rates[mnemonic] = mnemonics.count(mnemonic) rates[mnemonic] = mnemonics.count(mnemonic)
listed = sorted(((value, key) for (key, value) in _rates.items()), reverse=True) listed = sorted(((value, key) for (key, value) in rates.items()), reverse=True)
return listed return listed

View File

@ -0,0 +1,114 @@
from pathlib import Path
import logging
import rzpipe
import json
class _RizinBase:
    """Disassemble a file through rizin and expose simple mnemonic statistics.

    Subclasses provide ``arch_cmds``, the list of rizin commands that are run
    before analysis (for example to select the architecture and bit width).
    """

    def __init__(self, path: Path):
        """Open *path* in rizin, run the setup commands and parse ``pdj``.

        Args:
            path: File to disassemble; its absolute path is given to rzpipe.

        Raises:
            json.JSONDecodeError: If the ``pdj`` output is not valid JSON.
        """
        rz_pipe = rzpipe.open(str(path.absolute()))
        try:
            for cmd in self.arch_cmds:
                rz_pipe.cmd(cmd)
            rz_pipe.cmd("aa")
            result = rz_pipe.cmd("pdj")
        finally:
            # Close the pipe even when a command raises, so a failed run does
            # not leave it open.
            rz_pipe.quit()
        self.disassembly = json.loads(result)

    def __repr__(self) -> str:
        return self.objdump

    def __len__(self) -> int:
        """Return the number of entries in the parsed disassembly."""
        return len(self.disassembly)

    def __lt__(self, other):
        """Order instances by their number of disassembly entries."""
        return len(self) < len(other)

    @staticmethod
    def _count_descending(mnemonics) -> list:
        """Return ``(count, mnemonic)`` tuples sorted in descending order."""
        # Function-scope import keeps this class self-contained.
        from collections import Counter

        counts = Counter(mnemonics)
        return sorted(
            ((count, mnemonic) for (mnemonic, count) in counts.items()),
            reverse=True,
        )

    @property
    def objdump(self) -> str:
        """Text listing with one ``<hex offset>:<TAB><opcode>`` line per entry.

        The result is cached on first successful access.
        """
        if hasattr(self, "_objdump"):
            return self._objdump

        lines = []
        for each in self.disassembly:
            offset = each.get("offset")
            opcode = each.get("opcode")
            lines.append(f"{offset:#02x}:\t{opcode}\n")

        # Assign only once the listing is complete: a formatting error must
        # not leave a truncated listing behind in the cache.
        self._objdump = "".join(lines)
        return self._objdump

    @property
    def disasm(self) -> list:
        """List of ``[offset, mnemonic, operands]`` per disassembly entry.

        ``mnemonic`` is the first space-separated token of the opcode text and
        ``operands`` the remaining tokens. When an entry has no opcode text,
        ``mnemonic`` is ``None`` and the third item is the opcode value as-is.
        The result is cached on first successful access.
        """
        if hasattr(self, "_disasm"):
            return self._disasm

        entries = []
        for each in self.disassembly:
            offset = each.get("offset")
            opcode = each.get("opcode")
            if opcode:
                mnemonic, *operands = opcode.split(" ")
                entries.append([offset, mnemonic, operands])
            else:
                entries.append([offset, None, opcode])

        # Cache only the fully built list (see objdump).
        self._disasm = entries
        return self._disasm

    @property
    def rets(self) -> list:
        """Mnemonics containing the substring ``"ret"``, in listing order.

        The result is cached on first successful access.
        """
        if hasattr(self, "_rets"):
            return self._rets

        found = [
            mnemonic
            for (_, mnemonic, _) in self.disasm
            if mnemonic and "ret" in mnemonic
        ]
        self._rets = found
        return self._rets

    @property
    def ret_rates(self) -> list:
        """``(count, mnemonic)`` tuples for ``rets``, sorted descending."""
        return self._count_descending(self.rets)

    @property
    def mnemonic_rates(self) -> list:
        """``(count, mnemonic)`` tuples for all mnemonics, sorted descending.

        Entries without a mnemonic are ignored.
        """
        mnemonics = [mnemonic for (_, mnemonic, _) in self.disasm if mnemonic]
        return self._count_descending(mnemonics)
class x86_16(_RizinBase):
    """Rizin-backed disassembler set up for x86 with 16-bit decoding."""

    # Commands run before analysis: select the x86 architecture, then 16 bits.
    arch_cmds = [
        "e asm.arch=x86",
        "e asm.bits=16",
    ]

View File

@ -20,11 +20,13 @@ class Disassembly(Base):
arch = Column(String, nullable=False) arch = Column(String, nullable=False)
checksum = Column(String, nullable=False) checksum = Column(String, nullable=False)
count = Column(Integer, nullable=False) count = Column(Integer, nullable=False)
rates = Column(String, nullable=False) engine = Column(String, nullable=False)
size = Column(Integer, nullable=False) mnemonic_rates = Column(String, nullable=False)
offset = Column(Integer, nullable=False) offset = Column(Integer, nullable=False)
opcodes = Column(String, nullable=False) opcodes = Column(String, nullable=False)
path = Column(String, nullable=False) path = Column(String, nullable=False)
ret_rates = Column(String, nullable=False)
size = Column(Integer, nullable=False)
def __repr__(self): def __repr__(self):
return f"<Disassembly {json.dumps(self.values, indent=1)}>" return f"<Disassembly {json.dumps(self.values, indent=1)}>"
@ -40,10 +42,11 @@ class Disassembly(Base):
values_dict = { values_dict = {
"id": self.id, "id": self.id,
"arch": self.arch, "arch": self.arch,
"engine": self.engine,
"count": self.count, "count": self.count,
"rates": self.rates,
"size": self.size, "size": self.size,
"offset": self.offset, "mnemonic_rates": self.mnemonic_rates,
"ret_rates": self.ret_rates,
} }
return values_dict return values_dict

View File

@ -4,22 +4,8 @@ from pathlib import Path
from sqlalchemy import desc from sqlalchemy import desc
import logging import logging
from .disassemble import ( from . import capstone_wrapper
x86_16, from . import rizin_wrapper
x86_32,
x86_64,
armv7,
thumb2,
aarch64,
mips32,
mips64_el,
ppc64,
sparc,
sparcv9,
systemz,
xcore,
)
from .schema import db_config, Disassembly from .schema import db_config, Disassembly
@ -29,16 +15,18 @@ def subdisassem_script():
parser.add_argument("-b", "--bin-path", required=True) parser.add_argument("-b", "--bin-path", required=True)
parser.add_argument("-l", "--log", action="store_true", help="log to file") parser.add_argument("-l", "--log", action="store_true", help="log to file")
parser.add_argument( parser.add_argument(
"-f", "--fuzz", type=int, default=64, help="offset bruteforce max" "-f", "--fuzz", type=int, default=1, help="offset bruteforce max"
) )
args = parser.parse_args() args = parser.parse_args()
args.bin_path = Path(args.bin_path) args.bin_path = Path(args.bin_path)
if args.verbose: if args.verbose:
print_count = -1
level = logging.DEBUG level = logging.DEBUG
format = "%(asctime)s %(filename)s:%(lineno)d %(message)s" format = "%(asctime)s %(filename)s:%(lineno)d %(message)s"
else: else:
print_count = 5
level = logging.INFO level = logging.INFO
format = "%(asctime)s %(message)s" format = "%(asctime)s %(message)s"
@ -73,29 +61,30 @@ def subdisassem_script():
logging.info(f"sha1sum: {checksum}") logging.info(f"sha1sum: {checksum}")
archs = [ capstone_archs = [
x86_16, capstone_wrapper.x86_16,
x86_32, capstone_wrapper.x86_32,
x86_64, capstone_wrapper.x86_64,
armv7, capstone_wrapper.armv7,
thumb2, capstone_wrapper.thumb2,
aarch64, capstone_wrapper.aarch64,
mips32, capstone_wrapper.mips32,
mips64_el, capstone_wrapper.mips64_el,
ppc64, capstone_wrapper.ppc64,
sparc, capstone_wrapper.sparc,
sparcv9, capstone_wrapper.sparcv9,
systemz, capstone_wrapper.systemz,
xcore, capstone_wrapper.xcore,
] ]
for arch in archs: for arch in capstone_archs:
for offset in range(args.fuzz): for offset in range(args.fuzz):
exists = ( exists = (
session.query(Disassembly) session.query(Disassembly)
.filter(Disassembly.checksum == checksum) .filter(Disassembly.checksum == checksum)
.filter(Disassembly.offset == offset) .filter(Disassembly.offset == offset)
.filter(Disassembly.arch == arch.__name__) .filter(Disassembly.arch == arch.__name__)
.filter(Disassembly.engine == str(arch.__bases__))
.first() .first()
) )
@ -107,21 +96,69 @@ def subdisassem_script():
disasembler = arch(payload=raw_bytes, offset=offset) disasembler = arch(payload=raw_bytes, offset=offset)
row = Disassembly() row = Disassembly()
row.arch = disasembler.arch row.arch = disasembler.__class__.__name__
row.checksum = checksum row.checksum = checksum
row.count = len(disasembler) row.count = len(disasembler)
row.rates = str(disasembler.rates[:5]) row.engine = str(arch.__bases__)
row.size = len(raw_bytes) - offset row.mnemonic_rates = str(disasembler.mnemonic_rates[:print_count])
row.offset = offset row.offset = offset
row.opcodes = disasembler.objdump row.opcodes = disasembler.objdump
row.path = str(args.bin_path.absolute()) row.path = str(args.bin_path.absolute())
row.ret_rates = str(disasembler.ret_rates[:print_count])
row.size = len(raw_bytes) - offset
session.add(row)
session.commit()
rizin_archs = [
rizin_wrapper.x86_16,
]
for arch in rizin_archs:
for offset in range(args.fuzz):
exists = (
session.query(Disassembly)
.filter(Disassembly.checksum == checksum)
.filter(Disassembly.offset == offset)
.filter(Disassembly.arch == arch.__name__)
.filter(Disassembly.engine == str(arch.__bases__))
.first()
)
if exists:
logging.debug(
f"subdiassembly_exists: {[arch.__name__, checksum, offset]}"
)
continue
disasembler = arch(path=args.bin_path)
row = Disassembly()
row.arch = disasembler.__class__.__name__
row.checksum = checksum
row.count = len(disasembler)
row.engine = str(arch.__bases__)
row.mnemonic_rates = str(disasembler.mnemonic_rates[:print_count])
row.offset = offset
row.opcodes = disasembler.objdump
row.path = str(args.bin_path.absolute())
row.ret_rates = str(disasembler.ret_rates[:print_count])
row.size = len(raw_bytes) - offset
session.add(row) session.add(row)
session.commit() session.commit()
tops = list() tops = list()
for arch in archs: for arch in capstone_archs:
top = (
session.query(Disassembly)
.filter(Disassembly.arch == arch.__name__)
.order_by(desc("count"))
.first()
)
tops.append(top)
for arch in rizin_archs:
top = ( top = (
session.query(Disassembly) session.query(Disassembly)
.filter(Disassembly.arch == arch.__name__) .filter(Disassembly.arch == arch.__name__)
@ -132,5 +169,5 @@ def subdisassem_script():
tops = sorted(tops, key=len, reverse=True) tops = sorted(tops, key=len, reverse=True)
for top in tops[:3]: for top in tops[:print_count]:
logging.info(top) logging.info(top)