import json import logging import subprocess from pathlib import Path from typing import List, Optional import asm_differ.diff as asm_differ from coreapp import platforms from coreapp.platforms import DUMMY, Platform from .compiler_wrapper import DiffResult, PATH from .error import AssemblyError, DiffError, NmError, ObjdumpError from .models.scratch import Assembly from .sandbox import Sandbox logger = logging.getLogger(__name__) MAX_FUNC_SIZE_LINES = 5000 class DiffWrapper: @staticmethod def filter_objdump_flags(compiler_flags: str) -> str: # Remove irrelevant flags that are part of the base objdump configs, but clutter the compiler settings field. # TODO: use cfg for this? skip_flags_with_args: set[str] = set() skip_flags = { "--disassemble", "--disassemble-zeroes", "--line-numbers", "--reloc", } skip_next = False flags = [] for flag in compiler_flags.split(): if skip_next: skip_next = False continue if flag in skip_flags: continue if flag in skip_flags_with_args: skip_next = True continue if any(flag.startswith(f) for f in skip_flags_with_args): continue flags.append(flag) return " ".join(flags) @staticmethod def create_config(arch: asm_differ.ArchSettings) -> asm_differ.Config: return asm_differ.Config( arch=arch, # Build/objdump options diff_obj=True, objfile="", make=False, source_old_binutils=True, diff_section=".text", inlines=False, max_function_size_lines=MAX_FUNC_SIZE_LINES, max_function_size_bytes=MAX_FUNC_SIZE_LINES * 4, # Display options formatter=asm_differ.JsonFormatter(arch_str=arch.name), threeway=None, base_shift=0, skip_lines=0, compress=None, show_branches=True, show_line_numbers=False, show_source=False, stop_at_ret=False, ignore_large_imms=False, ignore_addr_diffs=True, algorithm="levenshtein", reg_categories={}, ) @staticmethod def get_objdump_target_function_flags( sandbox: Sandbox, target_path: Path, platform: Platform, label: str ) -> List[str]: if not label: return ["--start-address=0"] if platform.supports_objdump_disassemble: return [f"--disassemble={label}"] if not platform.nm_cmd: raise NmError(f"No nm command for {platform.id}") try: nm_proc = sandbox.run_subprocess( [platform.nm_cmd] + [sandbox.rewrite_path(target_path)], shell=True, env={ "PATH": PATH, }, ) except subprocess.CalledProcessError as e: raise NmError.from_process_error(e) if nm_proc.stdout: # e.g. # 00000000 T osEepromRead # U osMemSize for line in nm_proc.stdout.splitlines(): nm_line = line.split() if len(nm_line) == 3 and label == nm_line[2]: start_addr = int(nm_line[0], 16) return [f"--start-address={start_addr}"] return ["--start-address=0"] @staticmethod def parse_objdump_flags(diff_flags: List[str]) -> List[str]: ret = [] if "-Mreg-names=32" in diff_flags: ret.append("-Mreg-names=32") return ret @staticmethod def run_objdump( target_data: bytes, platform: Platform, config: asm_differ.Config, label: str, flags: List[str], ) -> str: flags += [ "--disassemble", "--disassemble-zeroes", "--line-numbers", "--reloc", ] with Sandbox() as sandbox: target_path = sandbox.path / "out.s" target_path.write_bytes(target_data) flags += DiffWrapper.get_objdump_target_function_flags( sandbox, target_path, platform, label ) flags += config.arch.arch_flags if platform.objdump_cmd: try: objdump_proc = sandbox.run_subprocess( platform.objdump_cmd.split() + flags + [sandbox.rewrite_path(target_path)], shell=True, env={ "PATH": PATH, }, ) except subprocess.CalledProcessError as e: raise ObjdumpError.from_process_error(e) else: raise ObjdumpError(f"No objdump command for {platform.id}") out = objdump_proc.stdout return out @staticmethod def get_dump( elf_object: bytes, platform: Platform, diff_label: str, config: asm_differ.Config, diff_flags: List[str], ) -> str: if len(elf_object) == 0: raise AssemblyError("Asm empty") basedump = DiffWrapper.run_objdump( elf_object, platform, config, diff_label, diff_flags ) if not basedump: raise ObjdumpError("Error running objdump") # Preprocess the dump try: basedump = asm_differ.preprocess_objdump_out( None, elf_object, basedump, config ) except AssertionError as e: logger.exception("Error preprocessing dump") raise DiffError(f"Error preprocessing dump: {e}") except Exception as e: raise DiffError(f"Error preprocessing dump: {e}") return basedump @staticmethod def diff( target_assembly: Assembly, platform: Platform, diff_label: str, compiled_elf: bytes, allow_target_only: bool, diff_flags: List[str], ) -> DiffResult: if platform == DUMMY: # Todo produce diff for dummy return {"rows": ["a", "b"]} try: arch = asm_differ.get_arch(platform.arch or "") except ValueError: logger.error(f"Unsupported arch: {platform.arch}. Continuing assuming mips") arch = asm_differ.get_arch("mips") objdump_flags = DiffWrapper.parse_objdump_flags(diff_flags) config = DiffWrapper.create_config(arch) # TODO pass and use diff_flags basedump = DiffWrapper.get_dump( bytes(target_assembly.elf_object), platform, diff_label, config, objdump_flags, ) try: mydump = DiffWrapper.get_dump( compiled_elf, platform, diff_label, config, objdump_flags ) except Exception as e: if allow_target_only: mydump = "" else: raise e try: display = asm_differ.Display(basedump, mydump, config) except Exception as e: raise DiffError(f"Error running asm-differ: {e}") try: # TODO: It would be nice to get a python object from `run_diff()` to avoid the # JSON roundtrip. See https://github.com/simonlindholm/asm-differ/issues/56 result = json.loads(display.run_diff()[0]) result["error"] = None except Exception as e: raise DiffError(f"Error running asm-differ: {e}") return result