diff --git a/backend/mips_to_c/.gitrepo b/backend/mips_to_c/.gitrepo index 6ac7b89b..5d2c4268 100644 --- a/backend/mips_to_c/.gitrepo +++ b/backend/mips_to_c/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/matt-kempster/mips_to_c branch = master - commit = 3c3b0cede1a99430bfd3edf8d385802b94f91307 - parent = be2d33a8d349224b72e85ab418eb2c21c2de18d9 + commit = c0ddea7fd365f56b531f3b981f0f678730f2b882 + parent = 7d76774b085f31411e8473b7c8844247e76fb6d1 method = merge cmdver = 0.4.3 diff --git a/backend/mips_to_c/__init__.py b/backend/mips_to_c/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/mips_to_c/src/c_types.py b/backend/mips_to_c/src/c_types.py index c67dfac3..45fee5d4 100644 --- a/backend/mips_to_c/src/c_types.py +++ b/backend/mips_to_c/src/c_types.py @@ -68,8 +68,10 @@ class TypeMap: typedefs: Dict[str, CType] = field(default_factory=dict) var_types: Dict[str, CType] = field(default_factory=dict) functions: Dict[str, Function] = field(default_factory=dict) - structs: Dict[Union[str, int], Struct] = field(default_factory=dict) - struct_typedefs: Dict[Union[str, int], CType] = field(default_factory=dict) + structs: Dict[Union[str, StructUnion], Struct] = field(default_factory=dict) + struct_typedefs: Dict[Union[str, StructUnion], TypeDecl] = field( + default_factory=dict + ) enum_values: Dict[str, int] = field(default_factory=dict) @@ -97,20 +99,6 @@ def resolve_typedefs(type: CType, typemap: TypeMap) -> CType: return type -def pointer_decay(type: CType, typemap: TypeMap) -> SimpleType: - real_type = resolve_typedefs(type, typemap) - if isinstance(real_type, ArrayDecl): - return PtrDecl(quals=[], type=real_type.type) - if isinstance(real_type, FuncDecl): - return PtrDecl(quals=[], type=type) - if isinstance(real_type, TypeDecl) and isinstance(real_type.type, ca.Enum): - return basic_type(["int"]) - assert not isinstance( - type, (ArrayDecl, FuncDecl) - ), "resolve_typedefs can't hide arrays/functions" - return type - - def type_from_global_decl(decl: ca.Decl) -> CType: """Get the CType of a global Decl, stripping names of function parameters.""" tp = decl.type @@ -130,12 +118,6 @@ def type_from_global_decl(decl: ca.Decl) -> CType: return ca.FuncDecl(args=ca.ParamList(new_params), type=tp.type) -def deref_type(type: CType, typemap: TypeMap) -> CType: - type = resolve_typedefs(type, typemap) - assert isinstance(type, (ArrayDecl, PtrDecl)), "dereferencing non-pointer" - return type.type - - def is_void(type: CType) -> bool: return ( isinstance(type, ca.TypeDecl) @@ -144,35 +126,6 @@ def is_void(type: CType) -> bool: ) -def equal_types(a: CType, b: CType) -> bool: - def equal(a: object, b: object) -> bool: - if a is b: - return True - if type(a) != type(b): - return False - if a is None: - return b is None - if isinstance(a, list): - assert isinstance(b, list) - if len(a) != len(b): - return False - for i in range(len(a)): - if not equal(a[i], b[i]): - return False - return True - if isinstance(a, (int, str)): - return bool(a == b) - assert isinstance(a, ca.Node) - for name in a.__slots__[:-2]: # type: ignore - if name == "declname": - continue - if not equal(getattr(a, name), getattr(b, name)): - return False - return True - - return equal(a, b) - - def primitive_size(type: Union[ca.Enum, ca.IdentifierType]) -> int: if isinstance(type, ca.Enum): return 4 @@ -210,11 +163,6 @@ def function_arg_size_align(type: CType, typemap: TypeMap) -> Tuple[int, int]: return size, size -def var_size_align(type: CType, typemap: TypeMap) -> Tuple[int, int]: - size, align, _ = parse_struct_member(type, "", typemap, allow_unsized=True) - return size, align - - def is_struct_type(type: CType, typemap: TypeMap) -> bool: type = resolve_typedefs(type, typemap) if not isinstance(type, TypeDecl): @@ -373,7 +321,7 @@ def get_struct( if struct.name: return typemap.structs.get(struct.name) else: - return typemap.structs.get(id(struct)) + return typemap.structs.get(struct) def parse_struct(struct: Union[ca.Struct, ca.Union], typemap: TypeMap) -> Struct: @@ -385,7 +333,7 @@ def parse_struct(struct: Union[ca.Struct, ca.Union], typemap: TypeMap) -> Struct ret = do_parse_struct(struct, typemap) if struct.name: typemap.structs[struct.name] = ret - typemap.structs[id(struct)] = ret + typemap.structs[struct] = ret return ret @@ -420,22 +368,6 @@ def parse_struct_member( return size, size, None -def expand_detailed_struct_member( - substr: DetailedStructMember, type: CType, size: int -) -> Iterator[Tuple[int, str, CType, int]]: - yield (0, "", type, size) - if isinstance(substr, Struct): - for off, sfields in substr.fields.items(): - for field in sfields: - yield (off, "." + field.name, field.type, field.size) - elif isinstance(substr, Array) and substr.subsize != 1: - for i in range(substr.dim): - for (off, path, subtype, subsize) in expand_detailed_struct_member( - substr.subtype, substr.subctype, substr.subsize - ): - yield (substr.subsize * i + off, f"[{i}]" + path, subtype, subsize) - - def do_parse_struct(struct: Union[ca.Struct, ca.Union], typemap: TypeMap) -> Struct: is_union = isinstance(struct, ca.Union) assert struct.decls is not None, "enforced by caller" @@ -495,12 +427,13 @@ def do_parse_struct(struct: Union[ca.Struct, ca.Union], typemap: TypeMap) -> Str ) align = max(align, salign) offset = (offset + salign - 1) & -salign - for off, path, ftype, fsize in expand_detailed_struct_member( - substr, type, ssize - ): - fields[offset + off].append( - StructField(type=ftype, size=fsize, name=decl.name + path) + fields[offset].append( + StructField( + type=type, + size=ssize, + name=decl.name, ) + ) if is_union: union_size = max(union_size, ssize) else: @@ -531,8 +464,8 @@ def do_parse_struct(struct: Union[ca.Struct, ca.Union], typemap: TypeMap) -> Str offset += 1 # If there is a typedef for this struct, prefer using that name - if id(struct) in typemap.struct_typedefs: - ctype = typemap.struct_typedefs[id(struct)] + if struct in typemap.struct_typedefs: + ctype = typemap.struct_typedefs[struct] elif struct.name and struct.name in typemap.struct_typedefs: ctype = typemap.struct_typedefs[struct.name] else: @@ -641,7 +574,7 @@ def build_typemap(source: str) -> TypeMap: typedef = basic_type([item.name]) if item.type.type.name: ret.struct_typedefs[item.type.type.name] = typedef - ret.struct_typedefs[id(item.type.type)] = typedef + ret.struct_typedefs[item.type.type] = typedef if isinstance(item, ca.FuncDef): assert item.decl.name is not None, "cannot define anonymous function" fn = parse_function(item.decl.type) diff --git a/backend/mips_to_c/src/error.py b/backend/mips_to_c/src/error.py index 8f3570d1..193763e7 100644 --- a/backend/mips_to_c/src/error.py +++ b/backend/mips_to_c/src/error.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from typing import NoReturn @dataclass @@ -7,3 +8,7 @@ class DecompFailure(Exception): def __str__(self) -> str: return self.message + + +def static_assert_unreachable(x: NoReturn) -> NoReturn: + raise Exception(f"Unreachable: {repr(x)}") diff --git a/backend/mips_to_c/src/if_statements.py b/backend/mips_to_c/src/if_statements.py index 698f1e02..aa1dced9 100644 --- a/backend/mips_to_c/src/if_statements.py +++ b/backend/mips_to_c/src/if_statements.py @@ -156,7 +156,7 @@ class LabelStatement: for (switch, case) in self.context.case_nodes[self.node]: if case is not None: case_num = f"0x{case:X}" if fmt.coding_style.hex_case else f"{case}" - case_str = f"case {case_num}" + case_str = f"case {case_num}" else: case_str = "default" switch_str = f" // switch {switch}" if switch != 0 else "" diff --git a/backend/mips_to_c/src/main.py b/backend/mips_to_c/src/main.py index 7dd643b3..ffcfdfff 100644 --- a/backend/mips_to_c/src/main.py +++ b/backend/mips_to_c/src/main.py @@ -41,7 +41,7 @@ def print_exception(sanitize: bool) -> None: def run(options: Options) -> int: all_functions: Dict[str, Function] = {} asm_data = AsmData() - typemap: Optional[TypeMap] = None + typemap: TypeMap = TypeMap() try: for filename in options.filenames: if filename == "-": @@ -63,7 +63,6 @@ def run(options: Options) -> int: return 1 if options.dump_typemap: - assert typemap dump_typemap(typemap) return 0 @@ -88,7 +87,7 @@ def run(options: Options) -> int: functions.append(all_functions[index_or_name]) function_names = set(all_functions.keys()) - global_info = GlobalInfo(asm_data, function_names, typemap) + global_info = GlobalInfo(asm_data, function_names, typemap=typemap) function_infos: List[Union[FunctionInfo, Exception]] = [] for function in functions: try: diff --git a/backend/mips_to_c/src/translate.py b/backend/mips_to_c/src/translate.py index 3a89ec89..dedf0f79 100644 --- a/backend/mips_to_c/src/translate.py +++ b/backend/mips_to_c/src/translate.py @@ -19,8 +19,8 @@ from typing import ( Union, ) -from .c_types import TypeMap -from .error import DecompFailure +from .c_types import CType, TypeMap +from .error import DecompFailure, static_assert_unreachable from .flow_graph import ( FlowGraph, Function, @@ -43,13 +43,12 @@ from .parse_instruction import ( Register, ) from .types import ( + AccessPath, FunctionParam, FunctionSignature, + StructDeclaration, Type, - find_substruct_array, - get_field, - ptr_type_from_ctype, - type_from_ctype, + TypePool, ) ASSOCIATIVE_OPS: Set[str] = {"+", "&&", "||", "&", "|", "^", "*"} @@ -808,6 +807,23 @@ class BinaryOp(Condition): return f"({lhs} {self.op} {right_expr.format(fmt)})" +@dataclass(frozen=True, eq=False) +class TernaryOp(Expression): + cond: Condition + left: Expression + right: Expression + type: Type + + def dependencies(self) -> List[Expression]: + return [self.cond, self.left, self.right] + + def format(self, fmt: Formatter) -> str: + cond_str = simplify_condition(self.cond).format(fmt) + left_str = self.left.format(fmt) + right_str = self.right.format(fmt) + return f"({cond_str} ? {left_str} : {right_str})" + + @dataclass(frozen=True, eq=False) class UnaryOp(Condition): op: str @@ -1007,42 +1023,83 @@ class StructAccess(Expression): struct_var: Expression offset: int target_size: Optional[int] - field_name: Optional[str] = field(compare=False) + field_path: Optional[AccessPath] = field(compare=False) stack_info: StackInfo = field(compare=False, repr=False) type: Type = field(compare=False) - has_late_field_name: bool = field(default=False, compare=False) + checked_late_field_path: bool = field(default=False, compare=False) + + def __post_init__(self) -> None: + self.assert_valid_field_path(self.field_path) + + @staticmethod + def assert_valid_field_path(path: Optional[AccessPath]) -> None: + assert path is None or ( + path and isinstance(path[0], int) + ), "The first element of the field path, if present, must be an int" + + @classmethod + def access_path_to_field_name(cls, path: AccessPath) -> str: + """ + Convert an access path into a dereferencing field name, like the following examples: + - `[0, "foo", 3, "bar"]` into `"->foo[3].bar"` + - `[0, 3, "bar"]` into `"[0][3].bar"` + - `[0, 1, 2]` into `"[0][1][2]" + - `[0]` into `"[0]"` + The path must have at least one element, and the first element must be an int. + """ + cls.assert_valid_field_path(path) + output = "" + + # Replace an initial "[0]." with "->" + if len(path) >= 2 and path[0] == 0 and isinstance(path[1], str): + output += f"->{path[1]}" + path = path[2:] + + for p in path: + if isinstance(p, str): + output += f".{p}" + elif isinstance(p, int): + output += f"[{p}]" + else: + static_assert_unreachable(p) + return output def dependencies(self) -> List[Expression]: return [self.struct_var] - def late_field_name(self) -> Optional[str]: + def make_reference(self) -> Optional["StructAccess"]: + field_path = self.late_field_path() + if field_path and len(field_path) >= 2 and field_path[-1] == 0: + return replace(self, field_path=field_path[:-1]) + return None + + def late_field_path(self) -> Optional[AccessPath]: # If we didn't have a type at the time when the struct access was # constructed, but now we do, compute field name. - if ( - self.field_name is None - and self.stack_info.global_info.typemap - and not self.has_late_field_name - ): + + if self.field_path is None and not self.checked_late_field_path: var = late_unwrap(self.struct_var) - self.field_name = get_field( - var.type, - self.offset, - target_size=self.target_size, - )[0] - self.has_late_field_name = True - return self.field_name + field_path, field_type, remaining_offset = var.type.get_deref_field( + self.offset, target_size=self.target_size + ) + if field_path is not None and remaining_offset == 0: + self.assert_valid_field_path(field_path) + self.field_path = field_path + self.type.unify(field_type) + + self.checked_late_field_path = True + return self.field_path def late_has_known_type(self) -> bool: - if self.late_field_name() is not None: + if self.late_field_path() is not None: return True - if self.offset == 0 and self.stack_info.global_info.typemap: + if self.offset == 0: var = late_unwrap(self.struct_var) if ( not self.stack_info.has_nonzero_access(var) and isinstance(var, AddressOf) and isinstance(var.expr, GlobalSymbol) - and var.expr.symbol_name - in self.stack_info.global_info.typemap.var_types + and var.expr.type_in_typemap ): return True return False @@ -1051,9 +1108,9 @@ class StructAccess(Expression): var = late_unwrap(self.struct_var) has_nonzero_access = self.stack_info.has_nonzero_access(var) - field_name = self.late_field_name() + field_path = self.late_field_path() - if field_name: + if field_path is not None and field_path != [0]: has_nonzero_access = True elif fmt.valid_syntax and (self.offset != 0 or has_nonzero_access): offset_str = ( @@ -1062,27 +1119,25 @@ class StructAccess(Expression): return f"MIPS2C_FIELD({var.format(fmt)}, {Type.ptr(self.type).format(fmt)}, {offset_str})" else: prefix = "unk" + ("_" if fmt.coding_style.unknown_underscore else "") - field_name = prefix + format_hex(self.offset) + field_path = [0, prefix + format_hex(self.offset)] + field_name = self.access_path_to_field_name(field_path) - if isinstance(var, AddressOf): - if isinstance(var.expr, GlobalSymbol) and var.expr.array_dim is not None: - needs_deref = True - else: - needs_deref = False - var = var.expr - else: - needs_deref = True + # Rewrite `(&x)->y` to `x.y` by stripping `AddressOf` & setting deref=False + deref = True + if ( + isinstance(var, AddressOf) + and not var.expr.type.is_array() + and field_name.startswith("->") + ): + var = var.expr + field_name = field_name.replace("->", ".", 1) + deref = False - if needs_deref: - if self.offset == 0 and not has_nonzero_access: - return f"*{var.format(fmt)}" - else: - return f"{parenthesize_for_struct_access(var, fmt)}->{field_name}" - else: - if self.offset == 0 and not has_nonzero_access: - return f"{var.format(fmt)}" - else: - return f"{parenthesize_for_struct_access(var, fmt)}.{field_name}" + # Rewrite `x->unk0` to `*x` and `x.unk0` to `x`, unless has_nonzero_access + if self.offset == 0 and not has_nonzero_access: + return f"{'*' if deref else ''}{var.format(fmt)}" + + return f"{parenthesize_for_struct_access(var, fmt)}{field_name}" @dataclass(frozen=True, eq=True) @@ -1178,6 +1233,9 @@ class Literal(Expression): ) return prefix + mid + suffix + def likely_partial_offset(self) -> bool: + return self.value % 2 ** 15 in (0, 2 ** 15 - 1) and self.value < 0x1000000 + @dataclass(frozen=True, eq=True) class AddressOf(Expression): @@ -1191,13 +1249,17 @@ class AddressOf(Expression): if isinstance(self.expr, GlobalSymbol): if self.expr.is_string_constant(): return self.expr.format_string_constant(fmt) - if self.expr.array_dim is not None: - return f"{self.expr.format(fmt)}" - + if self.expr.type.is_array(): + return f"{self.expr.format(fmt)}" if self.expr.type.is_function(): # Functions are automatically converted to function pointers # without an explicit `&` by the compiler return f"{self.expr.format(fmt)}" + if isinstance(self.expr, StructAccess): + # Simplify `&x[0]` into `x` + ref = self.expr.make_reference() + if ref: + return f"{ref.format(fmt)}" return f"&{self.expr.format(fmt)}" @@ -1819,11 +1881,7 @@ def deref( uw_var = early_unwrap(var) if isinstance(uw_var, BinaryOp) and uw_var.op == "+": for base, addend in [(uw_var.left, uw_var.right), (uw_var.right, uw_var.left)]: - if ( - isinstance(addend, Literal) - and addend.value % 2 ** 15 in [0, 2 ** 15 - 1] - and addend.value < 0x1000000 - ): + if isinstance(addend, Literal) and addend.likely_partial_offset(): offset += addend.value var = base break @@ -1839,31 +1897,20 @@ def deref( ) if array_expr is not None: return array_expr - field_name, new_type, _, _ = get_field(var.type, offset, target_size=size) - if field_name is not None: - new_type.unify(type) - type = new_type - - # Dereferencing pointers of known types - target = var.type.get_pointer_target() - if field_name is None and target is not None: - sub_size = target.get_size_bytes() - if sub_size == size and offset % size == 0: - # TODO: This only turns the deref into an ArrayAccess if the type - # is *known* to be an array (CType). This could be expanded to support - # arrays of other types. - if offset != 0 and target.is_ctype(): - index = Literal(value=offset // size, type=Type.s32()) - return ArrayAccess(var, index, type=target) - else: - # Don't emit an array access, but at least help type inference along - type = target + field_path, field_type, remaining_offset = var.type.get_deref_field( + offset, target_size=size + ) + if field_path is not None and remaining_offset == 0: + field_type.unify(type) + type = field_type + else: + field_path = None return StructAccess( struct_var=var, offset=offset, target_size=size, - field_name=field_name, + field_path=field_path, stack_info=stack_info, type=type, ) @@ -2222,7 +2269,7 @@ def handle_addi_real( var = stack_info.get_stack_var(imm.value, store=False) if isinstance(var, LocalVar): stack_info.add_local_var(var) - return AddressOf(var, type=Type.ptr(var.type)) + return AddressOf(var, type=var.type.reference()) else: return add_imm(source, imm, stack_info) @@ -2232,42 +2279,32 @@ def add_imm(source: Expression, imm: Expression, stack_info: StackInfo) -> Expre # addiu $reg1, $reg2, 0 is a move # (this happens when replacing %lo(...) by 0) return source - elif source.type.is_pointer(): + elif source.type.is_pointer_or_array(): # Pointer addition (this may miss some pointers that get detected later; # unfortunately that's hard to do anything about with mips_to_c's single-pass - # architecture. - if isinstance(imm, Literal): + # architecture). + if isinstance(imm, Literal) and not imm.likely_partial_offset(): array_access = array_access_from_add( source, imm.value, stack_info, target_size=None, ptr=True ) if array_access is not None: return array_access - field_name, subtype, ptr_type, array_dim = get_field( - source.type, imm.value, target_size=None + field_path, field_type, remaining_offset = source.type.get_deref_field( + imm.value, target_size=None ) - if field_name is not None: - if array_dim is not None: - return StructAccess( + if field_path is not None and remaining_offset == 0: + return AddressOf( + StructAccess( struct_var=source, offset=imm.value, target_size=None, - field_name=field_name, + field_path=field_path, stack_info=stack_info, - type=ptr_type, - ) - else: - return AddressOf( - StructAccess( - struct_var=source, - offset=imm.value, - target_size=None, - field_name=field_name, - stack_info=stack_info, - type=subtype, - ), - type=ptr_type, - ) + type=field_type, + ), + type=field_type.reference(), + ) if isinstance(imm, Literal): target = source.type.get_pointer_target() if target: @@ -2336,6 +2373,9 @@ def deref_unaligned( def handle_lwl(args: InstrArgs) -> Expression: + # Unaligned load for the left part of a register (lwl can technically merge with + # a pre-existing lwr, but doesn't in practice, so we treat this as a standard + # destination-first operation) ref = args.memory_ref(1) expr = deref_unaligned(ref, args.regs, args.stack_info) key: Tuple[int, object] @@ -2346,10 +2386,10 @@ def handle_lwl(args: InstrArgs) -> Expression: return Lwl(expr, key) -def handle_lwr(args: InstrArgs, old_value: Expression) -> Expression: - # This lwr may merge with an existing lwl, if it loads from the same target - # but with an offset that's +3. - uw_old_value = early_unwrap(old_value) +def handle_lwr(args: InstrArgs) -> Expression: + # Unaligned load for the right part of a register. This lwr may merge with an + # existing lwl, if it loads from the same target but with an offset that's +3. + uw_old_value = early_unwrap(args.reg(0)) ref = args.memory_ref(1) lwl_key: Tuple[int, object] if isinstance(ref, AddressMode): @@ -2429,6 +2469,17 @@ def handle_sra(args: InstrArgs) -> Expression: return BinaryOp(as_s32(lhs), ">>", as_intish(shift), type=Type.s32()) +def handle_conditional_move(args: InstrArgs, nonzero: bool) -> Expression: + op = "!=" if nonzero else "==" + type = Type.any_reg() + return TernaryOp( + BinaryOp.scmp(args.reg(2), op, Literal(0)), + as_type(args.reg(1), type, silent=True), + as_type(args.reg(0), type, silent=True), + type, + ) + + def format_f32_imm(num: int) -> str: packed = struct.pack(">I", num & (2 ** 32 - 1)) value = struct.unpack(">f", packed)[0] @@ -2550,7 +2601,7 @@ def array_access_from_add( return None base = expr.left addend = expr.right - if addend.type.is_pointer() and not base.type.is_pointer(): + if addend.type.is_pointer_or_array() and not base.type.is_pointer_or_array(): base, addend = addend, base index: Expression @@ -2583,38 +2634,48 @@ def array_access_from_add( pass else: # base->subarray[index] - substr_array = find_substruct_array(base.type, offset, scale) - if substr_array is None: + sub_path, sub_type, remaining_offset = base.type.get_deref_field( + offset, target_size=scale + ) + # Check if the last item in the path is `0`, which indicates the start of an array + # If it is, remove it: it will be replaced by `[index]` + if sub_path is None or len(sub_path) < 2 or sub_path[-1] != 0: return None - sub_field_name, sub_offset, elem_type = substr_array + sub_path.pop() base = StructAccess( struct_var=base, - offset=sub_offset, + offset=offset - remaining_offset, target_size=None, - field_name=sub_field_name, + field_path=sub_path, stack_info=stack_info, - type=Type.ptr(elem_type), + type=sub_type, ) - offset -= sub_offset - target_type = elem_type + offset = remaining_offset + target_type = sub_type - # Add .field if necessary ret: Expression = ArrayAccess(base, index, type=target_type) - field_name, new_type, ptr_type, array_dim = get_field( - base.type, offset, target_size=target_size + + # Add .field if necessary by wrapping ret in StructAccess(AddressOf(...)) + ret_ref = AddressOf(ret, type=ret.type.reference()) + field_path, field_type, remaining_offset = ret_ref.type.get_deref_field( + offset, target_size=target_size ) + if remaining_offset != 0: + field_path = None + if offset != 0 or (target_size is not None and target_size != scale): ret = StructAccess( - struct_var=AddressOf(ret, type=Type.ptr()), + struct_var=ret_ref, offset=offset, target_size=target_size, - field_name=field_name, + field_path=field_path, stack_info=stack_info, - type=ptr_type if array_dim is not None else new_type, + type=field_type, ) - if ptr and array_dim is None: - ret = AddressOf(ret, type=ptr_type) + if ptr: + ret = AddressOf(ret, type=ret.type.reference()) + return ret @@ -2623,9 +2684,11 @@ def handle_add(args: InstrArgs) -> Expression: rhs = args.reg(2) stack_info = args.stack_info type = Type.intptr() - if lhs.type.is_pointer(): + # Because lhs & rhs are in registers, it shouldn't be possible for them to be arrays. + # If they are, treat them the same as pointers anyways. + if lhs.type.is_pointer_or_array(): type = Type.ptr() - elif rhs.type.is_pointer(): + elif rhs.type.is_pointer_or_array(): type = Type.ptr() # addiu instructions can sometimes be emitted as addu instead, when the @@ -2726,7 +2789,7 @@ def function_abi(fn_sig: FunctionSignature, *, for_call: bool) -> Abi: only_floats = True slots: List[AbiArgSlot] = [] possible: List[Register] = [] - if fn_sig.return_type.is_struct_type(): + if fn_sig.return_type.is_struct(): # The ABI for struct returns is to pass a pointer to where it should be written # as the first argument. slots.append( @@ -2741,16 +2804,18 @@ def function_abi(fn_sig: FunctionSignature, *, for_call: bool) -> Abi: only_floats = False for ind, param in enumerate(fn_sig.params): - size, align = param.type.get_size_align_bytes() + # Array parameters decay into pointers + param_type = param.type.decay() + size, align = param_type.get_parameter_size_align_bytes() size = (size + 3) & ~3 - only_floats = only_floats and param.type.is_float() + only_floats = only_floats and param_type.is_float() offset = (offset + align - 1) & -align name = param.name reg2: Optional[Register] if ind < 2 and only_floats: reg = Register("f12" if ind == 0 else "f14") - is_double = param.type.is_float() and param.type.get_size_bits() == 64 - slots.append(AbiArgSlot(offset=offset, reg=reg, name=name, type=param.type)) + is_double = param_type.is_float() and param_type.get_size_bits() == 64 + slots.append(AbiArgSlot(offset=offset, reg=reg, name=name, type=param_type)) if is_double and not for_call: name2 = f"{name}_lo" if name else None reg2 = Register("f13" if ind == 0 else "f15") @@ -2765,7 +2830,7 @@ def function_abi(fn_sig: FunctionSignature, *, for_call: bool) -> Abi: name2 = f"{name}_unk{unk_offset:X}" if name and unk_offset else name reg2 = Register(f"a{i}") if i < 4 else None slots.append( - AbiArgSlot(offset=4 * i, reg=reg2, name=name2, type=param.type) + AbiArgSlot(offset=4 * i, reg=reg2, name=name2, type=param_type) ) offset += size @@ -2781,7 +2846,6 @@ def function_abi(fn_sig: FunctionSignature, *, for_call: bool) -> Abi: InstrSet = Set[str] InstrMap = Dict[str, Callable[[InstrArgs], Expression]] -LwrInstrMap = Dict[str, Callable[[InstrArgs, Expression], Expression]] CmpInstrMap = Dict[str, Callable[[InstrArgs], Condition]] StoreInstrMap = Dict[str, Callable[[InstrArgs], Optional[StoreStmt]]] MaybeInstrMap = Dict[str, Callable[[InstrArgs], Optional[Expression]]] @@ -3083,6 +3147,9 @@ CASES_DESTINATION_FIRST: InstrMap = { "mfc1": lambda a: a.reg(1), "mov.s": lambda a: a.reg(1), "mov.d": lambda a: as_f64(a.dreg(1)), + # Conditional moves + "movn": lambda a: handle_conditional_move(a, True), + "movz": lambda a: handle_conditional_move(a, False), # FCSR get "cfc1": lambda a: ErrorExpr("cfc1"), # Immediates @@ -3095,23 +3162,18 @@ CASES_DESTINATION_FIRST: InstrMap = { "lh": lambda a: handle_load(a, type=Type.s16()), "lhu": lambda a: handle_load(a, type=Type.u16()), "lw": lambda a: handle_load(a, type=Type.reg32(likely_float=False)), + "ld": lambda a: handle_load(a, type=Type.reg64(likely_float=False)), "lwu": lambda a: handle_load(a, type=Type.u32()), "lwc1": lambda a: handle_load(a, type=Type.reg32(likely_float=True)), "ldc1": lambda a: handle_load(a, type=Type.reg64(likely_float=True)), - # Unaligned load for the left part of a register (lwl can technically merge - # with a pre-existing lwr, but doesn't in practice, so we treat this as a - # standard destination-first operation) + # Unaligned loads "lwl": lambda a: handle_lwl(a), -} -CASES_LWR: LwrInstrMap = { - # Unaligned load for the right part of a register. Only writes a partial - # register. - "lwr": lambda a, old_value: handle_lwr(a, old_value), + "lwr": lambda a: handle_lwr(a), } def output_regs_for_instr( - instr: Instruction, typemap: Optional[TypeMap] + instr: Instruction, global_info: "GlobalInfo" ) -> List[Register]: def reg_at(index: int) -> List[Register]: reg = instr.args[index] @@ -3130,11 +3192,10 @@ def output_regs_for_instr( or mnemonic in CASES_NO_DEST ): return [] - if mnemonic == "jal" and typemap: + if mnemonic == "jal": fn_target = instr.args[0] if isinstance(fn_target, AsmGlobalSymbol): - c_fn = typemap.functions.get(fn_target.symbol_name) - if c_fn and c_fn.ret_type is None: + if global_info.is_function_known_void(fn_target.symbol_name): return [] if mnemonic in CASES_FN_CALL: return list(map(Register, ["f0", "f1", "v0", "v1"])) @@ -3142,8 +3203,6 @@ def output_regs_for_instr( return reg_at(1) if mnemonic in CASES_DESTINATION_FIRST: return reg_at(0) - if mnemonic in CASES_LWR: - return reg_at(0) if mnemonic in CASES_FLOAT_COMP: return [Register("condition_bit")] if mnemonic in CASES_HI_LO: @@ -3154,7 +3213,7 @@ def output_regs_for_instr( def regs_clobbered_until_dominator( - node: Node, typemap: Optional[TypeMap] + node: Node, global_info: "GlobalInfo" ) -> Set[Register]: if node.immediate_dominator is None: return set() @@ -3168,7 +3227,7 @@ def regs_clobbered_until_dominator( seen.add(n) for instr in n.block.instructions: with current_instr(instr): - clobbered.update(output_regs_for_instr(instr, typemap)) + clobbered.update(output_regs_for_instr(instr, global_info)) if instr.mnemonic in CASES_FN_CALL: clobbered.update(TEMP_REGS) stack.extend(n.parents) @@ -3176,7 +3235,7 @@ def regs_clobbered_until_dominator( def reg_always_set( - node: Node, reg: Register, typemap: Optional[TypeMap], *, dom_set: bool + node: Node, reg: Register, global_info: "GlobalInfo", *, dom_set: bool ) -> bool: if node.immediate_dominator is None: return False @@ -3194,7 +3253,7 @@ def reg_always_set( with current_instr(instr): if instr.mnemonic in CASES_FN_CALL and reg in TEMP_REGS: clobbered = True - if reg in output_regs_for_instr(instr, typemap): + if reg in output_regs_for_instr(instr, global_info): clobbered = False if clobbered == True: return False @@ -3653,11 +3712,9 @@ def translate_node_body(node: Node, regs: RegInfo, stack_info: StackInfo) -> Blo if isinstance(fn_target, AddressOf) and isinstance( fn_target.expr, GlobalSymbol ): - typemap = stack_info.global_info.typemap - if typemap: - c_fn = typemap.functions.get(fn_target.expr.symbol_name) - if c_fn and c_fn.ret_type is None: - is_known_void = True + is_known_void = stack_info.global_info.is_function_known_void( + fn_target.expr.symbol_name + ) elif isinstance(fn_target, Literal): pass else: @@ -3838,13 +3895,6 @@ def translate_node_body(node: Node, regs: RegInfo, stack_info: StackInfo) -> Blo if (len(mn_parts) >= 2 and mn_parts[1] == "d") or mnemonic == "ldc1": set_reg(target.other_f64_reg(), SecondF64Half()) - elif mnemonic in CASES_LWR: - assert mnemonic == "lwr" - target = args.reg_ref(0) - old_value = args.reg(0) - val = CASES_LWR[mnemonic](args, old_value) - set_reg(target, val) - else: expr = ErrorExpr(f"unknown instruction: {instr}") if args.count() >= 1 and isinstance(args.raw_arg(0), Register): @@ -3941,7 +3991,6 @@ def translate_graph_from_block( # Translate everything dominated by this node, now that we know our own # final register state. This will eventually reach every node. - typemap = stack_info.global_info.typemap for child in node.immediately_dominates: if isinstance(child, TerminalNode): continue @@ -3949,9 +3998,11 @@ def translate_graph_from_block( for reg, data in regs.contents.items(): new_regs.set_with_meta(reg, data.value, RegMeta(inherited=True)) - phi_regs = regs_clobbered_until_dominator(child, typemap) + phi_regs = regs_clobbered_until_dominator(child, stack_info.global_info) for reg in phi_regs: - if reg_always_set(child, reg, typemap, dom_set=(reg in regs)): + if reg_always_set( + child, reg, stack_info.global_info, dom_set=(reg in regs) + ): expr: Optional[Expression] = stack_info.maybe_get_register_var(reg) if expr is None: expr = PhiExpr( @@ -3984,8 +4035,9 @@ def resolve_types_late(stack_info: StackInfo) -> None: class GlobalInfo: asm_data: AsmData local_functions: Set[str] - typemap: Optional[TypeMap] global_symbol_map: Dict[str, GlobalSymbol] = field(default_factory=dict) + typemap: TypeMap = field(default_factory=TypeMap) + typepool: TypePool = field(default_factory=TypePool) def asm_data_value(self, sym_name: str) -> Optional[AsmDataEntry]: return self.asm_data.values.get(sym_name) @@ -4000,16 +4052,24 @@ class GlobalInfo: asm_data_entry=self.asm_data_value(sym_name), ) - type = Type.ptr(sym.type) - if self.typemap: - ctype = self.typemap.var_types.get(sym_name) - if ctype: - ctype_type, dim = ptr_type_from_ctype(ctype, self.typemap) - sym.array_dim = dim + fn = self.typemap.functions.get(sym_name) + ctype: Optional[CType] + if fn is not None: + ctype = fn.type + else: + ctype = self.typemap.var_types.get(sym_name) + if ctype is not None: sym.type_in_typemap = True - type.unify(ctype_type) - type = ctype_type - return AddressOf(sym, type=type) + sym.type.unify(Type.ctype(ctype, self.typemap, self.typepool)) + + return AddressOf(sym, type=sym.type.reference()) + + def is_function_known_void(self, sym_name: str) -> bool: + """Return True if the function exists in the context, and has no return value""" + fn = self.typemap.functions.get(sym_name) + if fn is None: + return False + return fn.ret_type is None def initializer_for_symbol( self, sym: GlobalSymbol, fmt: Formatter @@ -4044,12 +4104,12 @@ class GlobalInfo: def for_element_type(type: Type) -> Optional[str]: """Return the initializer for a single element of type `type`""" - if type.is_ctype(): - ctype_fields = type.get_ctype_fields() - if not ctype_fields: + if type.is_struct() or type.is_array(): + struct_fields = type.get_initializer_fields() + if not struct_fields: return None members = [] - for field in ctype_fields: + for field in struct_fields: if isinstance(field, int): # Check that all padding bytes are 0 padding = read_uint(field) @@ -4211,7 +4271,6 @@ class GlobalInfo: continue qualifier = f"{qualifier} " if qualifier else "" - name = f"{name}[{sym.array_dim}]" if sym.array_dim is not None else name value = f" = {value}" if value else "" comment = f" // {'; '.join(comments)}" if comments else "" lines.append( @@ -4245,7 +4304,6 @@ def translate_to_ast( flow_graph: FlowGraph = build_flowgraph(function, global_info.asm_data) stack_info = get_stack_info(function, global_info, flow_graph) start_regs: RegInfo = RegInfo(stack_info=stack_info) - typemap = global_info.typemap start_regs[Register("sp")] = GlobalSymbol("sp", type=Type.ptr()) for reg in SAVED_REGS: @@ -4255,14 +4313,11 @@ def translate_to_ast( assert offset % 4 == 0 return PassedInArg(offset, copied=False, stack_info=stack_info, type=type) - if typemap and function.name in typemap.functions: - fn_type = type_from_ctype(typemap.functions[function.name].type, typemap) - fn_decl_provided = True - else: - fn_type = Type.function() - fn_decl_provided = False - fn_type.unify(global_info.address_of_gsym(function.name).expr.type) + fn_sym = global_info.address_of_gsym(function.name).expr + assert isinstance(fn_sym, GlobalSymbol) + fn_type = fn_sym.type + fn_type.unify(Type.function()) fn_sig = Type.ptr(fn_type).get_function_pointer_signature() assert fn_sig is not None, "fn_type is known to be a function" return_type = fn_sig.return_type @@ -4317,7 +4372,7 @@ def translate_to_ast( return_reg: Optional[Register] = None if not options.void and not return_type.is_void(): - return_reg = determine_return_register(return_blocks, fn_decl_provided) + return_reg = determine_return_register(return_blocks, fn_sym.type_in_typemap) if return_reg is not None: for b in return_blocks: diff --git a/backend/mips_to_c/src/types.py b/backend/mips_to_c/src/types.py index a7b2fdfb..1eb244b6 100644 --- a/backend/mips_to_c/src/types.py +++ b/backend/mips_to_c/src/types.py @@ -1,15 +1,14 @@ import copy from dataclasses import dataclass, field -from typing import List, Optional, Set, Tuple, Union +from typing import Dict, List, Optional, Set, Tuple, Union import pycparser.c_ast as ca from .c_types import ( CType, Struct, + StructUnion as CStructUnion, TypeMap, - equal_types, - get_struct, parse_constant_int, parse_function, parse_struct, @@ -17,23 +16,74 @@ from .c_types import ( resolve_typedefs, set_decl_name, to_c, - var_size_align, ) -from .error import DecompFailure +from .error import DecompFailure, static_assert_unreachable from .options import Formatter +# AccessPath represents a struct/array path, with ints for array access, and +# strs for struct fields. Ex: `["foo", 3, "bar"]` represents `.foo[3].bar` +AccessPath = List[Union[str, int]] + + +@dataclass +class TypePool: + """ + Mutable shared state for Types, maintaining the set of available struct types. + """ + + structs: Set["StructDeclaration"] = field(default_factory=set) + structs_by_tag_name: Dict[str, "StructDeclaration"] = field(default_factory=dict) + structs_by_ctype: Dict[CStructUnion, "StructDeclaration"] = field( + default_factory=dict + ) + + def get_struct_for_ctype( + self, + ctype: CStructUnion, + ) -> Optional["StructDeclaration"]: + """Return the StructDeclaration for a given ctype struct, if known""" + struct = self.structs_by_ctype.get(ctype) + if struct is not None: + return struct + if ctype.name is not None: + return self.structs_by_tag_name.get(ctype.name) + return None + + def add_struct( + self, + struct: "StructDeclaration", + ctype_or_tag_name: Union[CStructUnion, str], + ) -> None: + """Add the struct declaration to the set of known structs for later access""" + self.structs.add(struct) + + tag_name: Optional[str] + if isinstance(ctype_or_tag_name, str): + tag_name = ctype_or_tag_name + else: + ctype = ctype_or_tag_name + tag_name = ctype.name + self.structs_by_ctype[ctype] = struct + + if tag_name is not None: + assert ( + tag_name not in self.structs_by_tag_name + ), f"Duplicate tag: {tag_name}" + self.structs_by_tag_name[tag_name] = struct + @dataclass(eq=False) class TypeData: - K_INT = 1 - K_PTR = 2 - K_FLOAT = 4 - K_CTYPE = 8 - K_FN = 16 - K_VOID = 32 + K_INT = 1 << 0 + K_PTR = 1 << 1 + K_FLOAT = 1 << 2 + K_FN = 1 << 3 + K_VOID = 1 << 4 + K_ARRAY = 1 << 5 + K_STRUCT = 1 << 6 K_INTPTR = K_INT | K_PTR K_ANYREG = K_INT | K_PTR | K_FLOAT - K_ANY = K_INT | K_PTR | K_FLOAT | K_CTYPE | K_FN | K_VOID + K_ANY = K_INT | K_PTR | K_FLOAT | K_FN | K_VOID | K_ARRAY | K_STRUCT SIGNED = 1 UNSIGNED = 2 @@ -41,14 +91,14 @@ class TypeData: kind: int = K_ANY likely_kind: int = K_ANY # subset of kind - size: Optional[int] = None + size_bits: Optional[int] = None uf_parent: Optional["TypeData"] = None sign: int = ANY_SIGN # K_INT - ptr_to: Optional["Type"] = None # K_PTR - typemap: Optional[TypeMap] = None # K_CTYPE - ctype_ref: Optional[CType] = None # K_CTYPE + ptr_to: Optional["Type"] = None # K_PTR | K_ARRAY fn_sig: Optional["FunctionSignature"] = None # K_FN + array_dim: Optional[int] = None # K_ARRAY + struct: Optional["StructDeclaration"] = None # K_STRUCT def __post_init__(self) -> None: assert self.kind @@ -107,52 +157,56 @@ class Type: else: seen = seen | {x, y} - if x.size is not None and y.size is not None and x.size != y.size: + if ( + x.size_bits is not None + and y.size_bits is not None + and x.size_bits != y.size_bits + ): return False kind = x.kind & y.kind likely_kind = x.likely_kind & y.likely_kind - size = x.size if x.size is not None else y.size - typemap = x.typemap if x.typemap is not None else y.typemap - ctype_ref = x.ctype_ref if x.ctype_ref is not None else y.ctype_ref + size_bits = x.size_bits if x.size_bits is not None else y.size_bits ptr_to = x.ptr_to if x.ptr_to is not None else y.ptr_to fn_sig = x.fn_sig if x.fn_sig is not None else y.fn_sig + array_dim = x.array_dim if x.array_dim is not None else y.array_dim + struct = x.struct if x.struct is not None else y.struct sign = x.sign & y.sign - if size not in (None, 32, 64): + if size_bits not in (None, 32, 64): kind &= ~TypeData.K_FLOAT - if size not in (None, 32): + if size_bits not in (None, 32): kind &= ~TypeData.K_PTR - if size not in (None,): + if size_bits not in (None,): kind &= ~TypeData.K_FN - if size not in (None, 0): + if size_bits not in (None, 0): kind &= ~TypeData.K_VOID likely_kind &= kind if kind == 0 or sign == 0: return False if kind == TypeData.K_PTR: - size = 32 + size_bits = 32 if sign != TypeData.ANY_SIGN: assert kind == TypeData.K_INT - if x.ctype_ref is not None and y.ctype_ref is not None: - assert typemap is not None - x_ctype = resolve_typedefs(x.ctype_ref, typemap) - y_ctype = resolve_typedefs(y.ctype_ref, typemap) - if not equal_types(x_ctype, y_ctype): - return False if x.ptr_to is not None and y.ptr_to is not None: if not x.ptr_to.unify(y.ptr_to, seen=seen): return False if x.fn_sig is not None and y.fn_sig is not None: if not x.fn_sig.unify(y.fn_sig, seen=seen): return False + if x.array_dim is not None and y.array_dim is not None: + if x.array_dim != y.array_dim: + return False + if x.struct is not None and y.struct is not None: + if not x.struct.unify(y.struct, seen=seen): + return False x.kind = kind x.likely_kind = likely_kind - x.size = size + x.size_bits = size_bits x.sign = sign x.ptr_to = ptr_to - x.typemap = typemap - x.ctype_ref = ctype_ref x.fn_sig = fn_sig + x.array_dim = array_dim + x.struct = struct y.uf_parent = x return True @@ -172,49 +226,46 @@ class Type: def is_pointer(self) -> bool: return self.data().kind == TypeData.K_PTR + def is_pointer_or_array(self) -> bool: + return self.data().kind in (TypeData.K_PTR, TypeData.K_ARRAY) + def is_int(self) -> bool: return self.data().kind == TypeData.K_INT def is_reg(self) -> bool: return (self.data().kind & ~TypeData.K_ANYREG) == 0 - def is_ctype(self) -> bool: - return self.data().kind == TypeData.K_CTYPE - def is_function(self) -> bool: return self.data().kind == TypeData.K_FN def is_void(self) -> bool: return self.data().kind == TypeData.K_VOID + def is_array(self) -> bool: + return self.data().kind == TypeData.K_ARRAY + + def is_struct(self) -> bool: + return self.data().kind == TypeData.K_STRUCT + def is_unsigned(self) -> bool: return self.data().sign == TypeData.UNSIGNED def get_size_bits(self) -> Optional[int]: - return self.data().size + return self.data().size_bits def get_size_bytes(self) -> Optional[int]: - size = self.get_size_bits() - return None if size is None else size // 8 + size_bits = self.get_size_bits() + return None if size_bits is None else size_bits // 8 - def get_size_align_bytes(self) -> Tuple[int, int]: + def get_parameter_size_align_bytes(self) -> Tuple[int, int]: + """Return the size & alignment of self when used as a function argument""" data = self.data() - if data.kind == TypeData.K_CTYPE and data.ctype_ref is not None: - assert data.typemap is not None - return var_size_align(data.ctype_ref, data.typemap) + if self.is_struct(): + assert data.struct is not None + return data.struct.size, data.struct.align size = (self.get_size_bits() or 32) // 8 return size, size - def is_struct_type(self) -> bool: - data = self.data() - if data.kind != TypeData.K_CTYPE or data.ctype_ref is None: - return False - assert data.typemap is not None - ctype = resolve_typedefs(data.ctype_ref, data.typemap) - if not isinstance(ctype, ca.TypeDecl): - return False - return isinstance(ctype.type, (ca.Struct, ca.Union)) - def get_pointer_target(self) -> Optional["Type"]: """If self is a pointer-to-a-Type, return the Type""" data = self.data() @@ -222,13 +273,29 @@ class Type: return data.ptr_to return None - def get_ctype_and_typemap(self) -> Optional[Tuple[CType, TypeMap]]: - """If self is a CType, return the CType and the TypeMap it came from""" + def reference(self) -> "Type": + """Return a pointer to self. If self is an array, decay the type to a pointer""" + if self.is_array(): + data = self.data() + assert data.ptr_to is not None + return Type.ptr(data.ptr_to) + return Type.ptr(self) + + def decay(self) -> "Type": + """If self is an array, return a pointer to the element type. Otherwise, return self.""" + if self.is_array(): + data = self.data() + assert data.ptr_to is not None + return Type.ptr(data.ptr_to) + return self + + def get_array(self) -> Tuple[Optional["Type"], Optional[int]]: + """If self is an array, return a tuple of the inner Type & the array dimension""" + if not self.is_array(): + return None, None data = self.data() - if data.kind == TypeData.K_CTYPE: - assert data.ctype_ref is not None and data.typemap is not None - return data.ctype_ref, data.typemap - return None + assert data.ptr_to is not None + return (data.ptr_to, data.array_dim) def get_function_pointer_signature(self) -> Optional["FunctionSignature"]: """If self is a function pointer, return the FunctionSignature""" @@ -239,67 +306,170 @@ class Type: return ptr_to.fn_sig return None - def get_ctype_fields( + def get_struct_declaration(self) -> Optional["StructDeclaration"]: + """If self is a struct, return the StructDeclaration""" + if self.is_struct(): + data = self.data() + assert data.struct is not None + return data.struct + return None + + GetFieldResult = Tuple[Optional[AccessPath], "Type", int] + + def get_field(self, offset: int, *, target_size: Optional[int]) -> GetFieldResult: + """ + Locate the field in self at the appropriate offset, and optionally + with an exact target size (both values in bytes). + The target size can be used to disambiguate different fields in a union, or + different levels inside nested structs. + + The return value is a tuple of `(field_path, field_type, remaining_offset)`. + If no field is found, the result is `(None, Type.any(), offset)`. + If `remaining_offset` is nonzero, then there was *not* a field at the exact + offset provided; the returned field is at `(offset - remaining_offset)`. + """ + NO_MATCHING_FIELD: Type.GetFieldResult = (None, Type.any(), offset) + + if offset < 0: + return NO_MATCHING_FIELD + + if self.is_array(): + # Array types always have elements with known size + data = self.data() + assert data.ptr_to is not None + size = data.ptr_to.get_size_bytes() + assert size is not None + + index, remaining_offset = divmod(offset, size) + if data.array_dim is not None and index >= data.array_dim: + return NO_MATCHING_FIELD + assert index >= 0 and remaining_offset >= 0 + + # Assume this is an array access at the computed `index`. + # Check if there is a field at the `remaining_offset` offset + subpath, subtype, sub_remaining_offset = data.ptr_to.get_field( + remaining_offset, target_size=target_size + ) + if subpath is not None: + # Success: prepend `index` and return + subpath.insert(0, index) + return subpath, subtype, sub_remaining_offset + return NO_MATCHING_FIELD + + if self.is_struct(): + data = self.data() + assert data.struct is not None + possible_fields = data.struct.fields_at_offset(offset) + if not possible_fields: + return NO_MATCHING_FIELD + possible_results: List[Type.GetFieldResult] = [] + if target_size is None or target_size == self.get_size_bytes(): + possible_results.append(([], self, offset)) + for field in possible_fields: + inner_offset_bits = offset - field.offset + subpath, subtype, sub_remaining_offset = field.type.get_field( + inner_offset_bits, target_size=target_size + ) + if subpath is None: + continue + subpath.insert(0, field.name) + possible_results.append((subpath, subtype, sub_remaining_offset)) + if ( + target_size is not None + and target_size == subtype.get_size_bytes() + and sub_remaining_offset == 0 + ): + return possible_results[-1] + zero_offset_results = [r for r in possible_results if r[2] == 0] + if zero_offset_results: + return zero_offset_results[0] + if possible_results: + return possible_results[0] + + if offset == 0 and ( + target_size is None or target_size == self.get_size_bytes() + ): + # The whole type itself is a match + return [], self, 0 + + return NO_MATCHING_FIELD + + def get_deref_field( + self, offset: int, *, target_size: Optional[int] + ) -> GetFieldResult: + """ + Similar to `.get_field()`, but treat self as a pointer and find the field in the + pointer's target. The return value has the same semantics as `.get_field()`. + + If successful, the first item in the resulting `field_path` will be `0`. + This mirrors how `foo[0].bar` and `foo->bar` are equivalent in C. + """ + NO_MATCHING_FIELD: Type.GetFieldResult = (None, Type.any(), offset) + + target = self.get_pointer_target() + if target is None: + return NO_MATCHING_FIELD + + # Assume the pointer is to a single object, and not an array. + field_path, field_type, remaining_offset = target.get_field( + offset, target_size=target_size + ) + if field_path is not None: + field_path.insert(0, 0) + return field_path, field_type, remaining_offset + + def get_initializer_fields( self, ) -> Optional[List[Union[int, "Type"]]]: """ - If self is a CType, get a list of fields, suitable for creating an initializer, - or return None if an initializer cannot be made (e.g. a struct with bitfields) + If self is a struct or array (i.e. initialized with `{...}` syntax), then + return a list of fields suitable for creating an initializer. + Return None if an initializer cannot be made (e.g. a struct with bitfields) - Struct padding is represented by an int in the list, otherwise the list members + Padding is represented by an int in the list, otherwise the list fields denote the field's Type. """ data = self.data() - if data.kind != TypeData.K_CTYPE or data.ctype_ref is None: - return None - assert data.typemap is not None - ctype = resolve_typedefs(data.ctype_ref, data.typemap) - - # ArrayDecls are still used to when representing pointers-to-arrays, or - # multidimensional arrays. - # Treat an array of length N as a struct with N (identical) members - if isinstance(ctype, ca.ArrayDecl): - field_type, dim = array_type_and_dim(ctype, data.typemap) - if not dim: - # Do not support zero-sized arrays + if self.is_array(): + assert data.ptr_to is not None + if data.array_dim is None: return None - return [field_type] * dim - # Lookup the c_types.Struct representation - if not isinstance(ctype, ca.TypeDecl): - return None - if not isinstance(ctype.type, (ca.Struct, ca.Union)): - return None - struct = get_struct(ctype.type, data.typemap) - if not struct or struct.has_bitfields: - # Bitfields aren't supported; they aren't represented in `struct.fields` - return None + return [data.ptr_to] * data.array_dim - output: List[Union[int, Type]] = [] - position = 0 - for offset, fields in sorted(struct.fields.items()): - if offset < position: - # Overlapping fields, e.g. from expanded struct paths - continue - elif offset > position: - # Padding bytes - output.append(offset - position) - - # Choose the first field in a union, or the unexpanded name in a struct - field = fields[0] - field_type = type_from_ctype(field.type, data.typemap, array_decay=False) - size = field_type.get_size_bytes() - if not size: + if self.is_struct(): + assert data.struct is not None + if data.struct.has_bitfields: + # TODO: Support bitfields return None - output.append(field_type) - position = offset + size - assert position <= struct.size - if position < struct.size: - # Insert padding bytes - output.append(struct.size - position) + output: List[Union[int, Type]] = [] + position = 0 - return output + def add_padding(upto: int) -> None: + nonlocal position + nonlocal output + assert upto >= position + if upto > position: + output.append(upto - position) + + for field in data.struct.fields: + assert field.offset >= position, "overlapping fields" + + add_padding(field.offset) + field_size = field.type.get_size_bytes() + assert field_size is not None + output.append(field.type) + position = field.offset + field_size + + # Unions only have an initializer for the first field + if data.struct.is_union: + break + + add_padding(data.struct.size) + return output + + return None def to_decl(self, name: str, fmt: Formatter) -> str: decl = ca.Decl( @@ -338,14 +508,14 @@ class Type: if data in seen: return simple_ctype(unk_symbol) seen.add(data) - size = data.size or 32 + size_bits = data.size_bits or 32 sign = "s" if data.sign & TypeData.SIGNED else "u" if (data.kind & TypeData.K_ANYREG) == TypeData.K_ANYREG and ( data.likely_kind & (TypeData.K_INT | TypeData.K_FLOAT) ) not in (TypeData.K_INT, TypeData.K_FLOAT): - if data.size is not None: - return simple_ctype(f"{unk_symbol}{size}") + if data.size_bits is not None: + return simple_ctype(f"{unk_symbol}{size_bits}") return simple_ctype(unk_symbol) if ( @@ -353,24 +523,23 @@ class Type: or (data.likely_kind & (TypeData.K_FLOAT | TypeData.K_INT)) == TypeData.K_FLOAT ): - return simple_ctype(f"f{size}") + return simple_ctype(f"f{size_bits}") if data.kind == TypeData.K_PTR: + target_ctype: CType if data.ptr_to is None: - return ca.PtrDecl(type=simple_ctype("void"), quals=[]) - return ca.PtrDecl(type=data.ptr_to._to_ctype(seen, fmt), quals=[]) + target_ctype = simple_ctype("void") + else: + target_ctype = data.ptr_to._to_ctype(seen.copy(), fmt) - if data.kind == TypeData.K_CTYPE: - if data.ctype_ref is None: - return simple_ctype(unk_symbol) - ctype = copy.deepcopy(data.ctype_ref) - if isinstance(ctype, ca.TypeDecl) and isinstance( - ctype.type, (ca.Struct, ca.Union) - ): - if ctype.type.name is not None: - # Remove struct field declarations for named structs - ctype.type.decls = None - return ctype + # Strip parameter names from function pointers + if isinstance(target_ctype, ca.FuncDecl) and target_ctype.args: + for arg in target_ctype.args.params: + if isinstance(arg, ca.Decl): + arg.name = None + set_decl_name(arg) + + return ca.PtrDecl(type=target_ctype, quals=[]) if data.kind == TypeData.K_FN: assert data.fn_sig is not None @@ -401,7 +570,29 @@ class Type: if data.kind == TypeData.K_VOID: return simple_ctype("void") - return simple_ctype(f"{sign}{size}") + if data.kind == TypeData.K_ARRAY: + assert data.ptr_to is not None + dim: Optional[ca.Constant] = None + if data.array_dim is not None: + dim = ca.Constant(value=str(data.array_dim), type="") + return ca.ArrayDecl( + type=data.ptr_to._to_ctype(seen.copy(), fmt), + dim=dim, + dim_quals=[], + ) + + if data.kind == TypeData.K_STRUCT: + assert data.struct is not None + if data.struct.typedef_name: + return simple_ctype(data.struct.typedef_name) + # If there's no typedef or tag name, then label it as `_anonymous` + name = data.struct.tag_name or "_anonymous" + Class = ca.Union if data.struct.is_union else ca.Struct + return ca.TypeDecl( + declname=name, type=ca.Struct(name=name, decls=None), quals=[] + ) + + return simple_ctype(f"{sign}{size_bits}") def format(self, fmt: Formatter) -> str: return self.to_decl("", fmt) @@ -418,11 +609,12 @@ class Type: ("I" if data.kind & TypeData.K_INT else "") + ("P" if data.kind & TypeData.K_PTR else "") + ("F" if data.kind & TypeData.K_FLOAT else "") - + ("C" if data.kind & TypeData.K_CTYPE else "") + ("N" if data.kind & TypeData.K_FN else "") + ("V" if data.kind & TypeData.K_VOID else "") + + ("A" if data.kind & TypeData.K_ARRAY else "") + + ("S" if data.kind & TypeData.K_STRUCT else "") ) - sizestr = str(data.size) if data.size is not None else "?" + sizestr = str(data.size_bits) if data.size_bits is not None else "?" return f"Type({signstr + kindstr + sizestr})" @staticmethod @@ -443,13 +635,7 @@ class Type: @staticmethod def ptr(type: Optional["Type"] = None) -> "Type": - return Type(TypeData(kind=TypeData.K_PTR, size=32, ptr_to=type)) - - @staticmethod - def _ctype(ctype: CType, typemap: TypeMap, size: Optional[int]) -> "Type": - return Type( - TypeData(kind=TypeData.K_CTYPE, size=size, ctype_ref=ctype, typemap=typemap) - ) + return Type(TypeData(kind=TypeData.K_PTR, size_bits=32, ptr_to=type)) @staticmethod def function(fn_sig: Optional["FunctionSignature"] = None) -> "Type": @@ -459,7 +645,7 @@ class Type: @staticmethod def f32() -> "Type": - return Type(TypeData(kind=TypeData.K_FLOAT, size=32)) + return Type(TypeData(kind=TypeData.K_FLOAT, size_bits=32)) @staticmethod def floatish() -> "Type": @@ -467,58 +653,58 @@ class Type: @staticmethod def f64() -> "Type": - return Type(TypeData(kind=TypeData.K_FLOAT, size=64)) + return Type(TypeData(kind=TypeData.K_FLOAT, size_bits=64)) @staticmethod def s8() -> "Type": - return Type(TypeData(kind=TypeData.K_INT, size=8, sign=TypeData.SIGNED)) + return Type(TypeData(kind=TypeData.K_INT, size_bits=8, sign=TypeData.SIGNED)) @staticmethod def u8() -> "Type": - return Type(TypeData(kind=TypeData.K_INT, size=8, sign=TypeData.UNSIGNED)) + return Type(TypeData(kind=TypeData.K_INT, size_bits=8, sign=TypeData.UNSIGNED)) @staticmethod def s16() -> "Type": - return Type(TypeData(kind=TypeData.K_INT, size=16, sign=TypeData.SIGNED)) + return Type(TypeData(kind=TypeData.K_INT, size_bits=16, sign=TypeData.SIGNED)) @staticmethod def u16() -> "Type": - return Type(TypeData(kind=TypeData.K_INT, size=16, sign=TypeData.UNSIGNED)) + return Type(TypeData(kind=TypeData.K_INT, size_bits=16, sign=TypeData.UNSIGNED)) @staticmethod def s32() -> "Type": - return Type(TypeData(kind=TypeData.K_INT, size=32, sign=TypeData.SIGNED)) + return Type(TypeData(kind=TypeData.K_INT, size_bits=32, sign=TypeData.SIGNED)) @staticmethod def u32() -> "Type": - return Type(TypeData(kind=TypeData.K_INT, size=32, sign=TypeData.UNSIGNED)) + return Type(TypeData(kind=TypeData.K_INT, size_bits=32, sign=TypeData.UNSIGNED)) @staticmethod def s64() -> "Type": - return Type(TypeData(kind=TypeData.K_INT, size=64, sign=TypeData.SIGNED)) + return Type(TypeData(kind=TypeData.K_INT, size_bits=64, sign=TypeData.SIGNED)) @staticmethod def u64() -> "Type": - return Type(TypeData(kind=TypeData.K_INT, size=64, sign=TypeData.UNSIGNED)) + return Type(TypeData(kind=TypeData.K_INT, size_bits=64, sign=TypeData.UNSIGNED)) @staticmethod def int64() -> "Type": - return Type(TypeData(kind=TypeData.K_INT, size=64)) + return Type(TypeData(kind=TypeData.K_INT, size_bits=64)) @staticmethod - def int_of_size(size: int) -> "Type": - return Type(TypeData(kind=TypeData.K_INT, size=size)) + def int_of_size(size_bits: int) -> "Type": + return Type(TypeData(kind=TypeData.K_INT, size_bits=size_bits)) @staticmethod def reg32(*, likely_float: bool) -> "Type": likely = TypeData.K_FLOAT if likely_float else TypeData.K_INTPTR - return Type(TypeData(kind=TypeData.K_ANYREG, likely_kind=likely, size=32)) + return Type(TypeData(kind=TypeData.K_ANYREG, likely_kind=likely, size_bits=32)) @staticmethod def reg64(*, likely_float: bool) -> "Type": kind = TypeData.K_FLOAT | TypeData.K_INT likely = TypeData.K_FLOAT if likely_float else TypeData.K_INT - return Type(TypeData(kind=kind, likely_kind=likely, size=64)) + return Type(TypeData(kind=kind, likely_kind=likely, size_bits=64)) @staticmethod def bool() -> "Type": @@ -526,7 +712,79 @@ class Type: @staticmethod def void() -> "Type": - return Type(TypeData(kind=TypeData.K_VOID, size=0)) + return Type(TypeData(kind=TypeData.K_VOID, size_bits=0)) + + @staticmethod + def array(type: "Type", dim: Optional[int]) -> "Type": + # Array elements must have a known size + el_size = type.get_size_bits() + assert el_size is not None + + size_bits = None if dim is None else (el_size * dim) + return Type( + TypeData( + kind=TypeData.K_ARRAY, size_bits=size_bits, ptr_to=type, array_dim=dim + ) + ) + + @staticmethod + def struct(st: "StructDeclaration") -> "Type": + return Type(TypeData(kind=TypeData.K_STRUCT, size_bits=st.size * 8, struct=st)) + + @staticmethod + def ctype(ctype: CType, typemap: TypeMap, typepool: TypePool) -> "Type": + real_ctype = resolve_typedefs(ctype, typemap) + if isinstance(real_ctype, ca.ArrayDecl): + dim = None + try: + if real_ctype.dim is not None: + dim = parse_constant_int(real_ctype.dim, typemap) + except DecompFailure: + pass + inner_type = Type.ctype(real_ctype.type, typemap, typepool) + return Type.array(inner_type, dim) + if isinstance(real_ctype, ca.PtrDecl): + return Type.ptr(Type.ctype(real_ctype.type, typemap, typepool)) + if isinstance(real_ctype, ca.FuncDecl): + fn = parse_function(real_ctype) + assert fn is not None + fn_sig = FunctionSignature( + return_type=Type.void(), + is_variadic=fn.is_variadic, + ) + if fn.ret_type is not None: + fn_sig.return_type = Type.ctype(fn.ret_type, typemap, typepool) + if fn.params is not None: + fn_sig.params = [ + FunctionParam( + name=param.name or "", + type=Type.ctype(param.type, typemap, typepool), + ) + for param in fn.params + ] + fn_sig.params_known = True + return Type.function(fn_sig) + if isinstance(real_ctype, ca.TypeDecl): + if isinstance(real_ctype.type, (ca.Struct, ca.Union)): + return Type.struct( + StructDeclaration.from_ctype(real_ctype.type, typemap, typepool) + ) + names = ( + ["int"] + if isinstance(real_ctype.type, ca.Enum) + else real_ctype.type.names + ) + if "double" in names: + return Type.f64() + if "float" in names: + return Type.f32() + if "void" in names: + return Type.void() + size_bits = 8 * primitive_size(real_ctype.type) + assert size_bits > 0 + sign = TypeData.UNSIGNED if "unsigned" in names else TypeData.SIGNED + return Type(TypeData(kind=TypeData.K_INT, size_bits=size_bits, sign=sign)) + static_assert_unreachable(real_ctype) @dataclass(eq=False) @@ -599,164 +857,105 @@ class FunctionSignature: return can_unify -def type_from_ctype(ctype: CType, typemap: TypeMap, array_decay: bool = True) -> Type: - real_ctype = resolve_typedefs(ctype, typemap) - if isinstance(real_ctype, ca.ArrayDecl): - inner_type, dim = array_type_and_dim(real_ctype, typemap) - if array_decay: - return Type.ptr(inner_type) - size = inner_type.get_size_bits() - if size is not None and dim is not None: - size *= dim - else: - size = None - return Type._ctype(real_ctype, typemap, size=size) - if isinstance(real_ctype, ca.PtrDecl): - return Type.ptr(type_from_ctype(real_ctype.type, typemap, array_decay=False)) - if isinstance(real_ctype, ca.FuncDecl): - fn = parse_function(real_ctype) - assert fn is not None - fn_sig = FunctionSignature( - return_type=Type.void(), - is_variadic=fn.is_variadic, +@dataclass(eq=False) +class StructDeclaration: + """Representation of a C struct or union""" + + @dataclass(eq=False) + class StructField: + type: Type + offset: int + name: str + + size: int + align: int + tag_name: Optional[str] + typedef_name: Optional[str] + fields: List[StructField] # sorted by `.offset` + has_bitfields: bool + is_union: bool + + def unify( + self, + other: "StructDeclaration", + *, + seen: Optional[Set["TypeData"]] = None, + ) -> bool: + # NB: Currently, the only structs that exist are defined from ctypes in the typemap, + # so for now we can use reference equality to check if two structs are compatible. + return self is other + + def fields_at_offset(self, offset: int) -> List[StructField]: + """Return the list of StructFields which contain the given offset (in bits)""" + fields = [] + for field in self.fields: + # We assume fields are sorted by `offset`, ascending + if field.offset > offset: + break + field_size = field.type.get_size_bytes() + assert field_size is not None + if field.offset + field_size < offset: + continue + fields.append(field) + return fields + + @staticmethod + def from_ctype( + ctype: CStructUnion, typemap: TypeMap, typepool: TypePool + ) -> "StructDeclaration": + """ + Return StructDeclaration for a given ctype struct or union, constructing it + and registering it in the typepool if it does not already exist. + """ + existing_struct = typepool.get_struct_for_ctype(ctype) + if existing_struct: + return existing_struct + + struct = parse_struct(ctype, typemap) + + typedef_name: Optional[str] = None + if ctype in typemap.struct_typedefs: + typedef = typemap.struct_typedefs[ctype] + assert isinstance(typedef.type, ca.IdentifierType) + typedef_name = typedef.type.names[0] + elif ctype.name and ctype.name in typemap.struct_typedefs: + typedef = typemap.struct_typedefs[ctype.name] + assert isinstance(typedef.type, ca.IdentifierType) + typedef_name = typedef.type.names[0] + + assert ( + struct.size % struct.align == 0 + ), "struct size must be a multiple of its alignment" + + decl = StructDeclaration( + size=struct.size, + align=struct.align, + tag_name=ctype.name, + typedef_name=typedef_name, + fields=[], + has_bitfields=struct.has_bitfields, + is_union=isinstance(ctype, ca.Union), ) - if fn.ret_type is not None: - fn_sig.return_type = type_from_ctype(fn.ret_type, typemap) - if fn.params is not None: - fn_sig.params = [ - FunctionParam( - name=param.name or "", - type=type_from_ctype(param.type, typemap), - ) - for param in fn.params - ] - fn_sig.params_known = True - return Type.function(fn_sig) - if isinstance(real_ctype, ca.TypeDecl): - if isinstance(real_ctype.type, (ca.Struct, ca.Union)): - struct = parse_struct(real_ctype.type, typemap) - return Type._ctype(struct.type, typemap, size=struct.size * 8) - names = ( - ["int"] if isinstance(real_ctype.type, ca.Enum) else real_ctype.type.names - ) - if "double" in names: - return Type.f64() - if "float" in names: - return Type.f32() - size = 8 * primitive_size(real_ctype.type) - if not size: - return Type._ctype(ctype, typemap, size=None) - sign = TypeData.UNSIGNED if "unsigned" in names else TypeData.SIGNED - return Type(TypeData(kind=TypeData.K_INT, size=size, sign=sign)) + # Register the struct in the typepool now, before parsing the fields, + # in case there are any self-referential fields in this struct. + typepool.add_struct(decl, ctype) - -def array_type_and_dim(ctype: ca.ArrayDecl, typemap: TypeMap) -> Tuple[Type, int]: - dim = 0 - try: - if ctype.dim is not None: - dim = parse_constant_int(ctype.dim, typemap) - except DecompFailure: - pass - return ( - type_from_ctype(ctype.type, typemap, array_decay=False), - dim, - ) - - -def ptr_type_from_ctype(ctype: CType, typemap: TypeMap) -> Tuple[Type, Optional[int]]: - real_ctype = resolve_typedefs(ctype, typemap) - if isinstance(real_ctype, ca.ArrayDecl): - # Array to pointer decay - inner_type, dim = array_type_and_dim(real_ctype, typemap) - return Type.ptr(inner_type), dim - return Type.ptr(type_from_ctype(ctype, typemap)), None - - -def get_field( - type: Type, offset: int, *, target_size: Optional[int] -) -> Tuple[Optional[str], Type, Type, Optional[int]]: - """Returns field name, target type, target pointer type, and - the field's array size (or None if the field is not an array).""" - if target_size is None and offset == 0: - # We might as well take a pointer to the whole struct - target = type.get_pointer_target() or Type.any() - return None, target, type, None - - deref_type = type.get_pointer_target() - if deref_type is None: - return None, Type.any(), Type.ptr(), None - ctype_and_typemap = deref_type.get_ctype_and_typemap() - if ctype_and_typemap is None: - return None, Type.any(), Type.ptr(), None - ctype, typemap = ctype_and_typemap - ctype = resolve_typedefs(ctype, typemap) - - if isinstance(ctype, ca.TypeDecl) and isinstance(ctype.type, (ca.Struct, ca.Union)): - struct = get_struct(ctype.type, typemap) - if struct: - fields = struct.fields.get(offset) - if fields: - # Ideally, we should use target_size and the target pointer type to - # determine which struct field to use if there are multiple at the - # same offset (e.g. if a struct starts here, or we have a union). - # For now though, we just use target_size as a boolean signal -- if - # it's known we take an arbitrary subfield that's as concrete as - # possible, if unknown we prefer a whole substruct. (The latter case - # happens when taking pointers to fields -- pointers to substructs are - # more common and can later be converted to concrete field pointers.) - if target_size is None: - # Structs will be placed first in the field list. - field = fields[0] - else: - # Pick the first subfield in case of unions. - correct_size_fields = [f for f in fields if f.size == target_size] - if len(correct_size_fields) == 1: - field = correct_size_fields[0] - else: - ind = 0 - while ind + 1 < len(fields) and fields[ind + 1].name.startswith( - fields[ind].name + "." - ): - ind += 1 - field = fields[ind] - return ( + for offset, fields in sorted(struct.fields.items()): + for field in fields: + field_type = Type.ctype(field.type, typemap, typepool) + assert field.size == field_type.get_size_bytes(), ( + field.size, + field_type.get_size_bytes(), field.name, - type_from_ctype(field.type, typemap), - *ptr_type_from_ctype(field.type, typemap), + field_type, ) - return None, Type.any(), Type.ptr(), None + decl.fields.append( + StructDeclaration.StructField( + type=field_type, + offset=offset, + name=field.name, + ) + ) + assert decl.fields == sorted(decl.fields, key=lambda f: f.offset) - -def find_substruct_array( - type: Type, offset: int, scale: int -) -> Optional[Tuple[str, int, Type]]: - if scale <= 0: - return None - deref_type = type.get_pointer_target() - if deref_type is None: - return None - ctype_and_typemap = deref_type.get_ctype_and_typemap() - if ctype_and_typemap is None: - return None - ctype, typemap = ctype_and_typemap - ctype = resolve_typedefs(ctype, typemap) - if not isinstance(ctype, ca.TypeDecl): - return None - if not isinstance(ctype.type, (ca.Struct, ca.Union)): - return None - struct = get_struct(ctype.type, typemap) - if not struct: - return None - for off, fields in sorted(struct.fields.items()): - if offset < off: - continue - for field in fields: - if offset >= off + field.size: - continue - field_type = resolve_typedefs(field.type, typemap) - if not isinstance(field_type, ca.ArrayDecl): - continue - size = var_size_align(field_type.type, typemap)[0] - if size == scale: - return field.name, off, type_from_ctype(field_type.type, typemap) - return None + return decl diff --git a/backend/mips_to_c/tests/end_to_end/arguments/irix-g-out.c b/backend/mips_to_c/tests/end_to_end/arguments/irix-g-out.c index 50642466..17571a9d 100644 --- a/backend/mips_to_c/tests/end_to_end/arguments/irix-g-out.c +++ b/backend/mips_to_c/tests/end_to_end/arguments/irix-g-out.c @@ -2,6 +2,6 @@ extern s32 D_4100F0; extern f32 D_4100F4; void test(f32 arg0, s32 arg1, f32 arg2, s32 arg3, f32 arg4, s32 arg5) { - D_4100F4 = (f32) (arg0 + arg2 + arg4); - D_4100F0 = (s32) (arg1 + arg3 + arg5); + D_4100F4 = arg0 + arg2 + arg4; + D_4100F0 = arg1 + arg3 + arg5; } diff --git a/backend/mips_to_c/tests/end_to_end/arguments/irix-o2-out.c b/backend/mips_to_c/tests/end_to_end/arguments/irix-o2-out.c index ca4ee2cd..af66d691 100644 --- a/backend/mips_to_c/tests/end_to_end/arguments/irix-o2-out.c +++ b/backend/mips_to_c/tests/end_to_end/arguments/irix-o2-out.c @@ -2,6 +2,6 @@ extern s32 D_4100E0; extern f32 D_4100E4; void test(f32 arg0, s32 arg1, f32 arg2, s32 arg3, f32 arg4, s32 arg5) { - D_4100E4 = (f32) (arg0 + arg2 + arg4); - D_4100E0 = (s32) (arg1 + arg3 + arg5); + D_4100E4 = arg0 + arg2 + arg4; + D_4100E0 = arg1 + arg3 + arg5; } diff --git a/backend/mips_to_c/tests/end_to_end/array-access/irix-g-out.c b/backend/mips_to_c/tests/end_to_end/array-access/irix-g-out.c index 1829a849..81cabb10 100644 --- a/backend/mips_to_c/tests/end_to_end/array-access/irix-g-out.c +++ b/backend/mips_to_c/tests/end_to_end/array-access/irix-g-out.c @@ -1,7 +1,7 @@ extern s32 *D_410140; void test(struct A *a, s32 b) { - D_410140 = (s32 *) a->array[b]; + D_410140 = a->array[b]; D_410140 = (s32 *) &a->array[b]; D_410140 = (s32 *) a->array2[b].x; D_410140 = &a->array2[b].x; diff --git a/backend/mips_to_c/tests/end_to_end/array-access/irix-o2-out.c b/backend/mips_to_c/tests/end_to_end/array-access/irix-o2-out.c index 454280c7..97c7ac43 100644 --- a/backend/mips_to_c/tests/end_to_end/array-access/irix-o2-out.c +++ b/backend/mips_to_c/tests/end_to_end/array-access/irix-o2-out.c @@ -1,7 +1,7 @@ extern s32 *D_410110; void test(struct A *a, s32 b) { - D_410110 = (s32 *) a->array[b]; + D_410110 = a->array[b]; D_410110 = (s32 *) &a->array[b]; D_410110 = (s32 *) a->array2[b].x; D_410110 = &a->array2[b].x; diff --git a/backend/mips_to_c/tests/end_to_end/comparison/irix-o2-out.c b/backend/mips_to_c/tests/end_to_end/comparison/irix-o2-out.c index caffe28b..7c56abfa 100644 --- a/backend/mips_to_c/tests/end_to_end/comparison/irix-o2-out.c +++ b/backend/mips_to_c/tests/end_to_end/comparison/irix-o2-out.c @@ -1,10 +1,10 @@ extern s32 D_410100; void test(s32 arg0, s32 arg1, s32 arg2) { - D_410100 = (s32) (arg0 == arg1); - D_410100 = (s32) (arg0 != arg2); - D_410100 = (s32) (arg0 < arg1); - D_410100 = (s32) ((arg1 < arg0) ^ 1); - D_410100 = (s32) (arg0 == 0); - D_410100 = (s32) (arg1 != 0); + D_410100 = arg0 == arg1; + D_410100 = arg0 != arg2; + D_410100 = arg0 < arg1; + D_410100 = (arg1 < arg0) ^ 1; + D_410100 = arg0 == 0; + D_410100 = arg1 != 0; } diff --git a/backend/mips_to_c/tests/end_to_end/complicated_context/irix-o2-typemap-out.c b/backend/mips_to_c/tests/end_to_end/complicated_context/irix-o2-typemap-out.c index a1c9d687..6feae7e3 100644 --- a/backend/mips_to_c/tests/end_to_end/complicated_context/irix-o2-typemap-out.c +++ b/backend/mips_to_c/tests/end_to_end/complicated_context/irix-o2-typemap-out.c @@ -19,12 +19,12 @@ SomeStruct: size 0x280, align 8 0x0: int_field (int) 0x4: float_field (float) 0x8: pointer_field (void *) - 0x10: data_field (union SomeUnion) data_field.double_innerfield (double) data_field.char_innerfield (char) + 0x10: data_field (union SomeUnion) 0x18: enum_field (enum SomeEnum) 0x1c: anon_enum_field (anon enum) - 0x20: anon_struct_field (anon struct) anon_struct_field.sub (int) + 0x20: anon_struct_field (anon struct) 0x24: anon_union_field1 (int) anon_union_field2 (float) - 0x28: inner_struct_field (struct SubStruct) inner_struct_field.x (int) + 0x28: inner_struct_field (struct SubStruct) 0x30: long_long_field (long long) 0x38: bitfield_field (struct SomeBitfield) 0x40: array_arithmetic_1 (char [1 + 1]) @@ -63,8 +63,7 @@ SomeStruct: size 0x280, align 8 0x20d: array_arithmetic_34 (char [16 + ((0) ? (2) : (3))]) 0x220: array_arithmetic_35 (char [16 + ((2, 3))]) 0x233: char_array (char [2]) - 0x238: int_array (int [2]) int_array[0] (int) - 0x23c: int_array[1] (int) + 0x238: int_array (int [2]) 0x240: sub_array (struct { int a; @@ -72,26 +71,8 @@ SomeStruct: size 0x280, align 8 { int c; } b[3]; -} [2]) sub_array[0] (anon struct) sub_array[0].a (int) - 0x244: sub_array[0].b (struct -{ - int c; -} [3]) sub_array[0].b[0] (anon struct) sub_array[0].b[0].c (int) - 0x248: sub_array[0].b[1] (anon struct) sub_array[0].b[1].c (int) - 0x24c: sub_array[0].b[2] (anon struct) sub_array[0].b[2].c (int) - 0x250: sub_array[1] (anon struct) sub_array[1].a (int) - 0x254: sub_array[1].b (struct -{ - int c; -} [3]) sub_array[1].b[0] (anon struct) sub_array[1].b[0].c (int) - 0x258: sub_array[1].b[1] (anon struct) sub_array[1].b[1].c (int) - 0x25c: sub_array[1].b[2] (anon struct) sub_array[1].b[2].c (int) - 0x260: multidim_array (int [2][3]) multidim_array[0] (int [3]) multidim_array[0][0] (int) - 0x264: multidim_array[0][1] (int) - 0x268: multidim_array[0][2] (int) - 0x26c: multidim_array[1] (int [3]) multidim_array[1][0] (int) - 0x270: multidim_array[1][1] (int) - 0x274: multidim_array[1][2] (int) +} [2]) + 0x260: multidim_array (int [2][3]) 0x278: end (char) Enums: diff --git a/backend/mips_to_c/tests/end_to_end/conditional-moves/manual-out.c b/backend/mips_to_c/tests/end_to_end/conditional-moves/manual-out.c new file mode 100644 index 00000000..b740adde --- /dev/null +++ b/backend/mips_to_c/tests/end_to_end/conditional-moves/manual-out.c @@ -0,0 +1,6 @@ +s32 test(s32 arg2) { + s32 temp_a2; + + temp_a2 = (arg2 <= 0) ? 1 : arg2; + return (temp_a2 < 6) ? temp_a2 : 5; +} diff --git a/backend/mips_to_c/tests/end_to_end/conditional-moves/manual.s b/backend/mips_to_c/tests/end_to_end/conditional-moves/manual.s new file mode 100644 index 00000000..2533e45d --- /dev/null +++ b/backend/mips_to_c/tests/end_to_end/conditional-moves/manual.s @@ -0,0 +1,8 @@ +glabel test +li $a1,1 +slt $a0,$zero,$a2 +movz $a2,$a1,$a0 +li $v0,5 +slti $v1,$a2,6 +jr $ra +movn $v0,$a2,$v1 diff --git a/backend/mips_to_c/tests/end_to_end/division-by-power-of-two/ido53-o2-out.c b/backend/mips_to_c/tests/end_to_end/division-by-power-of-two/ido53-o2-out.c index 1bf0a6ee..20b1de16 100644 --- a/backend/mips_to_c/tests/end_to_end/division-by-power-of-two/ido53-o2-out.c +++ b/backend/mips_to_c/tests/end_to_end/division-by-power-of-two/ido53-o2-out.c @@ -1,3 +1,3 @@ void test(s32 *arg0) { - *arg0 = (s32) ((s32) *arg0 / 2); + *arg0 = (s32) *arg0 / 2; } diff --git a/backend/mips_to_c/tests/end_to_end/division-by-power-of-two/ido71-o2-out.c b/backend/mips_to_c/tests/end_to_end/division-by-power-of-two/ido71-o2-out.c index 1bf0a6ee..20b1de16 100644 --- a/backend/mips_to_c/tests/end_to_end/division-by-power-of-two/ido71-o2-out.c +++ b/backend/mips_to_c/tests/end_to_end/division-by-power-of-two/ido71-o2-out.c @@ -1,3 +1,3 @@ void test(s32 *arg0) { - *arg0 = (s32) ((s32) *arg0 / 2); + *arg0 = (s32) *arg0 / 2; } diff --git a/backend/mips_to_c/tests/end_to_end/error/manual-out.c b/backend/mips_to_c/tests/end_to_end/error/manual-out.c index 76739a0d..0b7da497 100644 --- a/backend/mips_to_c/tests/end_to_end/error/manual-out.c +++ b/backend/mips_to_c/tests/end_to_end/error/manual-out.c @@ -18,7 +18,7 @@ MIPS2C_TRAP_IF((u32) arg0 >= 6U); MIPS2C_ERROR(unknown instruction: badinstr $t0, $t0); temp_t1 = MIPS2C_ERROR(unknown instruction: badinstr2 $t1, $t1); - *NULL = (s32) (temp_t1 << temp_t1); + *NULL = temp_t1 << temp_t1; *NULL = (s32) (MIPS2C_ERROR(Read from unset register $v1) + 2); return MIPS2C_ERROR(unknown instruction: badinstr3 $v0, $t2); } diff --git a/backend/mips_to_c/tests/end_to_end/function-pointer2/irix-o2-out.c b/backend/mips_to_c/tests/end_to_end/function-pointer2/irix-o2-out.c index 6f334d78..0869c5f8 100644 --- a/backend/mips_to_c/tests/end_to_end/function-pointer2/irix-o2-out.c +++ b/backend/mips_to_c/tests/end_to_end/function-pointer2/irix-o2-out.c @@ -1,4 +1,4 @@ -s32 bar(f32); // extern +s32 bar(f32 x); // extern extern s32 (*glob2)(f32); void test(void) { diff --git a/backend/mips_to_c/tests/end_to_end/global_decls/irix-o2-noemitglobals-out.c b/backend/mips_to_c/tests/end_to_end/global_decls/irix-o2-noemitglobals-out.c index 483b170c..df853ed2 100644 --- a/backend/mips_to_c/tests/end_to_end/global_decls/irix-o2-noemitglobals-out.c +++ b/backend/mips_to_c/tests/end_to_end/global_decls/irix-o2-noemitglobals-out.c @@ -1,6 +1,6 @@ s32 test(void) { static_int *= 0x1C8; - extern_float = (f32) (extern_float * 456.0f); + extern_float *= 456.0f; static_fn(&static_A); extern_fn(static_A_ptr); *static_bss_array = *static_array + *static_ro_array; diff --git a/backend/mips_to_c/tests/end_to_end/global_decls/irix-o2-out.c b/backend/mips_to_c/tests/end_to_end/global_decls/irix-o2-out.c index 1ac23270..04701288 100644 --- a/backend/mips_to_c/tests/end_to_end/global_decls/irix-o2-out.c +++ b/backend/mips_to_c/tests/end_to_end/global_decls/irix-o2-out.c @@ -20,7 +20,7 @@ s32 static_ro_array[3] = {7, 8, 9}; // const s32 test(void) { static_int *= 0x1C8; - extern_float = (f32) (extern_float * 456.0f); + extern_float *= 456.0f; static_fn(&static_A); extern_fn(static_A_ptr); *static_bss_array = *static_array + *static_ro_array; diff --git a/backend/mips_to_c/tests/end_to_end/if_postdec/irix-g-out.c b/backend/mips_to_c/tests/end_to_end/if_postdec/irix-g-out.c index f1410b99..6600ddd8 100644 --- a/backend/mips_to_c/tests/end_to_end/if_postdec/irix-g-out.c +++ b/backend/mips_to_c/tests/end_to_end/if_postdec/irix-g-out.c @@ -4,7 +4,7 @@ extern s32 D_4100F0; s32 temp_t6; temp_t6 = D_4100F0; - D_4100F0 = (s32) (temp_t6 - 1); + D_4100F0 = temp_t6 - 1; if (temp_t6 < 1) { return 4; } diff --git a/backend/mips_to_c/tests/end_to_end/if_postdec/irix-o2-out.c b/backend/mips_to_c/tests/end_to_end/if_postdec/irix-o2-out.c index 08a068bd..619aabb2 100644 --- a/backend/mips_to_c/tests/end_to_end/if_postdec/irix-o2-out.c +++ b/backend/mips_to_c/tests/end_to_end/if_postdec/irix-o2-out.c @@ -4,7 +4,7 @@ extern s32 D_4100E0; s32 temp_v1; temp_v1 = D_4100E0; - D_4100E0 = (s32) (temp_v1 - 1); + D_4100E0 = temp_v1 - 1; if (temp_v1 <= 0) { return 4; } diff --git a/backend/mips_to_c/tests/end_to_end/large-struct-offset/irix-g-out.c b/backend/mips_to_c/tests/end_to_end/large-struct-offset/irix-g-out.c index a0f03753..e97259a7 100644 --- a/backend/mips_to_c/tests/end_to_end/large-struct-offset/irix-g-out.c +++ b/backend/mips_to_c/tests/end_to_end/large-struct-offset/irix-g-out.c @@ -1,6 +1,6 @@ extern s32 D_4100F0; void *test(void *arg0) { - D_4100F0 = (s32) arg0->unk12348; + D_4100F0 = arg0->unk12348; return arg0 + 0x12348; } diff --git a/backend/mips_to_c/tests/end_to_end/large-struct-offset/irix-o2-out.c b/backend/mips_to_c/tests/end_to_end/large-struct-offset/irix-o2-out.c index 609a4040..3478e403 100644 --- a/backend/mips_to_c/tests/end_to_end/large-struct-offset/irix-o2-out.c +++ b/backend/mips_to_c/tests/end_to_end/large-struct-offset/irix-o2-out.c @@ -1,6 +1,6 @@ extern s32 D_4100E0; void *test(void *arg0) { - D_4100E0 = (s32) arg0->unk12348; + D_4100E0 = arg0->unk12348; return arg0 + 0x12348; } diff --git a/backend/mips_to_c/tests/end_to_end/lwl/irix-g-out.c b/backend/mips_to_c/tests/end_to_end/lwl/irix-g-out.c index 8d083d95..59e331ed 100644 --- a/backend/mips_to_c/tests/end_to_end/lwl/irix-g-out.c +++ b/backend/mips_to_c/tests/end_to_end/lwl/irix-g-out.c @@ -17,5 +17,5 @@ void test(void) { D_410181 = (unaligned s32) D_410189; D_410190.unk0 = (unaligned s32) D_410180.unk0; D_410190.unk4 = (u8) D_410180.unk4; - D_410198 = (s32) (unaligned s32) D_400178; + D_410198 = (unaligned s32) D_400178; } diff --git a/backend/mips_to_c/tests/end_to_end/lwl/irix-o2-out.c b/backend/mips_to_c/tests/end_to_end/lwl/irix-o2-out.c index fb14dc32..90c30676 100644 --- a/backend/mips_to_c/tests/end_to_end/lwl/irix-o2-out.c +++ b/backend/mips_to_c/tests/end_to_end/lwl/irix-o2-out.c @@ -17,5 +17,5 @@ void test(void) { D_410161 = (unaligned s32) D_410169; D_410170.unk0 = (unaligned s32) D_410160.unk0; D_410170.unk4 = (u8) D_410160.unk4; - D_410178 = (s32) (unaligned s32) D_400158; + D_410178 = (unaligned s32) D_400158; } diff --git a/backend/mips_to_c/tests/end_to_end/modulo-by-power-of-two/ido53-o2-out.c b/backend/mips_to_c/tests/end_to_end/modulo-by-power-of-two/ido53-o2-out.c index 53329a35..957b0bda 100644 --- a/backend/mips_to_c/tests/end_to_end/modulo-by-power-of-two/ido53-o2-out.c +++ b/backend/mips_to_c/tests/end_to_end/modulo-by-power-of-two/ido53-o2-out.c @@ -1,3 +1,3 @@ void test(s32 *arg0) { - *arg0 = (s32) ((s32) *arg0 % 2); + *arg0 = (s32) *arg0 % 2; } diff --git a/backend/mips_to_c/tests/end_to_end/modulo-by-power-of-two/ido71-o2-out.c b/backend/mips_to_c/tests/end_to_end/modulo-by-power-of-two/ido71-o2-out.c index 53329a35..957b0bda 100644 --- a/backend/mips_to_c/tests/end_to_end/modulo-by-power-of-two/ido71-o2-out.c +++ b/backend/mips_to_c/tests/end_to_end/modulo-by-power-of-two/ido71-o2-out.c @@ -1,3 +1,3 @@ void test(s32 *arg0) { - *arg0 = (s32) ((s32) *arg0 % 2); + *arg0 = (s32) *arg0 % 2; } diff --git a/backend/mips_to_c/tests/end_to_end/mult-by-constant/irix-g-out.c b/backend/mips_to_c/tests/end_to_end/mult-by-constant/irix-g-out.c index de6a1057..c0fecacc 100644 --- a/backend/mips_to_c/tests/end_to_end/mult-by-constant/irix-g-out.c +++ b/backend/mips_to_c/tests/end_to_end/mult-by-constant/irix-g-out.c @@ -2,25 +2,25 @@ extern s32 D_410250; void test(s32 arg0) { D_410250 = arg0; - D_410250 = (s32) (arg0 * 2); - D_410250 = (s32) (arg0 * 3); - D_410250 = (s32) (arg0 * 4); - D_410250 = (s32) (arg0 * 5); - D_410250 = (s32) (arg0 * 6); - D_410250 = (s32) (arg0 * 7); - D_410250 = (s32) (arg0 * 8); - D_410250 = (s32) (arg0 * 9); - D_410250 = (s32) (arg0 * 0xA); - D_410250 = (s32) (arg0 * 0xB); - D_410250 = (s32) (arg0 * 0xC); - D_410250 = (s32) (arg0 * 0xD); - D_410250 = (s32) (arg0 * 0xE); - D_410250 = (s32) (arg0 * 0xF); - D_410250 = (s32) (arg0 * 0x10); - D_410250 = (s32) (arg0 * 0x11); - D_410250 = (s32) (arg0 * 0x12); - D_410250 = (s32) (arg0 * 0x13); - D_410250 = (s32) (arg0 * 0x14); - D_410250 = (s32) (arg0 * 0x15); - D_410250 = (s32) (arg0 * 0x16); + D_410250 = arg0 * 2; + D_410250 = arg0 * 3; + D_410250 = arg0 * 4; + D_410250 = arg0 * 5; + D_410250 = arg0 * 6; + D_410250 = arg0 * 7; + D_410250 = arg0 * 8; + D_410250 = arg0 * 9; + D_410250 = arg0 * 0xA; + D_410250 = arg0 * 0xB; + D_410250 = arg0 * 0xC; + D_410250 = arg0 * 0xD; + D_410250 = arg0 * 0xE; + D_410250 = arg0 * 0xF; + D_410250 = arg0 * 0x10; + D_410250 = arg0 * 0x11; + D_410250 = arg0 * 0x12; + D_410250 = arg0 * 0x13; + D_410250 = arg0 * 0x14; + D_410250 = arg0 * 0x15; + D_410250 = arg0 * 0x16; } diff --git a/backend/mips_to_c/tests/end_to_end/mult-by-constant/irix-o2-out.c b/backend/mips_to_c/tests/end_to_end/mult-by-constant/irix-o2-out.c index e5df1767..0724c3e7 100644 --- a/backend/mips_to_c/tests/end_to_end/mult-by-constant/irix-o2-out.c +++ b/backend/mips_to_c/tests/end_to_end/mult-by-constant/irix-o2-out.c @@ -2,25 +2,25 @@ extern s32 D_4101F0; void test(s32 arg0) { D_4101F0 = arg0; - D_4101F0 = (s32) (arg0 * 2); - D_4101F0 = (s32) (arg0 * 3); - D_4101F0 = (s32) (arg0 * 4); - D_4101F0 = (s32) (arg0 * 5); - D_4101F0 = (s32) (arg0 * 6); - D_4101F0 = (s32) (arg0 * 7); - D_4101F0 = (s32) (arg0 * 8); - D_4101F0 = (s32) (arg0 * 9); - D_4101F0 = (s32) (arg0 * 0xA); - D_4101F0 = (s32) (arg0 * 0xB); - D_4101F0 = (s32) (arg0 * 0xC); - D_4101F0 = (s32) (arg0 * 0xD); - D_4101F0 = (s32) (arg0 * 0xE); - D_4101F0 = (s32) (arg0 * 0xF); - D_4101F0 = (s32) (arg0 * 0x10); - D_4101F0 = (s32) (arg0 * 0x11); - D_4101F0 = (s32) (arg0 * 0x12); - D_4101F0 = (s32) (arg0 * 0x13); - D_4101F0 = (s32) (arg0 * 0x14); - D_4101F0 = (s32) (arg0 * 0x15); - D_4101F0 = (s32) (arg0 * 0x16); + D_4101F0 = arg0 * 2; + D_4101F0 = arg0 * 3; + D_4101F0 = arg0 * 4; + D_4101F0 = arg0 * 5; + D_4101F0 = arg0 * 6; + D_4101F0 = arg0 * 7; + D_4101F0 = arg0 * 8; + D_4101F0 = arg0 * 9; + D_4101F0 = arg0 * 0xA; + D_4101F0 = arg0 * 0xB; + D_4101F0 = arg0 * 0xC; + D_4101F0 = arg0 * 0xD; + D_4101F0 = arg0 * 0xE; + D_4101F0 = arg0 * 0xF; + D_4101F0 = arg0 * 0x10; + D_4101F0 = arg0 * 0x11; + D_4101F0 = arg0 * 0x12; + D_4101F0 = arg0 * 0x13; + D_4101F0 = arg0 * 0x14; + D_4101F0 = arg0 * 0x15; + D_4101F0 = arg0 * 0x16; } diff --git a/backend/mips_to_c/tests/end_to_end/mult-by-two/irix-o2-out.c b/backend/mips_to_c/tests/end_to_end/mult-by-two/irix-o2-out.c index 8711316f..7b50d752 100644 --- a/backend/mips_to_c/tests/end_to_end/mult-by-two/irix-o2-out.c +++ b/backend/mips_to_c/tests/end_to_end/mult-by-two/irix-o2-out.c @@ -2,6 +2,6 @@ extern f32 D_4100E0; extern f64 D_4100E8; void test(void) { - D_4100E0 = (f32) (2.0f * D_4100E0); - D_4100E8 = (f64) (2.0 * D_4100E8); + D_4100E0 *= 2.0f; + D_4100E8 *= 2.0; } diff --git a/backend/mips_to_c/tests/end_to_end/no-ifs-early-returns/irix-o2-out.c b/backend/mips_to_c/tests/end_to_end/no-ifs-early-returns/irix-o2-out.c index df49f970..939a87ec 100644 --- a/backend/mips_to_c/tests/end_to_end/no-ifs-early-returns/irix-o2-out.c +++ b/backend/mips_to_c/tests/end_to_end/no-ifs-early-returns/irix-o2-out.c @@ -10,9 +10,9 @@ void test(s32 *arg0, s32 *arg1) { } return; block_3: - *arg1 = (s32) (*arg1 + temp_v1); + *arg1 += temp_v1; return; block_4: - *arg1 = (s32) (*arg1 - temp_v1); + *arg1 -= temp_v1; return; }