mirror of
https://github.com/NationalSecurityAgency/ghidra.git
synced 2026-01-08 15:09:44 -06:00
458 lines
16 KiB
Java
458 lines
16 KiB
Java
/* ###
|
|
* IP: GHIDRA
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
//Uses overriding references and the LiSA constant propagator to resolve system calls
|
|
//@category Analysis
|
|
|
|
import java.io.*;
|
|
import java.util.*;
|
|
import java.util.Map.Entry;
|
|
import java.util.function.Predicate;
|
|
|
|
import generic.jar.ResourceFile;
|
|
import ghidra.app.cmd.function.ApplyFunctionDataTypesCmd;
|
|
import ghidra.app.cmd.memory.AddUninitializedMemoryBlockCmd;
|
|
import ghidra.app.plugin.core.analysis.AutoAnalysisManager;
|
|
import ghidra.app.script.GhidraScript;
|
|
import ghidra.app.services.DataTypeManagerService;
|
|
import ghidra.app.util.opinion.ElfLoader;
|
|
import ghidra.framework.Application;
|
|
import ghidra.lisa.pcode.PcodeFrontend;
|
|
import ghidra.lisa.pcode.analyses.PcodeByteBasedConstantPropagation;
|
|
import ghidra.lisa.pcode.locations.PcodeLocation;
|
|
import ghidra.program.model.address.*;
|
|
import ghidra.program.model.data.DataTypeManager;
|
|
import ghidra.program.model.lang.Register;
|
|
import ghidra.program.model.lang.SpaceNames;
|
|
import ghidra.program.model.listing.*;
|
|
import ghidra.program.model.mem.MemoryAccessException;
|
|
import ghidra.program.model.pcode.PcodeOp;
|
|
import ghidra.program.model.symbol.*;
|
|
import ghidra.util.Msg;
|
|
import ghidra.util.exception.CancelledException;
|
|
import ghidra.util.task.TaskMonitor;
|
|
import it.unive.lisa.*;
|
|
import it.unive.lisa.analysis.*;
|
|
import it.unive.lisa.analysis.nonrelational.value.ValueEnvironment;
|
|
import it.unive.lisa.interprocedural.InterproceduralAnalysis;
|
|
import it.unive.lisa.program.cfg.CFG;
|
|
import it.unive.lisa.program.cfg.statement.Statement;
|
|
import it.unive.lisa.symbolic.value.Identifier;
|
|
|
|
/**
|
|
* This script will resolve system calls for x86 or x64 Linux binaries.
|
|
* It assumes that in the x64 case, the syscall native instruction is used to make system calls,
|
|
* and in the x86 case, system calls are made via an indirect call to GS:[0x10].
|
|
* It should be straightforward to modify this script for other cases.
|
|
*/
|
|
public class Lisa_ResolveX86orX64LinuxSyscallsScript extends GhidraScript {
|
|
|
|
//disassembles to "CALL dword ptr GS:[0x10]"
|
|
private static final byte[] x86_bytes = { 0x65, -1, 0x15, 0x10, 0x00, 0x00, 0x00 };
|
|
|
|
private static final String X86 = "x86";
|
|
|
|
private static final String SYSCALL_SPACE_NAME = "syscall";
|
|
|
|
private static final int SYSCALL_SPACE_LENGTH = 0x10000;
|
|
|
|
//this is the name of the userop (aka CALLOTHER) in the pcode translation of the
|
|
//native "syscall" instruction
|
|
private static final String SYSCALL_X64_CALLOTHER = "syscall";
|
|
|
|
//a set of names of all syscalls that do not return
|
|
private static final Set<String> noreturnSyscalls = Set.of("exit", "exit_group");
|
|
|
|
//tests whether an instruction is making a system call
|
|
private Predicate<Instruction> tester;
|
|
|
|
//register holding the syscall number
|
|
private String syscallRegister;
|
|
|
|
//datatype archive containing signature of system calls
|
|
private String datatypeArchiveName;
|
|
|
|
//file containing map from syscall numbers to syscall names
|
|
//note that different architectures can have different system call numbers, even
|
|
//if they're both Linux...
|
|
private String syscallFileName;
|
|
|
|
//the type of overriding reference to apply
|
|
private RefType overrideType;
|
|
|
|
//the calling convention to use for system calls (must be defined in the appropriate .cspec file)
|
|
private String callingConvention;
|
|
|
|
private InterproceduralAnalysis<?> ipa;
|
|
|
|
private PcodeFrontend frontend;
|
|
|
|
private LiSA lisa;
|
|
|
|
private Map<Address, CFG> targets = new HashMap<>();
|
|
|
|
@Override
|
|
protected void run() throws Exception {
|
|
|
|
if (!(currentProgram.getExecutableFormat().equals(ElfLoader.ELF_NAME) &&
|
|
currentProgram.getLanguage().getProcessor().toString().equals(X86))) {
|
|
popup("This script is intended for x86 or x64 Linux files");
|
|
return;
|
|
}
|
|
|
|
//determine whether the executable is 32 or 64 bit and set fields appropriately
|
|
int size = currentProgram.getLanguage().getLanguageDescription().getSize();
|
|
if (size == 64) {
|
|
tester = Lisa_ResolveX86orX64LinuxSyscallsScript::checkX64Instruction;
|
|
syscallRegister = "RAX";
|
|
datatypeArchiveName = "generic_clib_64";
|
|
syscallFileName = "x64_linux_syscall_numbers";
|
|
overrideType = RefType.CALLOTHER_OVERRIDE_CALL;
|
|
callingConvention = "syscall";
|
|
}
|
|
else {
|
|
tester = Lisa_ResolveX86orX64LinuxSyscallsScript::checkX86Instruction;
|
|
syscallRegister = "EAX";
|
|
datatypeArchiveName = "generic_clib";
|
|
syscallFileName = "x86_linux_syscall_numbers";
|
|
overrideType = RefType.CALL_OVERRIDE_UNCONDITIONAL;
|
|
callingConvention = "syscall";
|
|
}
|
|
|
|
//get the space where the system calls live.
|
|
//If it doesn't exist, create it.
|
|
AddressSpace syscallSpace =
|
|
currentProgram.getAddressFactory().getAddressSpace(SYSCALL_SPACE_NAME);
|
|
if (syscallSpace == null) {
|
|
//don't muck with address spaces if you don't have exclusive access to the program.
|
|
if (!currentProgram.hasExclusiveAccess()) {
|
|
popup("Must have exclusive access to " + currentProgram.getName() +
|
|
" to run this script");
|
|
return;
|
|
}
|
|
Address startAddr = currentProgram.getAddressFactory()
|
|
.getAddressSpace(SpaceNames.OTHER_SPACE_NAME)
|
|
.getAddress(0x0L);
|
|
AddUninitializedMemoryBlockCmd cmd = new AddUninitializedMemoryBlockCmd(
|
|
SYSCALL_SPACE_NAME, null, this.getClass().getName(), startAddr,
|
|
SYSCALL_SPACE_LENGTH, true, true, true, false, true);
|
|
if (!cmd.applyTo(currentProgram)) {
|
|
popup("Failed to create " + SYSCALL_SPACE_NAME);
|
|
return;
|
|
}
|
|
syscallSpace = currentProgram.getAddressFactory().getAddressSpace(SYSCALL_SPACE_NAME);
|
|
}
|
|
else {
|
|
printf("AddressSpace %s found, continuing...\n", SYSCALL_SPACE_NAME);
|
|
}
|
|
|
|
//get all of the functions that contain system calls
|
|
//note that this will not find system call instructions that are not in defined functions
|
|
Map<Function, Set<Address>> funcsToCalls = getSyscallsInFunctions(currentProgram, monitor);
|
|
|
|
if (funcsToCalls.isEmpty()) {
|
|
popup("No system calls found (within defined functions)");
|
|
return;
|
|
}
|
|
|
|
//get the system call number at each callsite of a system call.
|
|
//note that this is not guaranteed to succeed at a given system call call site -
|
|
//it might be hard (or impossible) to determine a specific constant
|
|
Map<Address, Long> addressesToSyscalls =
|
|
resolveConstants(funcsToCalls, currentProgram, monitor);
|
|
|
|
if (addressesToSyscalls.isEmpty()) {
|
|
popup("Couldn't resolve any syscall constants");
|
|
return;
|
|
}
|
|
|
|
//get the map from system call numbers to system call names
|
|
//you might have to create this yourself!
|
|
Map<Long, String> syscallNumbersToNames = getSyscallNumberMap();
|
|
|
|
//at each system call call site where a constant could be determined, create
|
|
//the system call (if not already created), then add the appropriate overriding reference
|
|
//use syscallNumbersToNames to name the created functions
|
|
//if there's not a name corresponding to the constant use a default
|
|
for (Entry<Address, Long> entry : addressesToSyscalls.entrySet()) {
|
|
Address callSite = entry.getKey();
|
|
Long offset = entry.getValue();
|
|
printerr(callSite + ":" + Long.toHexString(offset));
|
|
Address callTarget = syscallSpace.getAddress(offset);
|
|
Function callee = currentProgram.getFunctionManager().getFunctionAt(callTarget);
|
|
if (callee == null) {
|
|
String funcName = "syscall_" + String.format("%08X", offset);
|
|
if (syscallNumbersToNames.get(offset) != null) {
|
|
funcName = syscallNumbersToNames.get(offset);
|
|
}
|
|
callee = createFunction(callTarget, funcName);
|
|
if (callee == null) {
|
|
continue;
|
|
}
|
|
callee.setCallingConvention(callingConvention);
|
|
|
|
//check if the function name is one of the non-returning syscalls
|
|
if (noreturnSyscalls.contains(funcName)) {
|
|
callee.setNoReturn(true);
|
|
}
|
|
}
|
|
Reference ref = currentProgram.getReferenceManager()
|
|
.addMemoryReference(callSite, callTarget, overrideType, SourceType.USER_DEFINED,
|
|
Reference.MNEMONIC);
|
|
//overriding references must be primary to be active
|
|
currentProgram.getReferenceManager().setPrimary(ref, true);
|
|
}
|
|
|
|
//finally, open the appropriate data type archive and apply its function data types
|
|
//to the new system call space, so that the system calls have the correct signatures
|
|
AutoAnalysisManager mgr = AutoAnalysisManager.getAnalysisManager(currentProgram);
|
|
DataTypeManagerService service = mgr.getDataTypeManagerService();
|
|
List<DataTypeManager> dataTypeManagers = new ArrayList<>();
|
|
dataTypeManagers.add(service.openDataTypeArchive(datatypeArchiveName));
|
|
dataTypeManagers.add(currentProgram.getDataTypeManager());
|
|
ApplyFunctionDataTypesCmd cmd = new ApplyFunctionDataTypesCmd(dataTypeManagers,
|
|
new AddressSet(syscallSpace.getMinAddress(), syscallSpace.getMaxAddress()),
|
|
SourceType.USER_DEFINED, false, false);
|
|
cmd.applyTo(currentProgram);
|
|
}
|
|
|
|
private Map<Long, String> getSyscallNumberMap() {
|
|
Map<Long, String> syscallMap = new HashMap<>();
|
|
ResourceFile rFile = Application.findDataFileInAnyModule(syscallFileName);
|
|
if (rFile == null) {
|
|
popup("Error opening syscall number file, using default names");
|
|
return syscallMap;
|
|
}
|
|
try (FileReader fReader = new FileReader(rFile.getFile(false));
|
|
BufferedReader bReader = new BufferedReader(fReader)) {
|
|
String line = null;
|
|
while ((line = bReader.readLine()) != null) {
|
|
//lines starting with # are comments
|
|
if (!line.startsWith("#")) {
|
|
String[] parts = line.trim().split(" ");
|
|
Long number = Long.parseLong(parts[0]);
|
|
syscallMap.put(number, parts[1]);
|
|
}
|
|
}
|
|
}
|
|
catch (IOException e) {
|
|
Msg.showError(this, null, "Error reading syscall map file", e.getMessage(), e);
|
|
}
|
|
return syscallMap;
|
|
}
|
|
|
|
/**
|
|
* Scans through all of the functions defined in {@code program} and returns
|
|
* a map which takes a function to the set of address in its body which contain
|
|
* system calls
|
|
* @param program program containing functions
|
|
* @param tMonitor monitor
|
|
* @return map function -> addresses in function containing syscalls
|
|
* @throws CancelledException if the user cancels
|
|
*/
|
|
private Map<Function, Set<Address>> getSyscallsInFunctions(Program program,
|
|
TaskMonitor tMonitor) throws CancelledException {
|
|
Map<Function, Set<Address>> funcsToCalls = new HashMap<>();
|
|
for (Function func : program.getFunctionManager().getFunctionsNoStubs(true)) {
|
|
tMonitor.checkCancelled();
|
|
for (Instruction inst : program.getListing().getInstructions(func.getBody(), true)) {
|
|
if (tester.test(inst)) {
|
|
Set<Address> callSites = funcsToCalls.get(func);
|
|
if (callSites == null) {
|
|
callSites = new HashSet<>();
|
|
funcsToCalls.put(func, callSites);
|
|
}
|
|
callSites.add(inst.getAddress());
|
|
}
|
|
}
|
|
}
|
|
return funcsToCalls;
|
|
}
|
|
|
|
/**
|
|
* Uses the LiSA constant propagator to attempt to determine the constant value in
|
|
* the syscall register at each system call instruction
|
|
*
|
|
* @param funcsToCalls map from functions containing syscalls to address in each function of
|
|
* the system call
|
|
* @param program containing the functions
|
|
* @return map from addresses of system calls to system call numbers
|
|
* @throws CancelledException if the user cancels
|
|
*/
|
|
private Map<Address, Long> resolveConstants(Map<Function, Set<Address>> funcsToCalls,
|
|
Program program, TaskMonitor tMonitor) throws CancelledException {
|
|
initLisa();
|
|
Set<CFG> cfgs = new HashSet<>();
|
|
for (Function func : funcsToCalls.keySet()) {
|
|
CFG cfg = frontend.visitFunction(func, func.getEntryPoint());
|
|
cfgs.add(cfg);
|
|
}
|
|
it.unive.lisa.program.Program p = frontend.getProgram();
|
|
Collection<CFG> baseline = p.getAllCFGs();
|
|
for (CFG cfg : cfgs) {
|
|
if (baseline.contains(cfg)) {
|
|
p.addEntryPoint(cfg);
|
|
}
|
|
}
|
|
|
|
LiSAReport report = lisa.run(p);
|
|
ipa = report.getConfiguration().interproceduralAnalysis;
|
|
storeResults(frontend.getProgram(), ipa);
|
|
|
|
Map<Address, Long> addressesToSyscalls = new HashMap<>();
|
|
Register syscallReg = program.getLanguage().getRegister(syscallRegister);
|
|
for (Function func : funcsToCalls.keySet()) {
|
|
for (Address callSite : funcsToCalls.get(func)) {
|
|
long val = getRegisterValue(callSite, syscallReg);
|
|
if (val == -1) {
|
|
//createBookmark(callSite, "System Call",
|
|
// "Couldn't resolve value of " + syscallReg);
|
|
//printf("Couldn't resolve value of " + syscallReg + " at " + callSite + "\n");
|
|
continue;
|
|
}
|
|
addressesToSyscalls.put(callSite, val);
|
|
}
|
|
}
|
|
return addressesToSyscalls;
|
|
}
|
|
|
|
private long getRegisterValue(Address callSite, Register syscallReg) {
|
|
Set<Statement> set = frontend.getStatement(callSite);
|
|
if (set == null) {
|
|
printerr("Null set for " + callSite);
|
|
return -1;
|
|
}
|
|
String offset = syscallReg.getAddress().toString();
|
|
Function f = currentProgram.getListing().getFunctionContaining(callSite);
|
|
CFG cfg = targets.get(f.getEntryPoint());
|
|
if (cfg == null) {
|
|
printerr("Null cfg for " + callSite);
|
|
return -1;
|
|
}
|
|
Collection<?> results = ipa.getAnalysisResultsOf(cfg);
|
|
Statement st = null;
|
|
for (Statement obj : set) {
|
|
PcodeLocation loc = (PcodeLocation) obj.getLocation();
|
|
if (loc.op.getOpcode() >= PcodeOp.CALL && loc.op.getOpcode() <= PcodeOp.CALLOTHER) {
|
|
st = obj;
|
|
}
|
|
}
|
|
if (st == null) {
|
|
printerr("Null statement for " + callSite);
|
|
return -1;
|
|
}
|
|
Iterator<?> iterator = results.iterator();
|
|
while (iterator.hasNext()) {
|
|
Object next = iterator.next();
|
|
if (next instanceof AnalyzedCFG<?> acfg) {
|
|
AnalysisState<?> state1;
|
|
try {
|
|
state1 = acfg.getAnalysisStateBefore(st);
|
|
AbstractState<?> state2 = state1.getState();
|
|
if (state2 instanceof SimpleAbstractState sas) {
|
|
ValueEnvironment<?> valueState = (ValueEnvironment<?>) sas.getValueState();
|
|
Map<Identifier, ?> function = valueState.function;
|
|
if (function != null) {
|
|
for (Object key : function.keySet()) {
|
|
Object val = valueState.function.get(key);
|
|
if (val instanceof PcodeByteBasedConstantPropagation icp) {
|
|
String keyStr = key.toString();
|
|
if (keyStr.equals(offset)) {
|
|
String valstr = icp.representation().toString();
|
|
if (!valstr.contains("#TOP#")) {
|
|
return Long.parseLong(valstr);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
catch (Exception e) {
|
|
printerr(e.getMessage());
|
|
}
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
private void initLisa() {
|
|
frontend = new PcodeFrontend();
|
|
|
|
DefaultConfiguration conf = new DefaultConfiguration();
|
|
conf.serializeResults = true;
|
|
conf.abstractState = DefaultConfiguration.simpleState(
|
|
DefaultConfiguration.defaultHeapDomain(),
|
|
new ValueEnvironment<>(
|
|
new PcodeByteBasedConstantPropagation(currentProgram.getLanguage())),
|
|
DefaultConfiguration.defaultTypeDomain());
|
|
conf.serializeResults = false;
|
|
lisa = new LiSA(conf);
|
|
}
|
|
|
|
private void storeResults(it.unive.lisa.program.Program p,
|
|
InterproceduralAnalysis<?> interproceduralAnalysis) {
|
|
Collection<CFG> ep = p.getEntryPoints();
|
|
for (CFG cfg : ep) {
|
|
Collection<Statement> entrypoints = cfg.getEntrypoints();
|
|
for (Statement st : entrypoints) {
|
|
PcodeLocation loc = (PcodeLocation) st.getLocation();
|
|
Address target = loc.op.getSeqnum().getTarget();
|
|
Function f = currentProgram.getListing().getFunctionContaining(target);
|
|
targets.put(f.getEntryPoint(), cfg);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Checks whether an x86 native instruction is a system call
|
|
* @param inst instruction to check
|
|
* @return true precisely when the instruction is a system call
|
|
*/
|
|
private static boolean checkX86Instruction(Instruction inst) {
|
|
try {
|
|
return Arrays.equals(x86_bytes, inst.getBytes());
|
|
}
|
|
catch (MemoryAccessException e) {
|
|
Msg.info(Lisa_ResolveX86orX64LinuxSyscallsScript.class,
|
|
"MemoryAccessException at " + inst.getAddress().toString());
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Checks whether an x64 instruction is a system call
|
|
* @param inst instruction to check
|
|
* @return true precisely when the instruction is a system call
|
|
*/
|
|
private static boolean checkX64Instruction(Instruction inst) {
|
|
boolean retVal = false;
|
|
for (PcodeOp op : inst.getPcode()) {
|
|
if (op.getOpcode() == PcodeOp.CALLOTHER) {
|
|
int index = (int) op.getInput(0).getOffset();
|
|
if (inst.getProgram()
|
|
.getLanguage()
|
|
.getUserDefinedOpName(index)
|
|
.equals(SYSCALL_X64_CALLOTHER)) {
|
|
retVal = true;
|
|
}
|
|
}
|
|
}
|
|
return retVal;
|
|
}
|
|
|
|
}
|