From 1684aaf61f7720c15354b0acedfcfabf1c7a9684 Mon Sep 17 00:00:00 2001 From: ghizard <50744617+ghizard@users.noreply.github.com> Date: Tue, 22 Jul 2025 16:27:14 -0400 Subject: [PATCH] GP-5861 - PDB, MDMang, and DTUtils optim - fix for processing of truncated symbols and optional prefix on MDMang datatype demangling --- .../src/main/java/mdemangler/MDMangUtils.java | 8 ++ .../mdemangler/datatype/MDDataTypeParser.java | 11 +- .../AbstractComplexTypeApplier.java | 24 +++- .../database/data/DataTypeUtilities.java | 106 +++++++++--------- 4 files changed, 91 insertions(+), 58 deletions(-) diff --git a/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/MDMangUtils.java b/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/MDMangUtils.java index 5f0bb5391d..c244a58624 100644 --- a/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/MDMangUtils.java +++ b/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/MDMangUtils.java @@ -20,6 +20,7 @@ import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.lang3.ObjectUtils; import org.apache.commons.lang3.StringUtils; import ghidra.app.util.SymbolPath; @@ -211,6 +212,9 @@ public class MDMangUtils { // When simple is true, we need to recurse the nested hierarchy to pull the names // up to the main namespace level, so we set recurse = true recurseNamespace(demangledParts, parsableItem, simple); + if(ObjectUtils.isEmpty(regularPathName)) { + return createSymbolPath(demangledParts); + } List regularParts = SymbolPathParser.parse(regularPathName); int m = Integer.min(demangledParts.size(), regularParts.size()); @@ -239,6 +243,10 @@ public class MDMangUtils { parts.add(0, n); } + return createSymbolPath(parts); + } + + private static SymbolPath createSymbolPath(List parts) { SymbolPath sp = null; for (String part : parts) { sp = new SymbolPath(sp, part); diff --git a/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/datatype/MDDataTypeParser.java b/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/datatype/MDDataTypeParser.java index 7da58d9f17..9318bb2f1c 100644 --- a/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/datatype/MDDataTypeParser.java +++ b/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/datatype/MDDataTypeParser.java @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -48,14 +48,13 @@ public class MDDataTypeParser { throws MDException { MDDataType dt = null; - if (dmang.peek() != '.') { - throw new MDException("MDMang: Mangled string is not that of a type."); - } dmang.setProcessingMode(ProcessingMode.DEFAULT_STANDARD); try { dmang.pushContext(); - dmang.increment(); // skip the '.' + if (dmang.peek() == '.') { + dmang.increment(); // skip the now-optional '.' + } dt = parseDataType(dmang, isHighest); dt.parse(); dmang.popContext(); diff --git a/Ghidra/Features/PDB/src/main/java/ghidra/app/util/pdb/pdbapplicator/AbstractComplexTypeApplier.java b/Ghidra/Features/PDB/src/main/java/ghidra/app/util/pdb/pdbapplicator/AbstractComplexTypeApplier.java index afd8bec998..ddc9971bac 100644 --- a/Ghidra/Features/PDB/src/main/java/ghidra/app/util/pdb/pdbapplicator/AbstractComplexTypeApplier.java +++ b/Ghidra/Features/PDB/src/main/java/ghidra/app/util/pdb/pdbapplicator/AbstractComplexTypeApplier.java @@ -104,13 +104,35 @@ public abstract class AbstractComplexTypeApplier extends MsDataTypeApplier { // often had a member that also lambda that was marked with the exact same namespace/name // as the containing structure. We found that the mangled names had more accurate and // distinguished lambda numbers. + + // Future: probably want to change both mangled and non-mangled symbols for best, as both + // could be truncated, but it is likely that partial results from a mangled symbol would + // have more detail than the partial results of a truncated non-mangled symbol. Thus, + // we should make getSymbolPathFromMangleTypeName() should do more work, even if the + // mangled symbol is truncated... perhaps we pass in a flag indicating to continue + // processing with the assumption that it is truncated? + + boolean truncated = name.length() == 4096; // works unless real length was 4096 if (mangledName != null) { - symbolPath = getSymbolPathFromMangledTypeName(mangledName, name); + symbolPath = getSymbolPathFromMangledTypeName(mangledName, truncated ? null : name); } if (symbolPath == null) { symbolPath = MDMangUtils.standarizeSymbolPathUnderscores( new SymbolPath(SymbolPathParser.parse(name))); + // If name was truncated at 4096 characters, then we likely do not have a complete + // symbol. In a rare case, we had a blank "name" because the truncation happened + // right after a namespace delimiter. Whether blank or not, we are appending + // a truncation message to the "name." Note, however, that we could have a rare case + // where there were exactly 4096 characters without truncation where we will still + // append the truncation message. We could try harder to ensure proper namespace + // parsing was done, and only then decide whether we need to add the message. That + // proper parsing is a separate research effort. For now, just append when we have + // 4096. + if (truncated) { + symbolPath = + new SymbolPath(symbolPath.getParent(), symbolPath.getName() + "_truncated"); + } } return symbolPath; } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/database/data/DataTypeUtilities.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/database/data/DataTypeUtilities.java index ec2754177e..167a421f9d 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/database/data/DataTypeUtilities.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/database/data/DataTypeUtilities.java @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -62,13 +62,13 @@ public class DataTypeUtilities { cPrimitiveNameMap.put("long double", LongDoubleDataType.dataType); } - // TODO: Should we drop the handling of "_" use in conflict name. It's unclear + // TODO: Should we drop the handling of "_" use in conflict name. It's unclear // when/if this was ever used in the generation of a conflict name. - // NOTE: It is assumed that all BuiltInDataType types (other then possibly Pointers) + // NOTE: It is assumed that all BuiltInDataType types (other then possibly Pointers) // will not utilize conflict names. This includes pointers and arrays whose base // type is a BuiltInDataType. // NOTE: The BASE_DATATYPE_CONFLICT_PATTERN may never be applied to a pointer - // or array name as it will always fail to match. Pointer and array decorations + // or array name as it will always fail to match. Pointer and array decorations // must be stripped off first. private static final Pattern BASE_DATATYPE_CONFLICT_PATTERN = Pattern.compile(Pattern.quote(DataType.CONFLICT_SUFFIX) + "([_]{0,1}\\d+){0,1}$"); @@ -157,7 +157,7 @@ public class DataTypeUtilities { * Note: pointers to the second data type are references and therefore are not considered to be * part of the first and won't cause true to be returned. If you pass a pointer to this method * for the first or second parameter, it will return false. - * + * * @param firstDataType the data type whose components or base type should be checked to see if * the second data type is part of it. * @param secondDataType the data type to be checked for in the first data type. @@ -192,7 +192,7 @@ public class DataTypeUtilities { /** * Returns true if the two dataTypes have the same sourceArchive and the same UniversalID - * + * * @param dataType1 first data type * @param dataType2 second data type * @return true if types correspond to the same type from a source archive @@ -218,7 +218,7 @@ public class DataTypeUtilities { /** * Returns true if two dataTypes have the same sourceArchive and the same UniversalID OR are * equivalent - * + * * @param dataType1 first data type (if invoked by DB object or manager, this argument must * correspond to the DataTypeDB). * @param dataType2 second data type @@ -230,6 +230,10 @@ public class DataTypeUtilities { if (isSameDataType(dataType1, dataType2)) { return true; } + if (dataType2 instanceof DataTypeDB) { + // Leverage optimizations within most DataTypeDB implementations + return dataType2.isEquivalent(dataType1); + } // otherwise, check if they are equivalent return dataType1.isEquivalent(dataType2); } @@ -237,16 +241,16 @@ public class DataTypeUtilities { /** * Determine if two dataTypes are the same kind of datatype without considering naming or * component makeup. The use of Typedefs is ignored and stripped away for comparison. - * This method also ignores details about most built-in types, pointers and arrays + * This method also ignores details about most built-in types, pointers and arrays * (e.g., number of elements or size). Implementations of the following abstract classes * will be treated as the same kind as another datatype which extends the same abstract * class: * - * Other uses of {@link BuiltInDataType} must match the specific implementation class. + * Other uses of {@link BuiltInDataType} must match the specific implementation class. * @param dataType1 first data type * @param dataType2 second data type * @return true if the two dataTypes are the same basic kind else false @@ -311,7 +315,7 @@ public class DataTypeUtilities { * Get the base data type for the specified data type stripping away pointers and arrays only. A * null will be returned for a default pointer. * - * @param dt the data type whose base data type is to be determined. + * @param dt the data type whose base data type is to be determined. * @return the base data type (may be null for default pointer). */ public static DataType getBaseDataType(DataType dt) { @@ -393,7 +397,7 @@ public class DataTypeUtilities { /** * Create a data type category path derived from the specified namespace and rooted from the * specified baseCategory - * + * * @param baseCategory category path from which to root the namespace-base path * @param namespace the namespace * @return namespace derived category path @@ -415,9 +419,9 @@ public class DataTypeUtilities { * Find the structure data type which corresponds to the specified class namespace * within the specified data type manager. * The structure must utilize a namespace-based category path, however, - * the match criteria can be fuzzy and relies primarily on the full class namespace. - * A properly named class structure must reside within a category whose trailing - * path either matches the class namespace or the class-parent's namespace. + * the match criteria can be fuzzy and relies primarily on the full class namespace. + * A properly named class structure must reside within a category whose trailing + * path either matches the class namespace or the class-parent's namespace. * Preference is given to it residing within the class-parent's namespace. * @param dataTypeManager data type manager which should be searched. * @param classNamespace class namespace @@ -440,10 +444,10 @@ public class DataTypeUtilities { /** * Attempt to find the data type whose dtName and specified namespace match a stored data type - * within the specified dataTypeManager. The first match which satisfies the category path - * requirement will be returned. If a non-root namespace is specified the datatype's trailing + * within the specified dataTypeManager. The first match which satisfies the category path + * requirement will be returned. If a non-root namespace is specified the datatype's trailing * category path must match the specified namespace path. - * + * * @param dataTypeManager data type manager * @param namespace namespace associated with dtName (null indicates no namespace constraint) * @param dtName name of data type @@ -466,10 +470,10 @@ public class DataTypeUtilities { /** * Attempt to find the data type whose dtNameWithNamespace match a stored data type within the - * specified dataTypeManager. The namespace will be used in checking data type parent categories. - * NOTE: name parsing assumes :: namespace delimiter which can be thrown off if name includes + * specified dataTypeManager. The namespace will be used in checking data type parent categories. + * NOTE: name parsing assumes :: namespace delimiter which can be thrown off if name includes * template information which could contain namespaces (see {@link SymbolPathParser#parse(String)}). - * + * * @param dataTypeManager data type manager * @param dtNameWithNamespace name of data type qualified with namespace (e.g., * ns1::ns2::dtname) @@ -507,7 +511,7 @@ public class DataTypeUtilities { /** * Return the appropriate datatype for a given C primitive datatype name. - * + * * @param dataTypeName the datatype name (e.g. "unsigned int", "long long") * @return the appropriate datatype for a given C primitive datatype name. */ @@ -525,11 +529,11 @@ public class DataTypeUtilities { /** * Get relative/partial category paths which corresponds to a specified namespace. - * Any {@link Library} namespace will be ignored and treated like the global namespace + * Any {@link Library} namespace will be ignored and treated like the global namespace * when generating a related category path. An empty string will be returned for the * global namespace. * @param namespace data type namespace - * @return partial two-element array with category path for namespace [NAMESPACE_PATH_INDEX] + * @return partial two-element array with category path for namespace [NAMESPACE_PATH_INDEX] * and parent-namespace [PARENT_NAMESPACE_PATH_INDEX]. * A null is returned if namespace is null or the root/global namespace. */ @@ -557,12 +561,12 @@ public class DataTypeUtilities { /** * Namespace category matcher. Only those datatypes contained within a catgeory * whose trailing category path matches the specified namespacePath will be considered - * a possible match. If the namespacePath is empty array all category paths will + * a possible match. If the namespacePath is empty array all category paths will * be considered a match with preference given to the root category. * @param categoryPath datatype category path * @param namespacePath namespace path * @return {@link CategoryMatchType#PREFERRED} if namespace match found, {@link CategoryMatchType#SECONDARY} - * if no namespace constraint specified else {@link CategoryMatchType#NONE} if namespace constraint not + * if no namespace constraint specified else {@link CategoryMatchType#NONE} if namespace constraint not * satisfied. */ private static CategoryMatchType getCategoryMatchType(CategoryPath categoryPath, @@ -577,14 +581,14 @@ public class DataTypeUtilities { } /** - * Namespace category matcher. + * Namespace category matcher. * @param categoryPath datatype category path * @param namespacePaths namespace paths constraint or null for no namespace. This value should * be obtained from the {@link #getRelativeCategoryPaths(Namespace)} method. - * @param parentNamespacePreferred if true matching on parent namespace is + * @param parentNamespacePreferred if true matching on parent namespace is * enabled and preferred over match on actual namespace. This is used for * class structure searching. - * @return {@link CategoryMatchType#PREFERRED} is returned if parentNamespacePreferred is true + * @return {@link CategoryMatchType#PREFERRED} is returned if parentNamespacePreferred is true * and category path matches on parent-namespace or parentNamespacePreferred is false * and category path matches on namespace. {@link CategoryMatchType#SECONDARY} is returned * if parentNamespacePreferred is true and category path matches on namespace. Otherwise @@ -639,7 +643,7 @@ public class DataTypeUtilities { * @param A standard interface which extends {@link DataType} (e.g., {@link Structure}). * @param dataTypeManager datatype manager to query * @param rootPath root category path - * @param namespacePath an optional namespace path to be checked under rootPath. + * @param namespacePath an optional namespace path to be checked under rootPath. * If null or empty the rootPath will be checked for dtName. * @param dtName datatype name * @param classConstraint datatype class constraint (optional, may be null) @@ -667,7 +671,7 @@ public class DataTypeUtilities { } /** - * Get the specified datatype by name and category and return only if its type + * Get the specified datatype by name and category and return only if its type * corresponds to an class constraint if specified. * @param A standard interface which extends {@link DataType} (e.g., {@link Structure}). * @param category datatype category to query @@ -689,15 +693,15 @@ public class DataTypeUtilities { /** * Perform a preferred category namespace qualified datatype search using * category path supplied by {@link Program#getPreferredRootNamespaceCategoryPath()}. - * Any {@link Library} namespace will be ignored and treated like the global namespace - * when generating a related category path. This method only applies to - * {@link ProgramBasedDataTypeManager} and will always return null for other + * Any {@link Library} namespace will be ignored and treated like the global namespace + * when generating a related category path. This method only applies to + * {@link ProgramBasedDataTypeManager} and will always return null for other * datatype managers. * @param dataTypeManager datatype manager * @param namespace namespace constraint or null for no namespace. * @param dtName datatype name * @param classConstraint type of datatype by its interface class (e.g., {@link Structure}). - * @param parentNamespacePreferred if true matching on parent namespace is + * @param parentNamespacePreferred if true matching on parent namespace is * enabled and preferred over match on actual namespace. This is relavent for * class structure searching. * @return preferred datatype match if found @@ -745,15 +749,15 @@ public class DataTypeUtilities { }; /** - * Perform a namespace qualified datatype search. + * Perform a namespace qualified datatype search. * @param dataTypeManager datatype manager * @param dtName datatype name * @param classConstraint type of datatype by its interface class (e.g., {@link Structure}). * @param categoryMatcher responsible for evaluating the category path - * for a possible match with a namespace constraint. - * @return The first {@link CategoryMatchType#PREFERRED} match will be - * returned if found. If none are {@link CategoryMatchType#PREFERRED}, the first - * {@link CategoryMatchType#SECONDARY} match will be returned. Otherwise null is returned. + * for a possible match with a namespace constraint. + * @return The first {@link CategoryMatchType#PREFERRED} match will be + * returned if found. If none are {@link CategoryMatchType#PREFERRED}, the first + * {@link CategoryMatchType#SECONDARY} match will be returned. Otherwise null is returned. */ @SuppressWarnings("unchecked") private static T findDataType(DataTypeManager dataTypeManager, @@ -812,10 +816,10 @@ public class DataTypeUtilities { /** * Get the name of a data type with all conflict naming patterns removed. - * + * * @param dataType data type * @param includeCategoryPath if true, the category path will be included with the - * returned name (e.g., /mypath/mydt) and any occurance of a forward slash within individual + * returned name (e.g., /mypath/mydt) and any occurance of a forward slash within individual * path components, including the data type name, will be escaped (e.g., {@code "\/"}). * @return name with optional category path included */ @@ -829,7 +833,7 @@ public class DataTypeUtilities { /** * Get the name of a data type with all conflict naming patterns removed. - * + * * @param dataTypeName data type name with optional category path included * @return name with optional category path included */ @@ -849,7 +853,7 @@ public class DataTypeUtilities { /** * Get a datatype's name without conflict suffix. - * + * * @param dt datatype (pointer and array permitted) * @return datatype's name without conflict suffix */ @@ -904,13 +908,13 @@ public class DataTypeUtilities { /** * Get the conflict value string associated with a conflict datatype name. - * + * * @param dataType datatype to be checked * @return conflict value: *
    *
  1. -1: when type does not have a conflict name,
  2. *
  3. 0: when conflict name does not have a number (i.e., {@code .conflict}), or
  4. - *
  5. a positive value which corresponds to the conflict number in the name + *
  6. a positive value which corresponds to the conflict number in the name * (e.g., returns 2 for {@code .conflict2}).
  7. *
*/ @@ -930,13 +934,13 @@ public class DataTypeUtilities { /** * Get the conflict value associated with a conflict datatype name. - * + * * @param dataTypeName datatype name to be checked * @return conflict value: *
    *
  1. -1: when name is not have a conflict name,
  2. *
  3. 0: when conflict name does not have a number (i.e., {@code .conflict}), or
  4. - *
  5. a positive value which corresponds to the conflict number in the name + *
  6. a positive value which corresponds to the conflict number in the name * (e.g., returns 2 for {@code .conflict2}).
  7. *
*/ @@ -952,7 +956,7 @@ public class DataTypeUtilities { /** * Determine if the specified data type name is a conflict name. - * + * * @param dataTypeName datatype name * @return true if data type name is a conflict name. */ @@ -991,7 +995,7 @@ public class DataTypeUtilities { /** * Compares two data type name strings to determine if they are equivalent names, ignoring * conflict patterns present. - * + * * @param name1 the first name * @param name2 the second name * @return true if the names are equivalent when conflict suffixes are ignored.