From a31e781fe83759e0bc9771b2686ca5dd0eeab684 Mon Sep 17 00:00:00 2001 From: Tyler Yankee Date: Fri, 26 Sep 2025 11:20:32 -0400 Subject: [PATCH 1/3] instrumentation: Use end time for snippet file names --- Source/cmInstrumentation.cxx | 18 ++++++++++++------ Source/cmInstrumentation.h | 3 ++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/Source/cmInstrumentation.cxx b/Source/cmInstrumentation.cxx index 7f5675e20d..4e4a13d737 100644 --- a/Source/cmInstrumentation.cxx +++ b/Source/cmInstrumentation.cxx @@ -539,10 +539,12 @@ std::string cmInstrumentation::InstrumentTest( } cmsys::SystemInformation& info = this->GetSystemInformation(); + std::chrono::system_clock::time_point endTime = + systemStart + std::chrono::milliseconds(root["duration"].asUInt64()); std::string file_name = cmStrCat( "test-", - this->ComputeSuffixHash(cmStrCat(command_str, info.GetProcessId())), - this->ComputeSuffixTime(), ".json"); + this->ComputeSuffixHash(cmStrCat(command_str, info.GetProcessId())), '-', + this->ComputeSuffixTime(endTime), ".json"); this->WriteInstrumentationJson(root, "data", file_name); return file_name; } @@ -674,10 +676,12 @@ int cmInstrumentation::InstrumentCommand( // Write Json cmsys::SystemInformation& info = this->GetSystemInformation(); + std::chrono::system_clock::time_point endTime = + system_start + std::chrono::milliseconds(root["duration"].asUInt64()); std::string const& file_name = cmStrCat( command_type, '-', - this->ComputeSuffixHash(cmStrCat(command_str, info.GetProcessId())), - this->ComputeSuffixTime(), ".json"); + this->ComputeSuffixHash(cmStrCat(command_str, info.GetProcessId())), '-', + this->ComputeSuffixTime(endTime), ".json"); this->WriteInstrumentationJson(root, "data", file_name); return ret; } @@ -704,11 +708,13 @@ std::string cmInstrumentation::ComputeSuffixHash( return hash; } -std::string cmInstrumentation::ComputeSuffixTime() +std::string cmInstrumentation::ComputeSuffixTime( + cm::optional time) { std::chrono::milliseconds ms = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()); + (time.has_value() ? time.value() : std::chrono::system_clock::now()) + .time_since_epoch()); std::chrono::seconds s = std::chrono::duration_cast(ms); diff --git a/Source/cmInstrumentation.h b/Source/cmInstrumentation.h index e22d01fb22..fae948551f 100644 --- a/Source/cmInstrumentation.h +++ b/Source/cmInstrumentation.h @@ -89,7 +89,8 @@ private: bool HasQueryFile(std::string const& file); static std::string GetCommandStr(std::vector const& args); static std::string ComputeSuffixHash(std::string const& command_str); - static std::string ComputeSuffixTime(); + static std::string ComputeSuffixTime( + cm::optional time = cm::nullopt); void PrepareDataForCDash(std::string const& data_dir, std::string const& index_path); void RemoveOldFiles(std::string const& dataSubdir); From 0882a37f7360068f2d0d6c2950352cdaedb831c2 Mon Sep 17 00:00:00 2001 From: Tyler Yankee Date: Fri, 26 Sep 2025 11:28:47 -0400 Subject: [PATCH 2/3] instrumentation: Load snippets individually for trace Only loading the necessary timing data and deferring the dump of all snippet data into the trace file saves on memory for large traces. --- Source/cmInstrumentation.cxx | 40 +++++++++++++++++++++--------------- Source/cmInstrumentation.h | 3 +-- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/Source/cmInstrumentation.cxx b/Source/cmInstrumentation.cxx index 4e4a13d737..c8e2ee93c3 100644 --- a/Source/cmInstrumentation.cxx +++ b/Source/cmInstrumentation.cxx @@ -18,6 +18,7 @@ #include "cmsys/Directory.hxx" #include "cmsys/FStream.hxx" +#include "cmsys/RegularExpression.hxx" #include "cmsys/SystemInformation.hxx" #include "cmCMakePath.h" @@ -477,12 +478,12 @@ void cmInstrumentation::InsertTimingData( root["duration"] = static_cast(duration); } -Json::Value cmInstrumentation::ReadJsonSnippet(std::string const& directory, - std::string const& file_name) +Json::Value cmInstrumentation::ReadJsonSnippet(std::string const& file_name) { Json::CharReaderBuilder builder; builder["collectComments"] = false; - cmsys::ifstream ftmp(cmStrCat(directory, '/', file_name).c_str()); + cmsys::ifstream ftmp( + cmStrCat(this->timingDirv1, "/data/", file_name).c_str()); Json::Value snippetData; builder["collectComments"] = false; @@ -916,26 +917,33 @@ void cmInstrumentation::PrepareDataForCDash(std::string const& data_dir, void cmInstrumentation::WriteTraceFile(Json::Value const& index, std::string const& trace_name) { - std::string const& directory = cmStrCat(this->timingDirv1, "/data"); - std::vector snippets = std::vector(); + std::vector snippets = std::vector(); for (auto const& f : index["snippets"]) { - Json::Value snippetData = this->ReadJsonSnippet(directory, f.asString()); - snippets.push_back(snippetData); + snippets.push_back(f.asString()); } // Reverse-sort snippets by timeEnd (timeStart + duration) as a // prerequisite for AssignTargetToTraceThread(). - std::sort(snippets.begin(), snippets.end(), - [](Json::Value snippetA, Json::Value snippetB) { - uint64_t timeEndA = snippetA["timeStart"].asUInt64() + - snippetA["duration"].asUInt64(); - uint64_t timeEndB = snippetB["timeStart"].asUInt64() + - snippetB["duration"].asUInt64(); - return timeEndA > timeEndB; - }); + auto extractSnippetTimestamp = [](std::string file) -> std::string { + cmsys::RegularExpression snippetTimeRegex( + "[A-Za-z]+-[A-Za-z0-9]+-([0-9T\\-]+)\\.json"); + cmsys::RegularExpressionMatch matchA; + if (snippetTimeRegex.find(file.c_str(), matchA)) { + return matchA.match(1); + } + return ""; + }; + std::sort( + snippets.begin(), snippets.end(), + [extractSnippetTimestamp](std::string snippetA, std::string snippetB) { + return extractSnippetTimestamp(snippetA) > + extractSnippetTimestamp(snippetB); + }); Json::Value trace = Json::arrayValue; + Json::Value snippetData; std::vector workers = std::vector(); - for (auto const& snippetData : snippets) { + for (auto const& snippetFile : snippets) { + snippetData = this->ReadJsonSnippet(snippetFile); this->AppendTraceEvent(trace, workers, snippetData); } diff --git a/Source/cmInstrumentation.h b/Source/cmInstrumentation.h index fae948551f..9975fc1922 100644 --- a/Source/cmInstrumentation.h +++ b/Source/cmInstrumentation.h @@ -74,8 +74,7 @@ public: std::string const& GetCDashDir(); private: - Json::Value ReadJsonSnippet(std::string const& directory, - std::string const& file_name); + Json::Value ReadJsonSnippet(std::string const& file_name); void WriteInstrumentationJson(Json::Value& index, std::string const& directory, std::string const& file_name); From f029bd04bf646b40456885a021d970e57445af03 Mon Sep 17 00:00:00 2001 From: Tyler Yankee Date: Thu, 25 Sep 2025 11:45:33 -0400 Subject: [PATCH 3/3] instrumentation: Write trace file incrementally Periodically clearing the output stream buffer saves on memory usage for large traces. --- Source/cmInstrumentation.cxx | 76 +++++++++++++++++++++++++++++------- Source/cmInstrumentation.h | 4 +- 2 files changed, 64 insertions(+), 16 deletions(-) diff --git a/Source/cmInstrumentation.cxx b/Source/cmInstrumentation.cxx index c8e2ee93c3..59c16e7f80 100644 --- a/Source/cmInstrumentation.cxx +++ b/Source/cmInstrumentation.cxx @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -373,7 +374,7 @@ int cmInstrumentation::CollectTimingData(cmInstrumentationQuery::Hook hook) if (this->HasOption(cmInstrumentationQuery::Option::Trace)) { std::string trace_name = cmStrCat("trace-", suffix_time, ".json"); this->WriteTraceFile(index, trace_name); - index["trace"] = "trace/" + trace_name; + index["trace"] = cmStrCat("trace/", trace_name); } // Write index file @@ -509,10 +510,21 @@ void cmInstrumentation::WriteInstrumentationJson(Json::Value& root, std::unique_ptr(wbuilder.newStreamWriter()); std::string const& directory = cmStrCat(this->timingDirv1, '/', subdir); cmSystemTools::MakeDirectory(directory); + cmsys::ofstream ftmp(cmStrCat(directory, '/', file_name).c_str()); - JsonWriter->write(root, &ftmp); - ftmp << "\n"; - ftmp.close(); + if (!ftmp.good()) { + throw std::runtime_error(std::string("Unable to open: ") + file_name); + } + + try { + JsonWriter->write(root, &ftmp); + ftmp << "\n"; + ftmp.close(); + } catch (std::ios_base::failure& fail) { + cmSystemTools::Error(cmStrCat("Failed to write JSON: ", fail.what())); + } catch (...) { + cmSystemTools::Error("Error writing JSON output for instrumentation."); + } } std::string cmInstrumentation::InstrumentTest( @@ -939,20 +951,56 @@ void cmInstrumentation::WriteTraceFile(Json::Value const& index, extractSnippetTimestamp(snippetB); }); - Json::Value trace = Json::arrayValue; - Json::Value snippetData; + std::string traceDir = cmStrCat(this->timingDirv1, "/data/trace/"); + std::string traceFile = cmStrCat(traceDir, trace_name); + cmSystemTools::MakeDirectory(traceDir); + cmsys::ofstream traceStream; + Json::StreamWriterBuilder wbuilder; + wbuilder["indentation"] = "\t"; + std::unique_ptr jsonWriter = + std::unique_ptr(wbuilder.newStreamWriter()); + traceStream.open(traceFile.c_str(), std::ios::out | std::ios::trunc); + if (!traceStream.good()) { + throw std::runtime_error(std::string("Unable to open: ") + traceFile); + } + traceStream << "["; + + // Append trace events from single snippets. Prefer writing to the output + // stream incrementally over building up a Json::arrayValue in memory for + // large traces. std::vector workers = std::vector(); - for (auto const& snippetFile : snippets) { - snippetData = this->ReadJsonSnippet(snippetFile); - this->AppendTraceEvent(trace, workers, snippetData); + Json::Value traceEvent; + Json::Value snippetData; + for (size_t i = 0; i < snippets.size(); i++) { + snippetData = this->ReadJsonSnippet(snippets[i]); + traceEvent = this->BuildTraceEvent(workers, snippetData); + try { + if (i > 0) { + traceStream << ","; + } + jsonWriter->write(traceEvent, &traceStream); + if (i % 50 == 0 || i == snippets.size() - 1) { + traceStream.flush(); + traceStream.clear(); + } + } catch (std::ios_base::failure& fail) { + cmSystemTools::Error( + cmStrCat("Failed to write to Google trace file: ", fail.what())); + } catch (...) { + cmSystemTools::Error("Error writing Google trace output."); + } } - this->WriteInstrumentationJson(trace, "data/trace", trace_name); + try { + traceStream << "]\n"; + traceStream.close(); + } catch (...) { + cmSystemTools::Error("Error writing Google trace output."); + } } -void cmInstrumentation::AppendTraceEvent(Json::Value& trace, - std::vector& workers, - Json::Value const& snippetData) +Json::Value cmInstrumentation::BuildTraceEvent(std::vector& workers, + Json::Value const& snippetData) { Json::Value snippetTraceEvent; @@ -1002,7 +1050,7 @@ void cmInstrumentation::AppendTraceEvent(Json::Value& trace, snippetData["duration"].asUInt64())); } - trace.append(snippetTraceEvent); + return snippetTraceEvent; } size_t cmInstrumentation::AssignTargetToTraceThread( diff --git a/Source/cmInstrumentation.h b/Source/cmInstrumentation.h index 9975fc1922..360759c720 100644 --- a/Source/cmInstrumentation.h +++ b/Source/cmInstrumentation.h @@ -94,8 +94,8 @@ private: std::string const& index_path); void RemoveOldFiles(std::string const& dataSubdir); void WriteTraceFile(Json::Value const& index, std::string const& trace_name); - void AppendTraceEvent(Json::Value& trace, std::vector& workers, - Json::Value const& snippetData); + Json::Value BuildTraceEvent(std::vector& workers, + Json::Value const& snippetData); size_t AssignTargetToTraceThread(std::vector& workers, uint64_t timeStart, uint64_t duration); enum LatestOrOldest