From 1f29e3eaf5ea5ed4650cac5378532fd8b06eb692 Mon Sep 17 00:00:00 2001 From: KWSys Upstream Date: Wed, 29 Jan 2025 08:52:13 -0500 Subject: [PATCH] KWSys 2025-01-29 (6f535da1) Code extracted from: https://gitlab.kitware.com/utils/kwsys.git at commit 6f535da17dc3f439216b87a4f5dae8f7d613e6c0 (master). Upstream Shortlog ----------------- Brad King (2): a298a4aa Process: Suppress PID conversion warning 8142980e Process: Revert "Add function to get parent PID" Martin Duffy (2): fda58f27 Process.h.in: Add missing undef for Process_KillPID 9727dfdb ProcessUNIX: Add function to get parent PID Nikita Nemkin (3): 46fc21ef RegularExpression: Remove redundant overloads d022423b RegularExpression: Add a method to query the number of capture groups c8f5f4bf RegularExpression: Add an options parameter to find() Sean McBride (1): 3489e4a2 Encoding: Reformulate to avoid unsigned int overflow --- EncodingCXX.cxx | 28 +++++++------ Process.h.in | 1 + RegularExpression.cxx | 16 ++++++-- RegularExpression.hxx.in | 88 ++++++++++++++++++---------------------- 4 files changed, 68 insertions(+), 65 deletions(-) diff --git a/EncodingCXX.cxx b/EncodingCXX.cxx index 5bdfba30aa..922dffd684 100644 --- a/EncodingCXX.cxx +++ b/EncodingCXX.cxx @@ -194,12 +194,14 @@ std::string Encoding::ToNarrow(std::wstring const& str) std::wstring Encoding::ToWide(char const* cstr) { std::wstring wstr; - size_t length = kwsysEncoding_mbstowcs(nullptr, cstr, 0) + 1; - if (length > 0) { - std::vector<wchar_t> wchars(length); - if (kwsysEncoding_mbstowcs(&wchars[0], cstr, length) > 0) { - wstr = &wchars[0]; - } + size_t length = kwsysEncoding_mbstowcs(nullptr, cstr, 0); + if (length == 0 || length == static_cast<size_t>(-1)) { + return wstr; + } + ++length; + std::vector<wchar_t> wchars(length); + if (kwsysEncoding_mbstowcs(wchars.data(), cstr, length) > 0) { + wstr = wchars.data(); } return wstr; } @@ -207,12 +209,14 @@ std::wstring Encoding::ToWide(char const* cstr) std::string Encoding::ToNarrow(wchar_t const* wcstr) { std::string str; - 
size_t length = kwsysEncoding_wcstombs(nullptr, wcstr, 0) + 1; - if (length > 0) { - std::vector<char> chars(length); - if (kwsysEncoding_wcstombs(&chars[0], wcstr, length) > 0) { - str = &chars[0]; - } + size_t length = kwsysEncoding_wcstombs(nullptr, wcstr, 0); + if (length == 0 || length == static_cast<size_t>(-1)) { + return str; + } + ++length; + std::vector<char> chars(length); + if (kwsysEncoding_wcstombs(chars.data(), wcstr, length) > 0) { + str = chars.data(); } return str; } diff --git a/Process.h.in b/Process.h.in index 313554ef1f..2017d784b5 100644 --- a/Process.h.in +++ b/Process.h.in @@ -537,6 +537,7 @@ kwsysEXPORT void kwsysProcess_ResetStartTime(kwsysProcess* cp); # undef kwsysProcess_WaitForExit # undef kwsysProcess_Interrupt # undef kwsysProcess_Kill +# undef kwsysProcess_KillPID # undef kwsysProcess_ResetStartTime # endif #endif diff --git a/RegularExpression.cxx b/RegularExpression.cxx index cb7e0fb527..b9850d4fdc 100644 --- a/RegularExpression.cxx +++ b/RegularExpression.cxx @@ -60,6 +60,7 @@ RegularExpression::RegularExpression(RegularExpression const& rxp) this->regstart = rxp.regstart; // Copy starting index this->reganch = rxp.reganch; // Copy remaining private data this->regmlen = rxp.regmlen; // Copy remaining private data + this->regnpar = rxp.regnpar; } // operator= -- Copies the given regular expression. 
@@ -93,6 +94,7 @@ RegularExpression& RegularExpression::operator=(RegularExpression const& rxp) this->regstart = rxp.regstart; // Copy starting index this->reganch = rxp.reganch; // Copy remaining private data this->regmlen = rxp.regmlen; // Copy remaining private data + this->regnpar = rxp.regnpar; return *this; } @@ -371,6 +373,7 @@ bool RegularExpression::compile(char const* exp) // #endif this->program = new char[comp.regsize]; this->progsize = static_cast<int>(comp.regsize); + this->regnpar = comp.regnpar; if (!this->program) { // RAISE Error, SYM(RegularExpression), SYM(Out_Of_Memory), @@ -852,6 +855,7 @@ public: char const* regbol; // Beginning of input, for ^ check. char const** regstartp; // Pointer to startp array. char const** regendp; // Ditto for endp. + char const* regreject; // Reject matches ending here, for NONEMPTY_AT_OFFSET. int regtry(char const*, char const**, char const**, char const*); int regmatch(char const*); @@ -862,7 +866,8 @@ public: // Returns true if found, and sets start and end indexes accordingly. bool RegularExpression::find(char const* string, RegularExpressionMatch& rmatch, - std::string::size_type offset) const + std::string::size_type offset, + unsigned options) const { char const* s; @@ -894,10 +899,11 @@ bool RegularExpression::find(char const* string, } RegExpFind regFind; + s = string + offset; // Mark beginning of line for ^ . - regFind.regbol = string; - s = string + offset; + regFind.regbol = (options & BOL_AT_OFFSET) ? s : string; + regFind.regreject = (options & NONEMPTY_AT_OFFSET) ? s : nullptr; // Simplest case: anchored match need be tried only once. if (this->reganch) @@ -1164,7 +1170,9 @@ int RegExpFind::regmatch(char const* prog) } // break; case END: - return (1); // Success! + if (reginput == regreject) + return (0); // Can't end a match here + return (1); // Success! 
default: // RAISE Error, SYM(RegularExpression), SYM(Internal_Error), diff --git a/RegularExpression.hxx.in b/RegularExpression.hxx.in index 184d6acddc..9d3325becf 100644 --- a/RegularExpression.hxx.in +++ b/RegularExpression.hxx.in @@ -42,11 +42,9 @@ public: bool isValid() const; void clear(); - std::string::size_type start() const; - std::string::size_type end() const; - std::string::size_type start(int n) const; - std::string::size_type end(int n) const; - std::string match(int n) const; + std::string::size_type start(int n = 0) const; + std::string::size_type end(int n = 0) const; + std::string match(int n = 0) const; enum { @@ -99,22 +97,6 @@ inline void RegularExpressionMatch::clear() searchstring = nullptr; } -/** - * \brief Returns the start index of the full match. - */ -inline std::string::size_type RegularExpressionMatch::start() const -{ - return static_cast<std::string::size_type>(this->startp[0] - searchstring); -} - -/** - * \brief Returns the end index of the full match. - */ -inline std::string::size_type RegularExpressionMatch::end() const -{ - return static_cast<std::string::size_type>(this->endp[0] - searchstring); -} - /** * \brief Returns the start index of nth submatch. * start(0) is the start of the full match. @@ -299,6 +281,14 @@ inline std::string RegularExpressionMatch::match(int n) const class @KWSYS_NAMESPACE@_EXPORT RegularExpression { public: + enum Options : unsigned + { + // Match ^ at offset instead of the input start. + BOL_AT_OFFSET = 1, + // If an empty match is found at offset, continue searching. + NONEMPTY_AT_OFFSET = 2, + }; + /** * Instantiate RegularExpression with program=nullptr. */ @@ -345,33 +335,33 @@ public: * RegularExpressionMatch instances. */ bool find(char const*, RegularExpressionMatch&, - std::string::size_type offset = 0) const; + std::string::size_type offset = 0, unsigned options = 0) const; /** * Matches the regular expression to the given string. * Returns true if found, and sets start and end indexes accordingly. 
*/ - inline bool find(char const*, std::string::size_type offset = 0); + inline bool find(char const*, std::string::size_type offset = 0, + unsigned options = 0); /** * Matches the regular expression to the given std string. * Returns true if found, and sets start and end indexes accordingly. */ - inline bool find(std::string const&, std::string::size_type offset = 0); + inline bool find(std::string const&, std::string::size_type offset = 0, + unsigned options = 0); /** * Match indices */ inline RegularExpressionMatch const& regMatch() const; - inline std::string::size_type start() const; - inline std::string::size_type end() const; - inline std::string::size_type start(int n) const; - inline std::string::size_type end(int n) const; + inline std::string::size_type start(int n = 0) const; + inline std::string::size_type end(int n = 0) const; /** * Match strings */ - inline std::string match(int n) const; + inline std::string match(int n = 0) const; /** * Copy the given regular expression. @@ -406,6 +396,11 @@ public: */ inline void set_invalid(); + /** + * The number of capture groups. + */ + inline int num_groups(); + private: RegularExpressionMatch regmatch; char regstart; // Internal use only @@ -414,6 +409,7 @@ private: std::string::size_type regmlen; // Internal use only char* program; int progsize; + int regnpar; }; /** @@ -425,6 +421,7 @@ inline RegularExpression::RegularExpression() , regmust{} , program{ nullptr } , progsize{} + , regnpar{} { } @@ -438,6 +435,7 @@ inline RegularExpression::RegularExpression(char const* s) , regmust{} , program{ nullptr } , progsize{} + , regnpar{} { if (s) { this->compile(s); @@ -454,6 +452,7 @@ inline RegularExpression::RegularExpression(std::string const& s) , regmust{} , program{ nullptr } , progsize{} + , regnpar{} { this->compile(s); } @@ -482,9 +481,10 @@ inline bool RegularExpression::compile(std::string const& s) * Returns true if found, and sets start and end indexes accordingly. 
*/ inline bool RegularExpression::find(char const* s, - std::string::size_type offset) + std::string::size_type offset, + unsigned options) { - return this->find(s, this->regmatch, offset); + return this->find(s, this->regmatch, offset, options); } /** @@ -492,9 +492,10 @@ inline bool RegularExpression::find(char const* s, * Returns true if found, and sets start and end indexes accordingly. */ inline bool RegularExpression::find(std::string const& s, - std::string::size_type offset) + std::string::size_type offset, + unsigned options) { - return this->find(s.c_str(), offset); + return this->find(s.c_str(), this->regmatch, offset, options); } /** @@ -505,22 +506,6 @@ inline RegularExpressionMatch const& RegularExpression::regMatch() const return this->regmatch; } -/** - * Returns the start index of the full match. - */ -inline std::string::size_type RegularExpression::start() const -{ - return regmatch.start(); -} - -/** - * Returns the end index of the full match. - */ -inline std::string::size_type RegularExpression::end() const -{ - return regmatch.end(); -} - /** * Return start index of nth submatch. start(0) is the start of the full match. */ @@ -571,6 +556,11 @@ inline void RegularExpression::set_invalid() this->program = nullptr; } +inline int RegularExpression::num_groups() +{ + return this->regnpar - 1; +} + } // namespace @KWSYS_NAMESPACE@ #endif