1 //===-- MIUtilString.cpp ----------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "llvm/Support/Compiler.h"
20 #include "MIUtilString.h"
23 // Details: CMIUtilString constructor.
29 CMIUtilString::CMIUtilString() : std::string() {}
32 // Details: CMIUtilString constructor.
34 // Args: vpData - Pointer to UTF8 text data.
38 CMIUtilString::CMIUtilString(const char *vpData)
39 : std::string(WithNullAsEmpty(vpData)) {}
42 // Details: CMIUtilString constructor.
44 // Args: vpStr - Text data.
48 CMIUtilString::CMIUtilString(const std::string &vrStr) : std::string(vrStr) {}
51 // Details: CMIUtilString assignment operator.
53 // Args: vpRhs - Pointer to UTF8 text data.
54 // Return: CMIUtilString & - *this string.
57 CMIUtilString &CMIUtilString::operator=(const char *vpRhs) {
58 assign(WithNullAsEmpty(vpRhs));
63 // Details: CMIUtilString assignment operator.
65 // Args: vrRhs - The other string to copy from.
66 // Return: CMIUtilString & - *this string.
69 CMIUtilString &CMIUtilString::operator=(const std::string &vrRhs) {
75 // Details: CMIUtilString destructor.
81 CMIUtilString::~CMIUtilString() {}
84 // Details: Perform a snprintf format style on a string data. A new string
86 // created and returned.
87 // Type: Static method.
88 // Args: vrFormat - (R) Format string data instruction.
89 // vArgs - (R) Var list args of any type.
90 // Return: CMIUtilString - Number of splits found in the string data.
93 CMIUtilString CMIUtilString::FormatPriv(const CMIUtilString &vrFormat,
95 CMIUtilString strResult;
97 MIint n = vrFormat.size();
99 // IOR: mysterious crash in this function on some windows builds not able to
100 // duplicate but found article which may be related. Crash occurs in
101 // vsnprintf() or va_copy().
102 // Duplicate vArgs va_list argument pointer to ensure that it can be safely
103 // used in a new frame.
104 // http://julipedia.meroh.net/2011/09/using-vacopy-to-safely-pass-ap.html
106 va_copy(argsDup, vArgs);
108 // Create a copy va_list to reset when we spin
110 va_copy(argsCpy, argsDup);
115 n = n << 4; // Reserve 16 times as much the length of the vrFormat
117 std::unique_ptr<char[]> pFormatted;
119 pFormatted.reset(new char[n + 1]); // +1 for safety margin
120 ::strncpy(&pFormatted[0], vrFormat.c_str(), n);
122 // We need to restore the variable argument list pointer to the start again
123 // before running vsnprintf() more then once
124 va_copy(argsDup, argsCpy);
126 nFinal = ::vsnprintf(&pFormatted[0], n, vrFormat.c_str(), argsDup);
127 if ((nFinal < 0) || (nFinal >= n))
128 n += abs(nFinal - n + 1);
136 strResult = pFormatted.get();
142 // Details: Perform a snprintf format style on a string data. A new string
144 // created and returned.
145 // Type: Static method.
146 // Args: vFormat - (R) Format string data instruction.
147 // ... - (R) Var list args of any type.
148 // Return: CMIUtilString - Number of splits found in the string data.
151 CMIUtilString CMIUtilString::Format(const char *vFormating, ...) {
153 va_start(args, vFormating);
154 CMIUtilString strResult =
155 CMIUtilString::FormatPriv(WithNullAsEmpty(vFormating), args);
162 // Details: Perform a snprintf format style on a string data. A new string
164 // created and returned.
165 // Type: Static method.
166 // Args: vrFormat - (R) Format string data instruction.
167 // vArgs - (R) Var list args of any type.
168 // Return: CMIUtilString - Number of splits found in the string data.
171 CMIUtilString CMIUtilString::FormatValist(const CMIUtilString &vrFormating,
173 return CMIUtilString::FormatPriv(vrFormating, vArgs);
177 // Details: Splits string into array of strings using delimiter. If multiple
179 // are found in sequence then they are not added to the list of splits.
181 // Args: vData - (R) String data to be split up.
182 // vDelimiter - (R) Delimiter char or text.
183 // vwVecSplits - (W) Container of splits found in string data.
184 // Return: size_t - Number of splits found in the string data.
187 size_t CMIUtilString::Split(const CMIUtilString &vDelimiter,
188 VecString_t &vwVecSplits) const {
191 if (this->empty() || vDelimiter.empty())
194 const size_t nLen(length());
197 // Find first occurrence which doesn't match to the delimiter
198 const size_t nSectionPos(FindFirstNot(vDelimiter, nOffset));
199 if (nSectionPos == std::string::npos)
202 // Find next occurrence of the delimiter after section
203 size_t nNextDelimiterPos(FindFirst(vDelimiter, nSectionPos));
204 if (nNextDelimiterPos == std::string::npos)
205 nNextDelimiterPos = nLen;
207 // Extract string between delimiters
208 const size_t nSectionLen(nNextDelimiterPos - nSectionPos);
209 const std::string strSection(substr(nSectionPos, nSectionLen));
210 vwVecSplits.push_back(strSection);
213 nOffset = nNextDelimiterPos + 1;
214 } while (nOffset < nLen);
216 return vwVecSplits.size();
220 // Details: Splits string into array of strings using delimiter. However the
222 // also considered for text surrounded by quotes. Text with quotes
224 // delimiter is treated as a whole. If multiple delimiter are found in
226 // then they are not added to the list of splits. Quotes that are
228 // the string as string formatted quotes are ignored (proceeded by a
230 // "\"MI GDB local C++.cpp\":88".
232 // Args: vData - (R) String data to be split up.
233 // vDelimiter - (R) Delimiter char or text.
234 // vwVecSplits - (W) Container of splits found in string data.
235 // Return: size_t - Number of splits found in the string data.
238 size_t CMIUtilString::SplitConsiderQuotes(const CMIUtilString &vDelimiter,
239 VecString_t &vwVecSplits) const {
242 if (this->empty() || vDelimiter.empty())
245 const size_t nLen(length());
248 // Find first occurrence which doesn't match to the delimiter
249 const size_t nSectionPos(FindFirstNot(vDelimiter, nOffset));
250 if (nSectionPos == std::string::npos)
253 // Find next occurrence of the delimiter after (quoted) section
254 const bool bSkipQuotedText(true);
255 bool bUnmatchedQuote(false);
256 size_t nNextDelimiterPos(
257 FindFirst(vDelimiter, bSkipQuotedText, bUnmatchedQuote, nSectionPos));
258 if (bUnmatchedQuote) {
262 if (nNextDelimiterPos == std::string::npos)
263 nNextDelimiterPos = nLen;
265 // Extract string between delimiters
266 const size_t nSectionLen(nNextDelimiterPos - nSectionPos);
267 const std::string strSection(substr(nSectionPos, nSectionLen));
268 vwVecSplits.push_back(strSection);
271 nOffset = nNextDelimiterPos + 1;
272 } while (nOffset < nLen);
274 return vwVecSplits.size();
278 // Details: Split string into lines using \n and return an array of strings.
280 // Args: vwVecSplits - (W) Container of splits found in string data.
281 // Return: size_t - Number of splits found in the string data.
284 size_t CMIUtilString::SplitLines(VecString_t &vwVecSplits) const {
285 return Split("\n", vwVecSplits);
289 // Details: Remove '\n' from the end of string if found. It does not alter
293 // Return: CMIUtilString - New version of the string.
296 CMIUtilString CMIUtilString::StripCREndOfLine() const {
297 const size_t nPos = rfind('\n');
298 if (nPos == std::string::npos)
301 const CMIUtilString strNew(substr(0, nPos));
307 // Details: Remove all '\n' from the string and replace with a space. It does
312 // Return: CMIUtilString - New version of the string.
315 CMIUtilString CMIUtilString::StripCRAll() const {
316 return FindAndReplace("\n", " ");
320 // Details: Find and replace all matches of a sub string with another string. It
322 // alter *this string.
324 // Args: vFind - (R) The string to look for.
325 // vReplaceWith - (R) The string to replace the vFind match.
326 // Return: CMIUtilString - New version of the string.
330 CMIUtilString::FindAndReplace(const CMIUtilString &vFind,
331 const CMIUtilString &vReplaceWith) const {
332 if (vFind.empty() || this->empty())
335 size_t nPos = find(vFind);
336 if (nPos == std::string::npos)
339 CMIUtilString strNew(*this);
340 while (nPos != std::string::npos) {
341 strNew.replace(nPos, vFind.length(), vReplaceWith);
342 nPos += vReplaceWith.length();
343 nPos = strNew.find(vFind, nPos);
350 // Details: Check if *this string is a decimal number.
353 // Return: bool - True = yes number, false not a number.
356 bool CMIUtilString::IsNumber() const {
360 if ((at(0) == '-') && (length() == 1))
363 const size_t nPos = find_first_not_of("-.0123456789");
364 return nPos == std::string::npos;
368 // Details: Check if *this string is a hexadecimal number.
371 // Return: bool - True = yes number, false not a number.
374 bool CMIUtilString::IsHexadecimalNumber() const {
375 // Compare '0x..' prefix
376 if ((strncmp(c_str(), "0x", 2) != 0) && (strncmp(c_str(), "0X", 2) != 0))
379 // Skip '0x..' prefix
380 const size_t nPos = find_first_not_of("01234567890ABCDEFabcedf", 2);
381 return nPos == std::string::npos;
385 // Details: Extract the number from the string. The number can be either a
387 // natural number. It cannot contain other non-numeric characters.
389 // Args: vwrNumber - (W) Number extracted from the string.
390 // Return: bool - True = yes number, false not a number.
393 bool CMIUtilString::ExtractNumber(MIint64 &vwrNumber) const {
397 return ExtractNumberFromHexadecimal(vwrNumber);
400 std::stringstream ss(const_cast<CMIUtilString &>(*this));
407 // Details: Extract the number from the hexadecimal string..
409 // Args: vwrNumber - (W) Number extracted from the string.
410 // Return: bool - True = yes number, false not a number.
413 bool CMIUtilString::ExtractNumberFromHexadecimal(MIint64 &vwrNumber) const {
416 const size_t nPos = find_first_not_of("xX01234567890ABCDEFabcedf");
417 if (nPos != std::string::npos)
421 const MIuint64 nNum = ::strtoull(this->c_str(), nullptr, 16);
425 vwrNumber = static_cast<MIint64>(nNum);
431 // Details: Determine if the text is all valid alpha numeric characters. Letters
433 // either upper or lower case.
434 // Type: Static method.
435 // Args: vpText - (R) The text data to examine.
436 // Return: bool - True = yes all alpha, false = one or more chars is non alpha.
439 bool CMIUtilString::IsAllValidAlphaAndNumeric(const char *vpText) {
440 const size_t len = ::strlen(WithNullAsEmpty(vpText));
444 for (size_t i = 0; i < len; i++, vpText++) {
445 const char c = *vpText;
446 if (::isalnum((int)c) == 0)
454 // Details: Check if two strings share equal contents.
456 // Args: vrLhs - (R) String A.
457 // vrRhs - (R) String B.
458 // Return: bool - True = yes equal, false - different.
461 bool CMIUtilString::Compare(const CMIUtilString &vrLhs,
462 const CMIUtilString &vrRhs) {
463 // Check the sizes match
464 if (vrLhs.size() != vrRhs.size())
467 return (::strncmp(vrLhs.c_str(), vrRhs.c_str(), vrLhs.size()) == 0);
471 // Details: Remove from either end of *this string the following: " \t\n\v\f\r".
474 // Return: CMIUtilString - Trimmed string.
477 CMIUtilString CMIUtilString::Trim() const {
478 CMIUtilString strNew(*this);
479 const char *pWhiteSpace = " \t\n\v\f\r";
480 const size_t nPos = find_last_not_of(pWhiteSpace);
481 if (nPos != std::string::npos) {
482 strNew = substr(0, nPos + 1);
484 const size_t nPos2 = strNew.find_first_not_of(pWhiteSpace);
485 if (nPos2 != std::string::npos) {
486 strNew = strNew.substr(nPos2);
493 // Details: Remove from either end of *this string the specified character.
496 // Return: CMIUtilString - Trimmed string.
499 CMIUtilString CMIUtilString::Trim(const char vChar) const {
500 CMIUtilString strNew(*this);
501 const size_t nLen = strNew.length();
503 if ((strNew[0] == vChar) && (strNew[nLen - 1] == vChar))
504 strNew = strNew.substr(1, nLen - 2);
511 // Details: Do a printf equivalent for printing a number in binary i.e. "b%llB".
512 // Type: Static method.
513 // Args: vnDecimal - (R) The number to represent in binary.
514 // Return: CMIUtilString - Binary number in text.
517 CMIUtilString CMIUtilString::FormatBinary(const MIuint64 vnDecimal) {
518 CMIUtilString strBinaryNumber;
520 const MIuint nConstBits = 64;
521 MIuint nRem[nConstBits + 1];
524 MIuint64 nNum = vnDecimal;
525 while ((nNum > 0) && (nLen < nConstBits)) {
526 nRem[i++] = nNum % 2;
530 char pN[nConstBits + 1];
532 for (i = nLen; i > 0; --i, j++) {
533 pN[j] = '0' + nRem[i - 1];
535 pN[j] = 0; // String NUL termination
537 strBinaryNumber = CMIUtilString::Format("0b%s", &pN[0]);
539 return strBinaryNumber;
543 // Details: Remove from a string doubled up characters so only one set left.
545 // are only removed if the previous character is already a same
548 // Args: vChar - (R) The character to search for and remove adjacent
550 // Return: CMIUtilString - New version of the string.
553 CMIUtilString CMIUtilString::RemoveRepeatedCharacters(const char vChar) {
554 return RemoveRepeatedCharacters(0, vChar);
558 // Details: Recursively remove from a string doubled up characters so only one
560 // Characters are only removed if the previous character is already a
564 // Args: vChar - (R) The character to search for and remove adjacent
566 // vnPos - Character position in the string.
567 // Return: CMIUtilString - New version of the string.
570 CMIUtilString CMIUtilString::RemoveRepeatedCharacters(size_t vnPos,
572 const char cQuote = '"';
574 // Look for first quote of two
575 const size_t nPos = find(cQuote, vnPos);
576 if (nPos == std::string::npos)
579 const size_t nPosNext = nPos + 1;
580 if (nPosNext > length())
583 if (at(nPosNext) == cQuote) {
584 *this = substr(0, nPos) + substr(nPosNext, length());
585 RemoveRepeatedCharacters(nPosNext, vChar);
592 // Details: Is the text in *this string surrounded by quotes.
595 // Return: bool - True = Yes string is quoted, false = no quoted.
598 bool CMIUtilString::IsQuoted() const {
599 const char cQuote = '"';
604 const size_t nLen = length();
605 return !((nLen > 0) && (at(nLen - 1) != cQuote));
609 // Details: Find first occurrence in *this string which matches the pattern.
611 // Args: vrPattern - (R) The pattern to search for.
612 // vnPos - The starting position at which to start searching.
614 // Return: size_t - The position of the first substring that match.
617 size_t CMIUtilString::FindFirst(const CMIUtilString &vrPattern,
618 size_t vnPos /* = 0 */) const {
619 return find(vrPattern, vnPos);
623 // Details: Find first occurrence in *this string which matches the pattern and
624 // isn't surrounded by quotes.
626 // Args: vrPattern - (R) The pattern to search for.
627 // vbSkipQuotedText - (R) True = don't look at quoted text,
628 // false = otherwise.
629 // vrwbNotFoundClosedQuote - (W) True = parsing error: unmatched
630 // quote, false = otherwise.
631 // vnPos - Position of the first character in the
632 // string to be considered in the search. (Dflt = 0)
633 // Return: size_t - The position of the first substring that matches and isn't
637 size_t CMIUtilString::FindFirst(const CMIUtilString &vrPattern,
638 const bool vbSkipQuotedText,
639 bool &vrwbNotFoundClosedQuote,
640 size_t vnPos /* = 0 */) const {
641 vrwbNotFoundClosedQuote = false;
643 if (!vbSkipQuotedText)
644 return FindFirst(vrPattern, vnPos);
646 const size_t nLen(length());
650 const size_t nQuotePos(FindFirstQuote(nPos));
651 const size_t nPatternPos(FindFirst(vrPattern, nPos));
652 if (nQuotePos == std::string::npos)
655 const size_t nQuoteClosedPos = FindFirstQuote(nQuotePos + 1);
656 if (nQuoteClosedPos == std::string::npos) {
657 vrwbNotFoundClosedQuote = true;
658 return std::string::npos;
661 if ((nPatternPos == std::string::npos) || (nPatternPos < nQuotePos))
664 nPos = nQuoteClosedPos + 1;
665 } while (nPos < nLen);
667 return std::string::npos;
671 // Details: Find first occurrence in *this string which doesn't match the
674 // Args: vrPattern - (R) The pattern to search for.
675 // vnPos - Position of the first character in the string to be
676 // considered in the search. (Dflt = 0)
677 // Return: size_t - The position of the first character that doesn't match.
680 size_t CMIUtilString::FindFirstNot(const CMIUtilString &vrPattern,
681 size_t vnPos /* = 0 */) const {
682 const size_t nLen(length());
683 const size_t nPatternLen(vrPattern.length());
685 size_t nPatternPos(vnPos);
687 const bool bMatchPattern(compare(nPatternPos, nPatternLen, vrPattern) == 0);
690 nPatternPos += nPatternLen;
691 } while (nPatternPos < nLen);
693 return std::string::npos;
697 // Details: Find first occurrence of not escaped quotation mark in *this string.
699 // Args: vnPos - Position of the first character in the string to be
700 // considered in the search.
701 // Return: size_t - The position of the quotation mark.
704 size_t CMIUtilString::FindFirstQuote(size_t vnPos) const {
705 const char cBckSlash('\\');
706 const char cQuote('"');
707 const size_t nLen(length());
711 const size_t nBckSlashPos(find(cBckSlash, nPos));
712 const size_t nQuotePos(find(cQuote, nPos));
713 if ((nBckSlashPos == std::string::npos) || (nQuotePos == std::string::npos))
716 if (nQuotePos < nBckSlashPos)
719 // Skip 2 characters: First is '\', second is that which is escaped by '\'
720 nPos = nBckSlashPos + 2;
721 } while (nPos < nLen);
723 return std::string::npos;
727 // Details: Get escaped string from *this string.
730 // Return: CMIUtilString - The escaped version of the initial string.
733 CMIUtilString CMIUtilString::Escape(bool vbEscapeQuotes /* = false */) const {
734 const size_t nLen(length());
735 CMIUtilString strNew;
736 strNew.reserve(nLen);
737 for (size_t nIndex(0); nIndex < nLen; ++nIndex) {
738 const char cUnescapedChar((*this)[nIndex]);
739 if (cUnescapedChar == '"' && vbEscapeQuotes)
740 strNew.append("\\\"");
742 strNew.append(ConvertToPrintableASCII((char)cUnescapedChar));
748 // Details: Get string with backslashes in front of double quote '"' and
753 // Return: CMIUtilString - The wrapped version of the initial string.
756 CMIUtilString CMIUtilString::AddSlashes() const {
757 const char cBckSlash('\\');
758 const size_t nLen(length());
759 CMIUtilString strNew;
760 strNew.reserve(nLen);
763 while (nOffset < nLen) {
764 const size_t nUnescapedCharPos(find_first_of("\"\\", nOffset));
765 const bool bUnescapedCharNotFound(nUnescapedCharPos == std::string::npos);
766 if (bUnescapedCharNotFound) {
767 const size_t nAppendAll(std::string::npos);
768 strNew.append(*this, nOffset, nAppendAll);
771 const size_t nAppendLen(nUnescapedCharPos - nOffset);
772 strNew.append(*this, nOffset, nAppendLen);
773 strNew.push_back(cBckSlash);
774 const char cUnescapedChar((*this)[nUnescapedCharPos]);
775 strNew.push_back(cUnescapedChar);
776 nOffset = nUnescapedCharPos + 1;
783 // Details: Remove backslashes added by CMIUtilString::AddSlashes.
786 // Return: CMIUtilString - The initial version of wrapped string.
789 CMIUtilString CMIUtilString::StripSlashes() const {
790 const char cBckSlash('\\');
791 const size_t nLen(length());
792 CMIUtilString strNew;
793 strNew.reserve(nLen);
796 while (nOffset < nLen) {
797 const size_t nBckSlashPos(find(cBckSlash, nOffset));
798 const bool bBckSlashNotFound(nBckSlashPos == std::string::npos);
799 if (bBckSlashNotFound) {
800 const size_t nAppendAll(std::string::npos);
801 strNew.append(*this, nOffset, nAppendAll);
804 const size_t nAppendLen(nBckSlashPos - nOffset);
805 strNew.append(*this, nOffset, nAppendLen);
806 const bool bBckSlashIsLast(nBckSlashPos == nLen);
807 if (bBckSlashIsLast) {
808 strNew.push_back(cBckSlash);
811 const char cEscapedChar((*this)[nBckSlashPos + 1]);
812 const size_t nEscapedCharPos(std::string("\"\\").find(cEscapedChar));
813 const bool bEscapedCharNotFound(nEscapedCharPos == std::string::npos);
814 if (bEscapedCharNotFound)
815 strNew.push_back(cBckSlash);
816 strNew.push_back(cEscapedChar);
817 nOffset = nBckSlashPos + 2;
823 CMIUtilString CMIUtilString::ConvertToPrintableASCII(const char vChar,
824 bool bEscapeQuotes) {
849 if (::isprint(vChar))
850 return Format("%c", vChar);
852 return Format("\\x%02" PRIx8, vChar);
857 CMIUtilString::ConvertCharValueToPrintableASCII(char vChar,
858 bool bEscapeQuotes) {
883 if (::isprint(vChar))
884 return Format("%c", vChar);
886 return CMIUtilString();
890 CMIUtilString CMIUtilString::ConvertToPrintableASCII(const char16_t vChar16,
891 bool bEscapeQuotes) {
892 if (vChar16 == (char16_t)(char)vChar16) {
893 // Convert char16_t to char (if possible)
895 ConvertCharValueToPrintableASCII((char)vChar16, bEscapeQuotes);
896 if (str.length() > 0)
899 return Format("\\u%02" PRIx8 "%02" PRIx8, (vChar16 >> 8) & 0xff,
903 CMIUtilString CMIUtilString::ConvertToPrintableASCII(const char32_t vChar32,
904 bool bEscapeQuotes) {
905 if (vChar32 == (char32_t)(char)vChar32) {
906 // Convert char32_t to char (if possible)
908 ConvertCharValueToPrintableASCII((char)vChar32, bEscapeQuotes);
909 if (str.length() > 0)
912 return Format("\\U%02" PRIx8 "%02" PRIx8 "%02" PRIx8 "%02" PRIx8,
913 (vChar32 >> 24) & 0xff, (vChar32 >> 16) & 0xff,
914 (vChar32 >> 8) & 0xff, vChar32 & 0xff);