1 //===-- MIUtilString.cpp ----------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Third party headers
11 #include "llvm/Support/Compiler.h"
13 #include <inttypes.h> // for PRIx8
14 #include <limits.h> // for ULONG_MAX
15 #include <memory> // std::unique_ptr
16 #include <sstream> // std::stringstream
17 #include <stdarg.h> // va_list, va_start, va_end
18 #include <string.h> // for strncmp
21 #include "MIUtilString.h"
24 //------------------------------------------------------------------------------------
25 // Details: CMIUtilString constructor.
// Default constructor: value-initialises the std::string base; the body is
// empty, so no other work is done.
31 CMIUtilString::CMIUtilString() : std::string() {}
34 //------------------------------------------------------------------------------------
35 // Details: CMIUtilString constructor.
37 // Args: vpData - Pointer to UTF8 text data.
// Constructs the string by forwarding vpData to the std::string base
// constructor taking a C string.
// NOTE(review): vpData is forwarded unchecked; std::string's const char *
// constructor requires a non-null, NUL-terminated pointer.
41 CMIUtilString::CMIUtilString(const char *vpData) : std::string(vpData) {}
44 //------------------------------------------------------------------------------------
45 // Details: CMIUtilString constructor.
47 // Args: vpStr - Text data.
// Constructs the string as a copy of vrStr by forwarding it to the std::string
// base copy constructor.
51 CMIUtilString::CMIUtilString(const std::string &vrStr) : std::string(vrStr) {}
54 //------------------------------------------------------------------------------------
55 // Details: CMIUtilString assignment operator.
57 // Args: vpRhs - Pointer to UTF8 text data.
58 // Return: CMIUtilString & - *this string.
61 CMIUtilString &CMIUtilString::operator=(const char *vpRhs) {
67 //------------------------------------------------------------------------------------
68 // Details: CMIUtilString assignment operator.
70 // Args: vrRhs - The other string to copy from.
71 // Return: CMIUtilString & - *this string.
74 CMIUtilString &CMIUtilString::operator=(const std::string &vrRhs) {
80 //------------------------------------------------------------------------------------
81 // Details: CMIUtilString destructor.
// Destructor with an empty body; the class performs no explicit clean-up here.
87 CMIUtilString::~CMIUtilString() {}
90 //------------------------------------------------------------------------------------
91 // Details: Perform a snprintf format style on a string data. A new string
93 // created and returned.
94 // Type: Static method.
95 // Args: vrFormat - (R) Format string data instruction.
96 // vArgs - (R) Var list args of any type.
97 // Return: CMIUtilString - The formatted string.
// Formats the arguments in vArgs according to vrFormat with vsnprintf() into a
// heap buffer and stores the resulting text in strResult.
// NOTE(review): three va_copy() calls are visible below; the matching va_end()
// calls are not visible in this excerpt - confirm each copy is released.
100 CMIUtilString CMIUtilString::FormatPriv(const CMIUtilString &vrFormat,
102 CMIUtilString strResult;
104 MIint n = vrFormat.size();
106 // IOR: mysterious crash in this function on some windows builds not able to
108 // but found article which may be related. Crash occurs in vsnprintf() or
110 // Duplicate vArgs va_list argument pointer to ensure that it can be safely
113 // http://julipedia.meroh.net/2011/09/using-vacopy-to-safely-pass-ap.html
115 va_copy(argsDup, vArgs);
117 // Create a copy va_list to reset when we spin
119 va_copy(argsCpy, argsDup);
124 n = n << 4; // Reserve 16 times as much the length of the vrFormat
126 std::unique_ptr<char[]> pFormatted;
128 pFormatted.reset(new char[n + 1]); // +1 for safety margin
// strncpy() copies at most n bytes of the raw format text into the buffer and
// zero-fills whatever remains of those n bytes.
129 ::strncpy(&pFormatted[0], vrFormat.c_str(), n);
131 // We need to restore the variable argument list pointer to the start again
132 // before running vsnprintf() more than once
133 va_copy(argsDup, argsCpy);
// vsnprintf() returns the length the complete output needs (negative on
// error). When the output did not fit into n bytes, n is enlarged by the
// shortfall computed below.
135 nFinal = ::vsnprintf(&pFormatted[0], n, vrFormat.c_str(), argsDup);
136 if ((nFinal < 0) || (nFinal >= n))
137 n += abs(nFinal - n + 1);
// The buffer is read back as a NUL-terminated C string.
145 strResult = pFormatted.get();
151 //------------------------------------------------------------------------------------
152 // Details: Perform a snprintf format style on a string data. A new string
154 // created and returned.
155 // Type: Static method.
156 // Args: vFormating - (R) Format string data instruction.
157 // ... - (R) Var list args of any type.
158 // Return: CMIUtilString - The formatted string.
// Variadic entry point: collects the arguments that follow vFormating into a
// va_list and hands the formatting work to FormatPriv(). vFormating is a
// C string, so it is converted to a CMIUtilString for that call.
161 CMIUtilString CMIUtilString::Format(const char *vFormating, ...) {
163 va_start(args, vFormating);
164 CMIUtilString strResult = CMIUtilString::FormatPriv(vFormating, args);
171 //------------------------------------------------------------------------------------
172 // Details: Perform a snprintf format style on a string data. A new string
174 // created and returned.
175 // Type: Static method.
176 // Args: vrFormating - (R) Format string data instruction.
177 // vArgs - (R) Var list args of any type.
178 // Return: CMIUtilString - The formatted string.
// va_list flavour of Format(): forwards both arguments unchanged to
// FormatPriv() and returns its result.
181 CMIUtilString CMIUtilString::FormatValist(const CMIUtilString &vrFormating,
183 return CMIUtilString::FormatPriv(vrFormating, vArgs);
187 //------------------------------------------------------------------------------------
188 // Details: Splits string into array of strings using delimiter. If multiple
190 // are found in sequence then they are not added to the list of splits.
192 // Args: (*this) - (R) String data to be split up (there is no vData parameter).
193 // vDelimiter - (R) Delimiter char or text.
194 // vwVecSplits - (W) Container of splits found in string data.
195 // Return: size_t - Number of splits found in the string data.
// Splits *this on vDelimiter and appends each section found to vwVecSplits.
// The value returned is vwVecSplits.size(), i.e. the size of the container
// after the appends.
198 size_t CMIUtilString::Split(const CMIUtilString &vDelimiter,
199 VecString_t &vwVecSplits) const {
// Nothing can be split when either the text or the delimiter is empty.
202 if (this->empty() || vDelimiter.empty())
205 const size_t nLen(length());
208 // Find first occurrence which doesn't match to the delimiter
209 const size_t nSectionPos(FindFirstNot(vDelimiter, nOffset));
210 if (nSectionPos == std::string::npos)
213 // Find next occurrence of the delimiter after section
214 size_t nNextDelimiterPos(FindFirst(vDelimiter, nSectionPos));
// No further delimiter: the section runs to the end of the string.
215 if (nNextDelimiterPos == std::string::npos)
216 nNextDelimiterPos = nLen;
218 // Extract string between delimiters
219 const size_t nSectionLen(nNextDelimiterPos - nSectionPos);
220 const std::string strSection(substr(nSectionPos, nSectionLen));
221 vwVecSplits.push_back(strSection);
// NOTE(review): the offset advances by one character, not by
// vDelimiter.length(); for a multi-character delimiter the next search starts
// inside the delimiter - confirm this is intended.
224 nOffset = nNextDelimiterPos + 1;
225 } while (nOffset < nLen);
227 return vwVecSplits.size();
231 //------------------------------------------------------------------------------------
232 // Details: Splits string into array of strings using delimiter. However the
234 // also considered for text surrounded by quotes. Text with quotes
236 // delimiter is treated as a whole. If multiple delimiter are found in
238 // then they are not added to the list of splits. Quotes that are
240 // the string as string formatted quotes are ignored (preceded by a
242 // "\"MI GDB local C++.cpp\":88".
244 // Args: (*this) - (R) String data to be split up (there is no vData parameter).
245 // vDelimiter - (R) Delimiter char or text.
246 // vwVecSplits - (W) Container of splits found in string data.
247 // Return: size_t - Number of splits found in the string data.
// Splits *this on vDelimiter like Split(), except that the next delimiter is
// located with the quote-aware FindFirst() overload. Each section found is
// appended to vwVecSplits and vwVecSplits.size() is returned.
250 size_t CMIUtilString::SplitConsiderQuotes(const CMIUtilString &vDelimiter,
251 VecString_t &vwVecSplits) const {
// Nothing can be split when either the text or the delimiter is empty.
254 if (this->empty() || vDelimiter.empty())
257 const size_t nLen(length());
260 // Find first occurrence which doesn't match to the delimiter
261 const size_t nSectionPos(FindFirstNot(vDelimiter, nOffset));
262 if (nSectionPos == std::string::npos)
265 // Find next occurrence of the delimiter after (quoted) section
266 const bool bSkipQuotedText(true);
267 bool bUnmatchedQuote(false);
268 size_t nNextDelimiterPos(
269 FindFirst(vDelimiter, bSkipQuotedText, bUnmatchedQuote, nSectionPos));
// bUnmatchedQuote is set by FindFirst() when an opening quote has no closing
// partner; the handling of that case is not visible in this excerpt.
270 if (bUnmatchedQuote) {
// No further delimiter: the section runs to the end of the string.
274 if (nNextDelimiterPos == std::string::npos)
275 nNextDelimiterPos = nLen;
277 // Extract string between delimiters
278 const size_t nSectionLen(nNextDelimiterPos - nSectionPos);
279 const std::string strSection(substr(nSectionPos, nSectionLen));
280 vwVecSplits.push_back(strSection);
// NOTE(review): the offset advances by one character, not by
// vDelimiter.length(); confirm behaviour for multi-character delimiters.
283 nOffset = nNextDelimiterPos + 1;
284 } while (nOffset < nLen);
286 return vwVecSplits.size();
290 //------------------------------------------------------------------------------------
291 // Details: Split string into lines using \n and return an array of strings.
293 // Args: vwVecSplits - (W) Container of splits found in string data.
294 // Return: size_t - Number of splits found in the string data.
// Convenience wrapper: calls Split() with "\n" as the delimiter and returns
// whatever Split() returns.
297 size_t CMIUtilString::SplitLines(VecString_t &vwVecSplits) const {
298 return Split("\n", vwVecSplits);
302 //------------------------------------------------------------------------------------
303 // Details: Remove '\n' from the end of string if found. It does not alter
307 // Return: CMIUtilString - New version of the string.
// Builds a copy of *this that stops just before the last '\n' in the string.
// NOTE(review): rfind() locates the last '\n' at any position, not only at
// the very end, so any text after that '\n' is also left out of strNew -
// confirm this matches the "end of string" wording of the header comment.
310 CMIUtilString CMIUtilString::StripCREndOfLine() const {
311 const size_t nPos = rfind('\n');
312 if (nPos == std::string::npos)
315 const CMIUtilString strNew(substr(0, nPos));
321 //------------------------------------------------------------------------------------
322 // Details: Remove all '\n' from the string and replace with a space. It does
327 // Return: CMIUtilString - New version of the string.
// Returns the result of FindAndReplace() with every "\n" replaced by a single
// space.
330 CMIUtilString CMIUtilString::StripCRAll() const {
331 return FindAndReplace("\n", " ");
335 //------------------------------------------------------------------------------------
336 // Details: Find and replace all matches of a sub string with another string. It
338 // alter *this string.
340 // Args: vFind - (R) The string to look for.
341 // vReplaceWith - (R) The string to replace the vFind match.
342 // Return: CMIUtilString - New version of the string.
// Replaces occurrences of vFind with vReplaceWith in a copy of *this (strNew).
346 CMIUtilString::FindAndReplace(const CMIUtilString &vFind,
347 const CMIUtilString &vReplaceWith) const {
// Nothing to do for an empty search string or an empty subject.
348 if (vFind.empty() || this->empty())
351 size_t nPos = find(vFind);
352 if (nPos == std::string::npos)
355 CMIUtilString strNew(*this);
356 while (nPos != std::string::npos) {
357 strNew.replace(nPos, vFind.length(), vReplaceWith);
// Resume the search after the inserted text so that a replacement which
// itself contains vFind is not matched again.
358 nPos += vReplaceWith.length();
359 nPos = strNew.find(vFind, nPos);
366 //------------------------------------------------------------------------------------
367 // Details: Check if *this string is a decimal number.
370 // Return: bool - True = yes number, false not a number.
// Checks *this against the character whitelist "-.0123456789".
// NOTE(review): at(0) throws std::out_of_range on an empty string; presumably
// an emptiness check precedes it in lines not visible here - confirm.
// NOTE(review): the whitelist only rejects foreign characters; inputs such as
// "1-2" or "1.2.3" contain none, so their classification depends on code not
// visible in this excerpt.
373 bool CMIUtilString::IsNumber() const {
// A lone '-' is singled out by this test.
377 if ((at(0) == '-') && (length() == 1))
380 const size_t nPos = find_first_not_of("-.0123456789");
381 if (nPos != std::string::npos)
388 //------------------------------------------------------------------------------------
389 // Details: Check if *this string is a hexadecimal number.
392 // Return: bool - True = yes number, false not a number.
// Checks for a "0x"/"0X" prefix and then scans the characters from index 2
// onwards for anything outside the hexadecimal digit set.
395 bool CMIUtilString::IsHexadecimalNumber() const {
396 // Compare '0x..' prefix
397 if ((strncmp(c_str(), "0x", 2) != 0) && (strncmp(c_str(), "0X", 2) != 0))
400 // Skip '0x..' prefix
// The literal lists '0' twice and spells the lower-case letters as "abcedf";
// find_first_not_of() treats it as a character set, so it still covers exactly
// 0-9, A-F and a-f.
401 const size_t nPos = find_first_not_of("01234567890ABCDEFabcedf", 2);
402 if (nPos != std::string::npos)
409 //------------------------------------------------------------------------------------
410 // Details: Extract the number from the string. The number can be either a
412 // natural number. It cannot contain other non-numeric characters.
414 // Args: vwrNumber - (W) Number extracted from the string.
415 // Return: bool - True = yes number, false not a number.
// Tries the hexadecimal extraction first; a std::stringstream over *this is
// then set up for the remaining path (the read from it is not visible here).
418 bool CMIUtilString::ExtractNumber(MIint64 &vwrNumber) const {
422 if (ExtractNumberFromHexadecimal(vwrNumber))
// NOTE(review): the const_cast looks unnecessary - std::stringstream's
// constructor accepts a const std::string & - confirm and consider removing.
428 std::stringstream ss(const_cast<CMIUtilString &>(*this));
435 //------------------------------------------------------------------------------------
436 // Details: Extract the number from the hexadecimal string.
438 // Args: vwrNumber - (W) Number extracted from the string.
439 // Return: bool - True = yes number, false not a number.
// Parses *this as a base-16 number with strtoull() and stores the value,
// converted to MIint64, in vwrNumber.
442 bool CMIUtilString::ExtractNumberFromHexadecimal(MIint64 &vwrNumber) const {
// Character-set check only: 'x'/'X' are accepted at any position, and the
// literal's duplicate '0' / "abcedf" spelling does not change the set.
445 const size_t nPos = find_first_not_of("xX01234567890ABCDEFabcedf");
446 if (nPos != std::string::npos)
// NOTE(review): the end pointer is passed as nullptr, so this call cannot tell
// whether strtoull() consumed the whole string (e.g. "1x2" passes the check
// above) - confirm that an earlier check not visible here rules that out.
450 const MIuint64 nNum = ::strtoull(this->c_str(), nullptr, 16);
454 vwrNumber = static_cast<MIint64>(nNum);
460 //------------------------------------------------------------------------------------
461 // Details: Determine if the text is all valid alpha numeric characters. Letters
463 // either upper or lower case.
464 // Type: Static method.
465 // Args: vpText - (R) The text data to examine.
466 // Return: bool - True = all chars are alphanumeric, false = one or more chars is not alphanumeric.
// Walks the NUL-terminated text vpText character by character and tests each
// one with isalnum().
// NOTE(review): vpText is passed straight to strlen(), so it must not be null.
469 bool CMIUtilString::IsAllValidAlphaAndNumeric(const char *vpText) {
470 const size_t len = ::strlen(vpText);
474 for (size_t i = 0; i < len; i++, vpText++) {
475 const char c = *vpText;
// NOTE(review): c is a plain char; handing isalnum() a negative value other
// than EOF is undefined behaviour. The usual idiom converts through
// unsigned char first - confirm and adjust for non-ASCII input.
476 if (::isalnum((int)c) == 0)
484 //------------------------------------------------------------------------------------
485 // Details: Check if two strings share equal contents.
487 // Args: vrLhs - (R) String A.
488 // vrRhs - (R) String B.
489 // Return: bool - True = yes equal, false - different.
// Reports whether vrLhs and vrRhs hold the same text: sizes are compared
// first, then the characters with strncmp().
492 bool CMIUtilString::Compare(const CMIUtilString &vrLhs,
493 const CMIUtilString &vrRhs) {
494 // Check the sizes match
495 if (vrLhs.size() != vrRhs.size())
// NOTE(review): strncmp() stops at the first NUL byte, so equally sized
// strings that differ only after an embedded NUL compare as equal here.
498 return (::strncmp(vrLhs.c_str(), vrRhs.c_str(), vrLhs.size()) == 0);
502 //------------------------------------------------------------------------------------
503 // Details: Remove from either end of *this string the following: " \t\n\v\f\r".
506 // Return: CMIUtilString - Trimmed string.
// Produces a copy of *this with the characters " \t\n\v\f\r" removed from the
// end first and then from the start.
// NOTE(review): for a string made up only of whitespace both searches yield
// npos, so neither visible branch runs and strNew stays the untrimmed copy -
// confirm whether that is intended.
509 CMIUtilString CMIUtilString::Trim() const {
510 CMIUtilString strNew(*this);
511 const char *pWhiteSpace = " \t\n\v\f\r";
// Cut everything after the last non-whitespace character.
512 const size_t nPos = find_last_not_of(pWhiteSpace);
513 if (nPos != std::string::npos) {
514 strNew = substr(0, nPos + 1);
// Then drop everything before the first non-whitespace character.
516 const size_t nPos2 = strNew.find_first_not_of(pWhiteSpace);
517 if (nPos2 != std::string::npos) {
518 strNew = strNew.substr(nPos2);
525 //------------------------------------------------------------------------------------
526 // Details: Remove from either end of *this string the specified character.
529 // Return: CMIUtilString - Trimmed string.
// Works on a copy of *this: when the first and the last character both equal
// vChar, exactly one character is removed from each end.
// NOTE(review): strNew[nLen - 1] is out of range for an empty string;
// presumably a length check guards this in a line not visible here - confirm.
532 CMIUtilString CMIUtilString::Trim(const char vChar) const {
533 CMIUtilString strNew(*this);
534 const size_t nLen = strNew.length();
536 if ((strNew[0] == vChar) && (strNew[nLen - 1] == vChar))
537 strNew = strNew.substr(1, nLen - 2);
544 //------------------------------------------------------------------------------------
545 // Details: Do a printf equivalent for printing a number in binary i.e. "b%llB".
546 // Type: Static method.
547 // Args: vnDecimal - (R) The number to represent in binary.
548 // Return: CMIUtilString - Binary number in text.
// Renders vnDecimal as text consisting of "0b" followed by its binary digits.
551 CMIUtilString CMIUtilString::FormatBinary(const MIuint64 vnDecimal) {
552 CMIUtilString strBinaryNumber;
554 const MIuint nConstBits = 64;
555 MIuint nRem[nConstBits + 1];
558 MIuint64 nNum = vnDecimal;
// Collect the remainders of division by two, least-significant bit first,
// for at most nConstBits digits.
559 while ((nNum > 0) && (nLen < nConstBits)) {
560 nRem[i++] = nNum % 2;
564 char pN[nConstBits + 1];
// Emit the collected bits in reverse order so the most-significant digit
// comes first.
566 for (i = nLen; i > 0; --i, j++) {
567 pN[j] = '0' + nRem[i - 1];
569 pN[j] = 0; // String NUL termination
// NOTE(review): for vnDecimal == 0 the collecting loop never runs; the digit
// string then looks like it would be empty (result "0b") - confirm against the
// initial values of nLen/i/j, which are not visible here.
571 strBinaryNumber = CMIUtilString::Format("0b%s", &pN[0]);
573 return strBinaryNumber;
577 //------------------------------------------------------------------------------------
578 // Details: Remove from a string doubled up characters so only one set left.
580 // are only removed if the previous character is already a same
583 // Args: vChar - (R) The character to search for and remove adjacent
585 // Return: CMIUtilString - New version of the string.
// Public entry point: starts the positional overload at index 0 and returns
// its result.
588 CMIUtilString CMIUtilString::RemoveRepeatedCharacters(const char vChar) {
589 return RemoveRepeatedCharacters(0, vChar);
593 //------------------------------------------------------------------------------------
594 // Details: Recursively remove from a string doubled up characters so only one
596 // Characters are only removed if the previous character is already a
600 // Args: vChar - (R) The character to search for and remove adjacent
602 // vnPos - Character position in the string.
603 // Return: CMIUtilString - New version of the string.
// Recursive worker: looks for two adjacent quote characters starting at vnPos,
// removes the first of the pair from *this and recurses.
// NOTE(review): the search uses the hard-coded cQuote ('"'); in the visible
// lines vChar is only forwarded to the recursive call, although the header
// comment describes it as the character to search for - confirm.
606 CMIUtilString CMIUtilString::RemoveRepeatedCharacters(size_t vnPos,
608 const char cQuote = '"';
610 // Look for first quote of two
611 const size_t nPos = find(cQuote, vnPos);
612 if (nPos == std::string::npos)
615 const size_t nPosNext = nPos + 1;
// NOTE(review): find() returns an index below length(), so nPosNext is at most
// length() and this test can never be true. When the quote is the last
// character, at(nPosNext) below throws std::out_of_range; ">=" looks
// intended - confirm.
616 if (nPosNext > length())
619 if (at(nPosNext) == cQuote) {
// Drop the character at nPos, then continue with the rest of the string.
// *this is modified in place, so the recursive call's return value is not
// needed here.
620 *this = substr(0, nPos) + substr(nPosNext, length());
621 RemoveRepeatedCharacters(nPosNext, vChar);
628 //------------------------------------------------------------------------------------
629 // Details: Is the text in *this string surrounded by quotes.
632 // Return: bool - True = Yes string is quoted, false = not quoted.
// Tests *this for surrounding '"' characters. Only the check of the last
// character is visible in this excerpt.
635 bool CMIUtilString::IsQuoted() const {
636 const char cQuote = '"';
641 const size_t nLen = length();
// The nLen > 0 test keeps at(nLen - 1) in range for an empty string.
642 if ((nLen > 0) && (at(nLen - 1) != cQuote))
649 //------------------------------------------------------------------------------------
650 // Details: Find first occurrence in *this string which matches the pattern.
652 // Args: vrPattern - (R) The pattern to search for.
653 // vnPos - The starting position at which to start searching.
655 // Return: size_t - The position of the first substring that match.
// Thin wrapper over std::string::find(): position of the first occurrence of
// vrPattern at or after vnPos, or std::string::npos when there is none.
658 size_t CMIUtilString::FindFirst(const CMIUtilString &vrPattern,
659 size_t vnPos /* = 0 */) const {
660 return find(vrPattern, vnPos);
664 //------------------------------------------------------------------------------------
665 // Details: Find first occurrence in *this string which matches the pattern and
666 // isn't surrounded by quotes.
668 // Args: vrPattern - (R) The pattern to search for.
669 // vbSkipQuotedText - (R) True = don't look at quoted text,
670 // false = otherwise.
671 // vrwbNotFoundClosedQuote - (W) True = parsing error: unmatched
672 // quote, false = otherwise.
673 // vnPos - Position of the first character in the
674 // string to be considered in the search. (Dflt = 0)
675 // Return: size_t - The position of the first substring that matches and isn't
// Quote-aware search for vrPattern. vrwbNotFoundClosedQuote is always written:
// it is cleared on entry and set only when an opening quote has no closing
// partner.
679 size_t CMIUtilString::FindFirst(const CMIUtilString &vrPattern,
680 const bool vbSkipQuotedText,
681 bool &vrwbNotFoundClosedQuote,
682 size_t vnPos /* = 0 */) const {
683 vrwbNotFoundClosedQuote = false;
// Without quote skipping this is just the plain search.
685 if (!vbSkipQuotedText)
686 return FindFirst(vrPattern, vnPos);
688 const size_t nLen(length());
// Each pass looks for both the next unescaped quote and the next pattern
// occurrence from the current position.
692 const size_t nQuotePos(FindFirstQuote(nPos));
693 const size_t nPatternPos(FindFirst(vrPattern, nPos));
694 if (nQuotePos == std::string::npos)
697 const size_t nQuoteClosedPos = FindFirstQuote(nQuotePos + 1);
// An opening quote without a closing one is reported as a parse error.
698 if (nQuoteClosedPos == std::string::npos) {
699 vrwbNotFoundClosedQuote = true;
700 return std::string::npos;
// Tests whether the pattern is absent or lies before the opening quote.
703 if ((nPatternPos == std::string::npos) || (nPatternPos < nQuotePos))
// Otherwise continue searching after the closing quote.
706 nPos = nQuoteClosedPos + 1;
707 } while (nPos < nLen);
709 return std::string::npos;
713 //------------------------------------------------------------------------------------
714 // Details: Find first occurrence in *this string which doesn't match the
717 // Args: vrPattern - (R) The pattern to search for.
718 // vnPos - Position of the first character in the string to be
719 // considered in the search. (Dflt = 0)
720 // Return: size_t - The position of the first character that doesn't match.
// Steps through *this from vnPos in strides of vrPattern.length(), comparing
// the text at each position with vrPattern; npos is returned once the end of
// the string is reached.
723 size_t CMIUtilString::FindFirstNot(const CMIUtilString &vrPattern,
724 size_t vnPos /* = 0 */) const {
725 const size_t nLen(length());
726 const size_t nPatternLen(vrPattern.length());
728 size_t nPatternPos(vnPos);
// compare() examines at most nPatternLen characters starting at nPatternPos.
730 const bool bMatchPattern(compare(nPatternPos, nPatternLen, vrPattern) == 0);
// Advance by one whole pattern length.
733 nPatternPos += nPatternLen;
734 } while (nPatternPos < nLen);
736 return std::string::npos;
740 //------------------------------------------------------------------------------------
741 // Details: Find first occurrence of not escaped quotation mark in *this string.
743 // Args: vnPos - Position of the first character in the string to be
744 // considered in the search.
745 // Return: size_t - The position of the quotation mark.
// Searches from vnPos for a '"' character, stepping over backslash escape
// pairs; npos is returned when the end of the string is reached.
748 size_t CMIUtilString::FindFirstQuote(size_t vnPos) const {
749 const char cBckSlash('\\');
750 const char cQuote('"');
751 const size_t nLen(length());
// Locate the next backslash and the next quote from the current position.
755 const size_t nBckSlashPos(find(cBckSlash, nPos));
756 const size_t nQuotePos(find(cQuote, nPos));
// Either no backslash is left or no quote is left in the remaining text.
757 if ((nBckSlashPos == std::string::npos) || (nQuotePos == std::string::npos))
// The quote comes before the next backslash.
760 if (nQuotePos < nBckSlashPos)
763 // Skip 2 characters: First is '\', second is that which is escaped by '\'
764 nPos = nBckSlashPos + 2;
765 } while (nPos < nLen);
767 return std::string::npos;
771 //------------------------------------------------------------------------------------
772 // Details: Get escaped string from *this string.
775 // Return: CMIUtilString - The escaped version of the initial string.
// Builds a new string character by character: a '"' becomes \" when
// vbEscapeQuotes is set; the other visible path appends the text produced by
// ConvertToPrintableASCII() for the character.
778 CMIUtilString CMIUtilString::Escape(bool vbEscapeQuotes /* = false */) const {
779 const size_t nLen(length());
780 CMIUtilString strNew;
// The result is at least as long as the input, so this is a lower bound.
781 strNew.reserve(nLen);
782 for (size_t nIndex(0); nIndex < nLen; ++nIndex) {
783 const char cUnescapedChar((*this)[nIndex]);
784 if (cUnescapedChar == '"' && vbEscapeQuotes)
785 strNew.append("\\\"");
787 strNew.append(ConvertToPrintableASCII((char)cUnescapedChar));
793 //------------------------------------------------------------------------------------
794 // Details: Get string with backslashes in front of double quote '"' and
799 // Return: CMIUtilString - The wrapped version of the initial string.
// Builds a new string in which each '"' and '\' found in *this is preceded by
// a backslash.
802 CMIUtilString CMIUtilString::AddSlashes() const {
803 const char cBckSlash('\\');
804 const size_t nLen(length());
805 CMIUtilString strNew;
806 strNew.reserve(nLen);
809 while (nOffset < nLen) {
// Find the next character that needs a backslash in front of it.
810 const size_t nUnescapedCharPos(find_first_of("\"\\", nOffset));
811 const bool bUnescapedCharNotFound(nUnescapedCharPos == std::string::npos);
// None left: copy the remainder of the string verbatim.
812 if (bUnescapedCharNotFound) {
813 const size_t nAppendAll(std::string::npos);
814 strNew.append(*this, nOffset, nAppendAll);
// Copy the text before the character, then the backslash and the character.
817 const size_t nAppendLen(nUnescapedCharPos - nOffset);
818 strNew.append(*this, nOffset, nAppendLen);
819 strNew.push_back(cBckSlash);
820 const char cUnescapedChar((*this)[nUnescapedCharPos]);
821 strNew.push_back(cUnescapedChar);
822 nOffset = nUnescapedCharPos + 1;
829 //------------------------------------------------------------------------------------
830 // Details: Remove backslashes added by CMIUtilString::AddSlashes.
833 // Return: CMIUtilString - The initial version of wrapped string.
// Builds a new string in which a backslash is dropped when it is followed by
// '"' or '\'; a backslash in front of any other character is kept.
836 CMIUtilString CMIUtilString::StripSlashes() const {
837 const char cBckSlash('\\');
838 const size_t nLen(length());
839 CMIUtilString strNew;
840 strNew.reserve(nLen);
843 while (nOffset < nLen) {
844 const size_t nBckSlashPos(find(cBckSlash, nOffset));
845 const bool bBckSlashNotFound(nBckSlashPos == std::string::npos);
// No backslash left: copy the remainder of the string verbatim.
846 if (bBckSlashNotFound) {
847 const size_t nAppendAll(std::string::npos);
848 strNew.append(*this, nOffset, nAppendAll);
// Copy the text that precedes the backslash.
851 const size_t nAppendLen(nBckSlashPos - nOffset);
852 strNew.append(*this, nOffset, nAppendLen);
// NOTE(review): find() only yields indices below nLen (or npos), so this
// comparison can never be true. A trailing backslash sits at nLen - 1, is not
// recognised here, and the code below then reads (*this)[nLen], i.e. the
// terminating NUL. "nLen - 1" looks intended - confirm.
853 const bool bBckSlashIsLast(nBckSlashPos == nLen);
854 if (bBckSlashIsLast) {
855 strNew.push_back(cBckSlash);
// Keep the backslash only when the following character is not '"' or '\'.
858 const char cEscapedChar((*this)[nBckSlashPos + 1]);
859 const size_t nEscapedCharPos(std::string("\"\\").find(cEscapedChar));
860 const bool bEscapedCharNotFound(nEscapedCharPos == std::string::npos);
861 if (bEscapedCharNotFound)
862 strNew.push_back(cBckSlash);
863 strNew.push_back(cEscapedChar);
// Step over the backslash and the character that followed it.
864 nOffset = nBckSlashPos + 2;
// Converts a single char to printable text. In the visible tail a printable
// character is returned as itself and anything else as a "\x" hexadecimal
// escape; earlier handling is not visible in this excerpt.
870 CMIUtilString CMIUtilString::ConvertToPrintableASCII(const char vChar,
871 bool bEscapeQuotes) {
// NOTE(review): vChar is a plain char; passing a negative value to isprint()
// is undefined behaviour - the usual idiom converts through unsigned char.
896 if (::isprint(vChar))
897 return Format("%c", vChar);
// NOTE(review): vChar is promoted to int for the variadic call; for negative
// values the number of hex digits printed depends on how the platform defines
// PRIx8 - consider converting through unsigned char first.
899 return Format("\\x%02" PRIx8, vChar);
// Variant of the char conversion that returns an empty string, rather than a
// hexadecimal escape, when the visible tail finds vChar not printable.
904 CMIUtilString::ConvertCharValueToPrintableASCII(char vChar,
905 bool bEscapeQuotes) {
// NOTE(review): vChar is a plain char; passing a negative value to isprint()
// is undefined behaviour - the usual idiom converts through unsigned char.
930 if (::isprint(vChar))
931 return Format("%c", vChar);
// Empty result: the caller can tell that no printable form was produced.
933 return CMIUtilString();
// Converts a char16_t to printable text: values that survive a round trip
// through char are first offered to ConvertCharValueToPrintableASCII(); the
// fallback visible here is a "\u" escape built from the value's bytes.
937 CMIUtilString CMIUtilString::ConvertToPrintableASCII(const char16_t vChar16,
938 bool bEscapeQuotes) {
// True only when narrowing to char and widening back gives the same value.
939 if (vChar16 == (char16_t)(char)vChar16) {
940 // Convert char16_t to char (if possible)
942 ConvertCharValueToPrintableASCII((char)vChar16, bEscapeQuotes);
// A non-empty result means the char conversion produced printable text.
943 if (str.length() > 0)
// High byte first; the remaining argument(s) are not visible in this excerpt.
946 return Format("\\u%02" PRIx8 "%02" PRIx8, (vChar16 >> 8) & 0xff,
950 CMIUtilString CMIUtilString::ConvertToPrintableASCII(const char32_t vChar32,
951 bool bEscapeQuotes) {
952 if (vChar32 == (char32_t)(char)vChar32) {
953 // Convert char32_t to char (if possible)
955 ConvertCharValueToPrintableASCII((char)vChar32, bEscapeQuotes);
956 if (str.length() > 0)
959 return Format("\\U%02" PRIx8 "%02" PRIx8 "%02" PRIx8 "%02" PRIx8,
960 (vChar32 >> 24) & 0xff, (vChar32 >> 16) & 0xff,
961 (vChar32 >> 8) & 0xff, vChar32 & 0xff);