1 //===-- MIUtilString.cpp ----------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Third party headers
11 #include "llvm/Support/Compiler.h"
21 #include "MIUtilString.h"
24 //------------------------------------------------------------------------------------
25 // Details: CMIUtilString constructor.
31 CMIUtilString::CMIUtilString() : std::string() {}
34 //------------------------------------------------------------------------------------
35 // Details: CMIUtilString constructor.
37 // Args: vpData - Pointer to UTF8 text data.
41 CMIUtilString::CMIUtilString(const char *vpData) : std::string(vpData) {}
44 //------------------------------------------------------------------------------------
45 // Details: CMIUtilString constructor.
47 // Args: vpStr - Text data.
51 CMIUtilString::CMIUtilString(const std::string &vrStr) : std::string(vrStr) {}
54 //------------------------------------------------------------------------------------
55 // Details: CMIUtilString assignment operator.
57 // Args: vpRhs - Pointer to UTF8 text data.
58 // Return: CMIUtilString & - *this string.
61 CMIUtilString &CMIUtilString::operator=(const char *vpRhs) {
67 //------------------------------------------------------------------------------------
68 // Details: CMIUtilString assignment operator.
70 // Args: vrRhs - The other string to copy from.
71 // Return: CMIUtilString & - *this string.
74 CMIUtilString &CMIUtilString::operator=(const std::string &vrRhs) {
80 //------------------------------------------------------------------------------------
81 // Details: CMIUtilString destructor.
87 CMIUtilString::~CMIUtilString() {}
90 //------------------------------------------------------------------------------------
91 // Details: Perform a snprintf format style on a string data. A new string
93 // created and returned.
94 // Type: Static method.
95 // Args: vrFormat - (R) Format string data instruction.
96 // vArgs - (R) Var list args of any type.
97 // Return: CMIUtilString - The newly formatted string.
100 CMIUtilString CMIUtilString::FormatPriv(const CMIUtilString &vrFormat,
102 CMIUtilString strResult;
104 MIint n = vrFormat.size();
106 // IOR: mysterious crash in this function on some windows builds not able to
108 // but found article which may be related. Crash occurs in vsnprintf() or
110 // Duplicate vArgs va_list argument pointer to ensure that it can be safely
113 // http://julipedia.meroh.net/2011/09/using-vacopy-to-safely-pass-ap.html
115 va_copy(argsDup, vArgs);
117 // Create a copy va_list to reset when we spin
119 va_copy(argsCpy, argsDup);
124 n = n << 4; // Reserve 16 times as much the length of the vrFormat
126 std::unique_ptr<char[]> pFormatted;
128 pFormatted.reset(new char[n + 1]); // +1 for safety margin
129 ::strncpy(&pFormatted[0], vrFormat.c_str(), n);
131 // We need to restore the variable argument list pointer to the start again
132 // before running vsnprintf() more than once
133 va_copy(argsDup, argsCpy);
135 nFinal = ::vsnprintf(&pFormatted[0], n, vrFormat.c_str(), argsDup);
136 if ((nFinal < 0) || (nFinal >= n))
137 n += abs(nFinal - n + 1);
145 strResult = pFormatted.get();
151 //------------------------------------------------------------------------------------
152 // Details: Perform a snprintf format style on a string data. A new string
154 // created and returned.
155 // Type: Static method.
156 // Args: vFormat - (R) Format string data instruction.
157 // ... - (R) Var list args of any type.
158 // Return: CMIUtilString - The newly formatted string.
161 CMIUtilString CMIUtilString::Format(const char *vFormating, ...) {
163 va_start(args, vFormating);
164 CMIUtilString strResult = CMIUtilString::FormatPriv(vFormating, args);
171 //------------------------------------------------------------------------------------
172 // Details: Perform a snprintf format style on a string data. A new string
174 // created and returned.
175 // Type: Static method.
176 // Args: vrFormat - (R) Format string data instruction.
177 // vArgs - (R) Var list args of any type.
178 // Return: CMIUtilString - The newly formatted string.
181 CMIUtilString CMIUtilString::FormatValist(const CMIUtilString &vrFormating,
183 return CMIUtilString::FormatPriv(vrFormating, vArgs);
187 //------------------------------------------------------------------------------------
188 // Details: Splits string into array of strings using delimiter. If multiple
190 // are found in sequence then they are not added to the list of splits.
192 // Args: vData - (R) String data to be split up.
193 // vDelimiter - (R) Delimiter char or text.
194 // vwVecSplits - (W) Container of splits found in string data.
195 // Return: size_t - Number of splits found in the string data.
198 size_t CMIUtilString::Split(const CMIUtilString &vDelimiter,
199 VecString_t &vwVecSplits) const {
202 if (this->empty() || vDelimiter.empty())
205 const size_t nLen(length());
208 // Find first occurrence which doesn't match to the delimiter
209 const size_t nSectionPos(FindFirstNot(vDelimiter, nOffset));
210 if (nSectionPos == std::string::npos)
213 // Find next occurrence of the delimiter after section
214 size_t nNextDelimiterPos(FindFirst(vDelimiter, nSectionPos));
215 if (nNextDelimiterPos == std::string::npos)
216 nNextDelimiterPos = nLen;
218 // Extract string between delimiters
219 const size_t nSectionLen(nNextDelimiterPos - nSectionPos);
220 const std::string strSection(substr(nSectionPos, nSectionLen));
221 vwVecSplits.push_back(strSection);
224 nOffset = nNextDelimiterPos + 1;
225 } while (nOffset < nLen);
227 return vwVecSplits.size();
231 //------------------------------------------------------------------------------------
232 // Details: Splits string into array of strings using delimiter. However the
234 // also considered for text surrounded by quotes. Text with quotes
236 // delimiter is treated as a whole. If multiple delimiter are found in
238 // then they are not added to the list of splits. Quotes that are
240 // the string as string formatted quotes are ignored (preceded by a
242 // "\"MI GDB local C++.cpp\":88".
244 // Args: vData - (R) String data to be split up.
245 // vDelimiter - (R) Delimiter char or text.
246 // vwVecSplits - (W) Container of splits found in string data.
247 // Return: size_t - Number of splits found in the string data.
250 size_t CMIUtilString::SplitConsiderQuotes(const CMIUtilString &vDelimiter,
251 VecString_t &vwVecSplits) const {
254 if (this->empty() || vDelimiter.empty())
257 const size_t nLen(length());
260 // Find first occurrence which doesn't match to the delimiter
261 const size_t nSectionPos(FindFirstNot(vDelimiter, nOffset));
262 if (nSectionPos == std::string::npos)
265 // Find next occurrence of the delimiter after (quoted) section
266 const bool bSkipQuotedText(true);
267 bool bUnmatchedQuote(false);
268 size_t nNextDelimiterPos(
269 FindFirst(vDelimiter, bSkipQuotedText, bUnmatchedQuote, nSectionPos));
270 if (bUnmatchedQuote) {
274 if (nNextDelimiterPos == std::string::npos)
275 nNextDelimiterPos = nLen;
277 // Extract string between delimiters
278 const size_t nSectionLen(nNextDelimiterPos - nSectionPos);
279 const std::string strSection(substr(nSectionPos, nSectionLen));
280 vwVecSplits.push_back(strSection);
283 nOffset = nNextDelimiterPos + 1;
284 } while (nOffset < nLen);
286 return vwVecSplits.size();
290 //------------------------------------------------------------------------------------
291 // Details: Split string into lines using \n and return an array of strings.
293 // Args: vwVecSplits - (W) Container of splits found in string data.
294 // Return: size_t - Number of splits found in the string data.
297 size_t CMIUtilString::SplitLines(VecString_t &vwVecSplits) const {
298 return Split("\n", vwVecSplits);
302 //------------------------------------------------------------------------------------
303 // Details: Remove '\n' from the end of string if found. It does not alter
307 // Return: CMIUtilString - New version of the string.
310 CMIUtilString CMIUtilString::StripCREndOfLine() const {
311 const size_t nPos = rfind('\n');
312 if (nPos == std::string::npos)
315 const CMIUtilString strNew(substr(0, nPos));
321 //------------------------------------------------------------------------------------
322 // Details: Remove all '\n' from the string and replace with a space. It does
327 // Return: CMIUtilString - New version of the string.
330 CMIUtilString CMIUtilString::StripCRAll() const {
331 return FindAndReplace("\n", " ");
335 //------------------------------------------------------------------------------------
336 // Details: Find and replace all matches of a sub string with another string. It
338 // alter *this string.
340 // Args: vFind - (R) The string to look for.
341 // vReplaceWith - (R) The string to replace the vFind match.
342 // Return: CMIUtilString - New version of the string.
346 CMIUtilString::FindAndReplace(const CMIUtilString &vFind,
347 const CMIUtilString &vReplaceWith) const {
348 if (vFind.empty() || this->empty())
351 size_t nPos = find(vFind);
352 if (nPos == std::string::npos)
355 CMIUtilString strNew(*this);
356 while (nPos != std::string::npos) {
357 strNew.replace(nPos, vFind.length(), vReplaceWith);
358 nPos += vReplaceWith.length();
359 nPos = strNew.find(vFind, nPos);
366 //------------------------------------------------------------------------------------
367 // Details: Check if *this string is a decimal number.
370 // Return: bool - True = yes number, false not a number.
373 bool CMIUtilString::IsNumber() const {
377 if ((at(0) == '-') && (length() == 1))
380 const size_t nPos = find_first_not_of("-.0123456789");
381 return nPos == std::string::npos;
385 //------------------------------------------------------------------------------------
386 // Details: Check if *this string is a hexadecimal number.
389 // Return: bool - True = yes number, false not a number.
392 bool CMIUtilString::IsHexadecimalNumber() const {
393 // Compare '0x..' prefix
394 if ((strncmp(c_str(), "0x", 2) != 0) && (strncmp(c_str(), "0X", 2) != 0))
397 // Skip '0x..' prefix
398 const size_t nPos = find_first_not_of("01234567890ABCDEFabcedf", 2);
399 return nPos == std::string::npos;
403 //------------------------------------------------------------------------------------
404 // Details: Extract the number from the string. The number can be either a
406 // natural number. It cannot contain other non-numeric characters.
408 // Args: vwrNumber - (W) Number extracted from the string.
409 // Return: bool - True = yes number, false not a number.
412 bool CMIUtilString::ExtractNumber(MIint64 &vwrNumber) const {
416 return ExtractNumberFromHexadecimal(vwrNumber);
419 std::stringstream ss(const_cast<CMIUtilString &>(*this));
426 //------------------------------------------------------------------------------------
427 // Details: Extract the number from the hexadecimal string.
429 // Args: vwrNumber - (W) Number extracted from the string.
430 // Return: bool - True = yes number, false not a number.
433 bool CMIUtilString::ExtractNumberFromHexadecimal(MIint64 &vwrNumber) const {
436 const size_t nPos = find_first_not_of("xX01234567890ABCDEFabcedf");
437 if (nPos != std::string::npos)
441 const MIuint64 nNum = ::strtoull(this->c_str(), nullptr, 16);
445 vwrNumber = static_cast<MIint64>(nNum);
451 //------------------------------------------------------------------------------------
452 // Details: Determine if the text is all valid alpha numeric characters. Letters
454 // either upper or lower case.
455 // Type: Static method.
456 // Args: vpText - (R) The text data to examine.
457 // Return: bool - True = yes all alphanumeric, false = one or more chars is not alphanumeric.
460 bool CMIUtilString::IsAllValidAlphaAndNumeric(const char *vpText) {
461 const size_t len = ::strlen(vpText);
465 for (size_t i = 0; i < len; i++, vpText++) {
466 const char c = *vpText;
467 if (::isalnum((int)c) == 0)
475 //------------------------------------------------------------------------------------
476 // Details: Check if two strings share equal contents.
478 // Args: vrLhs - (R) String A.
479 // vrRhs - (R) String B.
480 // Return: bool - True = yes equal, false - different.
483 bool CMIUtilString::Compare(const CMIUtilString &vrLhs,
484 const CMIUtilString &vrRhs) {
485 // Check the sizes match
486 if (vrLhs.size() != vrRhs.size())
489 return (::strncmp(vrLhs.c_str(), vrRhs.c_str(), vrLhs.size()) == 0);
493 //------------------------------------------------------------------------------------
494 // Details: Remove from either end of *this string the following: " \t\n\v\f\r".
497 // Return: CMIUtilString - Trimmed string.
500 CMIUtilString CMIUtilString::Trim() const {
501 CMIUtilString strNew(*this);
502 const char *pWhiteSpace = " \t\n\v\f\r";
503 const size_t nPos = find_last_not_of(pWhiteSpace);
504 if (nPos != std::string::npos) {
505 strNew = substr(0, nPos + 1);
507 const size_t nPos2 = strNew.find_first_not_of(pWhiteSpace);
508 if (nPos2 != std::string::npos) {
509 strNew = strNew.substr(nPos2);
516 //------------------------------------------------------------------------------------
517 // Details: Remove from either end of *this string the specified character.
520 // Return: CMIUtilString - Trimmed string.
523 CMIUtilString CMIUtilString::Trim(const char vChar) const {
524 CMIUtilString strNew(*this);
525 const size_t nLen = strNew.length();
527 if ((strNew[0] == vChar) && (strNew[nLen - 1] == vChar))
528 strNew = strNew.substr(1, nLen - 2);
535 //------------------------------------------------------------------------------------
536 // Details: Do a printf equivalent for printing a number in binary i.e. "b%llB".
537 // Type: Static method.
538 // Args: vnDecimal - (R) The number to represent in binary.
539 // Return: CMIUtilString - Binary number in text.
542 CMIUtilString CMIUtilString::FormatBinary(const MIuint64 vnDecimal) {
543 CMIUtilString strBinaryNumber;
545 const MIuint nConstBits = 64;
546 MIuint nRem[nConstBits + 1];
549 MIuint64 nNum = vnDecimal;
550 while ((nNum > 0) && (nLen < nConstBits)) {
551 nRem[i++] = nNum % 2;
555 char pN[nConstBits + 1];
557 for (i = nLen; i > 0; --i, j++) {
558 pN[j] = '0' + nRem[i - 1];
560 pN[j] = 0; // String NUL termination
562 strBinaryNumber = CMIUtilString::Format("0b%s", &pN[0]);
564 return strBinaryNumber;
568 //------------------------------------------------------------------------------------
569 // Details: Remove from a string doubled up characters so only one set left.
571 // are only removed if the previous character is already a same
574 // Args: vChar - (R) The character to search for and remove adjacent
576 // Return: CMIUtilString - New version of the string.
579 CMIUtilString CMIUtilString::RemoveRepeatedCharacters(const char vChar) {
580 return RemoveRepeatedCharacters(0, vChar);
584 //------------------------------------------------------------------------------------
585 // Details: Recursively remove from a string doubled up characters so only one
587 // Characters are only removed if the previous character is already a
591 // Args: vChar - (R) The character to search for and remove adjacent
593 // vnPos - Character position in the string.
594 // Return: CMIUtilString - New version of the string.
597 CMIUtilString CMIUtilString::RemoveRepeatedCharacters(size_t vnPos,
599 const char cQuote = '"';
601 // Look for first quote of two
602 const size_t nPos = find(cQuote, vnPos);
603 if (nPos == std::string::npos)
606 const size_t nPosNext = nPos + 1;
607 if (nPosNext > length())
610 if (at(nPosNext) == cQuote) {
611 *this = substr(0, nPos) + substr(nPosNext, length());
612 RemoveRepeatedCharacters(nPosNext, vChar);
619 //------------------------------------------------------------------------------------
620 // Details: Is the text in *this string surrounded by quotes.
623 // Return: bool - True = Yes string is quoted, false = no quoted.
626 bool CMIUtilString::IsQuoted() const {
627 const char cQuote = '"';
632 const size_t nLen = length();
633 return !((nLen > 0) && (at(nLen - 1) != cQuote));
637 //------------------------------------------------------------------------------------
638 // Details: Find first occurrence in *this string which matches the pattern.
640 // Args: vrPattern - (R) The pattern to search for.
641 // vnPos - The starting position at which to start searching.
643 // Return: size_t - The position of the first substring that match.
646 size_t CMIUtilString::FindFirst(const CMIUtilString &vrPattern,
647 size_t vnPos /* = 0 */) const {
648 return find(vrPattern, vnPos);
652 //------------------------------------------------------------------------------------
653 // Details: Find first occurrence in *this string which matches the pattern and
654 // isn't surrounded by quotes.
656 // Args: vrPattern - (R) The pattern to search for.
657 // vbSkipQuotedText - (R) True = don't look at quoted text,
658 // false = otherwise.
659 // vrwbNotFoundClosedQuote - (W) True = parsing error: unmatched
660 // quote, false = otherwise.
661 // vnPos - Position of the first character in the
662 // string to be considered in the search. (Dflt = 0)
663 // Return: size_t - The position of the first substring that matches and isn't
667 size_t CMIUtilString::FindFirst(const CMIUtilString &vrPattern,
668 const bool vbSkipQuotedText,
669 bool &vrwbNotFoundClosedQuote,
670 size_t vnPos /* = 0 */) const {
671 vrwbNotFoundClosedQuote = false;
673 if (!vbSkipQuotedText)
674 return FindFirst(vrPattern, vnPos);
676 const size_t nLen(length());
680 const size_t nQuotePos(FindFirstQuote(nPos));
681 const size_t nPatternPos(FindFirst(vrPattern, nPos));
682 if (nQuotePos == std::string::npos)
685 const size_t nQuoteClosedPos = FindFirstQuote(nQuotePos + 1);
686 if (nQuoteClosedPos == std::string::npos) {
687 vrwbNotFoundClosedQuote = true;
688 return std::string::npos;
691 if ((nPatternPos == std::string::npos) || (nPatternPos < nQuotePos))
694 nPos = nQuoteClosedPos + 1;
695 } while (nPos < nLen);
697 return std::string::npos;
701 //------------------------------------------------------------------------------------
702 // Details: Find first occurrence in *this string which doesn't match the
705 // Args: vrPattern - (R) The pattern to search for.
706 // vnPos - Position of the first character in the string to be
707 // considered in the search. (Dflt = 0)
708 // Return: size_t - The position of the first character that doesn't match.
711 size_t CMIUtilString::FindFirstNot(const CMIUtilString &vrPattern,
712 size_t vnPos /* = 0 */) const {
713 const size_t nLen(length());
714 const size_t nPatternLen(vrPattern.length());
716 size_t nPatternPos(vnPos);
718 const bool bMatchPattern(compare(nPatternPos, nPatternLen, vrPattern) == 0);
721 nPatternPos += nPatternLen;
722 } while (nPatternPos < nLen);
724 return std::string::npos;
728 //------------------------------------------------------------------------------------
729 // Details: Find first occurrence of not escaped quotation mark in *this string.
731 // Args: vnPos - Position of the first character in the string to be
732 // considered in the search.
733 // Return: size_t - The position of the quotation mark.
736 size_t CMIUtilString::FindFirstQuote(size_t vnPos) const {
737 const char cBckSlash('\\');
738 const char cQuote('"');
739 const size_t nLen(length());
743 const size_t nBckSlashPos(find(cBckSlash, nPos));
744 const size_t nQuotePos(find(cQuote, nPos));
745 if ((nBckSlashPos == std::string::npos) || (nQuotePos == std::string::npos))
748 if (nQuotePos < nBckSlashPos)
751 // Skip 2 characters: First is '\', second is that which is escaped by '\'
752 nPos = nBckSlashPos + 2;
753 } while (nPos < nLen);
755 return std::string::npos;
759 //------------------------------------------------------------------------------------
760 // Details: Get escaped string from *this string.
763 // Return: CMIUtilString - The escaped version of the initial string.
766 CMIUtilString CMIUtilString::Escape(bool vbEscapeQuotes /* = false */) const {
767 const size_t nLen(length());
768 CMIUtilString strNew;
769 strNew.reserve(nLen);
770 for (size_t nIndex(0); nIndex < nLen; ++nIndex) {
771 const char cUnescapedChar((*this)[nIndex]);
772 if (cUnescapedChar == '"' && vbEscapeQuotes)
773 strNew.append("\\\"");
775 strNew.append(ConvertToPrintableASCII((char)cUnescapedChar));
781 //------------------------------------------------------------------------------------
782 // Details: Get string with backslashes in front of double quote '"' and
787 // Return: CMIUtilString - The wrapped version of the initial string.
790 CMIUtilString CMIUtilString::AddSlashes() const {
791 const char cBckSlash('\\');
792 const size_t nLen(length());
793 CMIUtilString strNew;
794 strNew.reserve(nLen);
797 while (nOffset < nLen) {
798 const size_t nUnescapedCharPos(find_first_of("\"\\", nOffset));
799 const bool bUnescapedCharNotFound(nUnescapedCharPos == std::string::npos);
800 if (bUnescapedCharNotFound) {
801 const size_t nAppendAll(std::string::npos);
802 strNew.append(*this, nOffset, nAppendAll);
805 const size_t nAppendLen(nUnescapedCharPos - nOffset);
806 strNew.append(*this, nOffset, nAppendLen);
807 strNew.push_back(cBckSlash);
808 const char cUnescapedChar((*this)[nUnescapedCharPos]);
809 strNew.push_back(cUnescapedChar);
810 nOffset = nUnescapedCharPos + 1;
817 //------------------------------------------------------------------------------------
818 // Details: Remove backslashes added by CMIUtilString::AddSlashes.
821 // Return: CMIUtilString - The initial version of wrapped string.
824 CMIUtilString CMIUtilString::StripSlashes() const {
825 const char cBckSlash('\\');
826 const size_t nLen(length());
827 CMIUtilString strNew;
828 strNew.reserve(nLen);
831 while (nOffset < nLen) {
832 const size_t nBckSlashPos(find(cBckSlash, nOffset));
833 const bool bBckSlashNotFound(nBckSlashPos == std::string::npos);
834 if (bBckSlashNotFound) {
835 const size_t nAppendAll(std::string::npos);
836 strNew.append(*this, nOffset, nAppendAll);
839 const size_t nAppendLen(nBckSlashPos - nOffset);
840 strNew.append(*this, nOffset, nAppendLen);
841 const bool bBckSlashIsLast(nBckSlashPos == nLen);
842 if (bBckSlashIsLast) {
843 strNew.push_back(cBckSlash);
846 const char cEscapedChar((*this)[nBckSlashPos + 1]);
847 const size_t nEscapedCharPos(std::string("\"\\").find(cEscapedChar));
848 const bool bEscapedCharNotFound(nEscapedCharPos == std::string::npos);
849 if (bEscapedCharNotFound)
850 strNew.push_back(cBckSlash);
851 strNew.push_back(cEscapedChar);
852 nOffset = nBckSlashPos + 2;
858 CMIUtilString CMIUtilString::ConvertToPrintableASCII(const char vChar,
859 bool bEscapeQuotes) {
884 if (::isprint(vChar))
885 return Format("%c", vChar);
887 return Format("\\x%02" PRIx8, vChar);
892 CMIUtilString::ConvertCharValueToPrintableASCII(char vChar,
893 bool bEscapeQuotes) {
918 if (::isprint(vChar))
919 return Format("%c", vChar);
921 return CMIUtilString();
925 CMIUtilString CMIUtilString::ConvertToPrintableASCII(const char16_t vChar16,
926 bool bEscapeQuotes) {
927 if (vChar16 == (char16_t)(char)vChar16) {
928 // Convert char16_t to char (if possible)
930 ConvertCharValueToPrintableASCII((char)vChar16, bEscapeQuotes);
931 if (str.length() > 0)
934 return Format("\\u%02" PRIx8 "%02" PRIx8, (vChar16 >> 8) & 0xff,
938 CMIUtilString CMIUtilString::ConvertToPrintableASCII(const char32_t vChar32,
939 bool bEscapeQuotes) {
940 if (vChar32 == (char32_t)(char)vChar32) {
941 // Convert char32_t to char (if possible)
943 ConvertCharValueToPrintableASCII((char)vChar32, bEscapeQuotes);
944 if (str.length() > 0)
947 return Format("\\U%02" PRIx8 "%02" PRIx8 "%02" PRIx8 "%02" PRIx8,
948 (vChar32 >> 24) & 0xff, (vChar32 >> 16) & 0xff,
949 (vChar32 >> 8) & 0xff, vChar32 & 0xff);