1 //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "lldb/DataFormatters/StringPrinter.h"
12 #include "lldb/Core/DataExtractor.h"
13 #include "lldb/Core/Debugger.h"
14 #include "lldb/Core/Error.h"
15 #include "lldb/Core/ValueObject.h"
16 #include "lldb/Target/Process.h"
17 #include "lldb/Target/Target.h"
19 #include "llvm/Support/ConvertUTF.h"
26 using namespace lldb_private;
27 using namespace lldb_private::formatters;
29 // I can't use a std::unique_ptr for this because the Deleter is a template argument there
30 // and I want the same type to represent both pointers I want to free and pointers I don't need
31 // to free - which is what this class essentially is
32 // It's very specialized to the needs of this file, and not suggested for general use
//
// Template parameters: T is the element type of the stored pointer, U is an
// alternative element type accepted by one constructor, S is the size type.
// NOTE: "copying" this object is not a real copy - the copy constructor and
// the copy-assignment operator both null out the source's data pointer.
33 template <typename T = uint8_t, typename U = char, typename S = size_t>
34 struct StringPrinterBufferPointer
// Callable taking the stored pointer; the destructor and operator= test it
// (together with m_data) for non-null before using it.
38 typedef std::function<void(const T*)> Deleter;
// Construct from nullptr (used in this file to build an "empty" return value).
40 StringPrinterBufferPointer (std::nullptr_t ptr) :
// Construct from a T buffer of the given size; the deleter is optional and
// defaults to none.
46 StringPrinterBufferPointer(const T* bytes, S size, Deleter deleter = nullptr) :
// Same signature shape, but for a buffer expressed as U (char by default).
52 StringPrinterBufferPointer(const U* bytes, S size, Deleter deleter = nullptr) :
// Move constructor; the deleter is copied from rhs.
58 StringPrinterBufferPointer(StringPrinterBufferPointer&& rhs) :
61 m_deleter(rhs.m_deleter)
// Copy constructor: takes the deleter from rhs and then clears rhs.m_data,
// so after the "copy" rhs no longer refers to the buffer.
66 StringPrinterBufferPointer(const StringPrinterBufferPointer& rhs) :
69 m_deleter(rhs.m_deleter)
71 rhs.m_data = nullptr; // this is why m_data has to be mutable
// Destructor: acts only when there is both a data pointer and a deleter.
86 ~StringPrinterBufferPointer ()
88 if (m_data && m_deleter)
// Copy assignment: first handles the currently held buffer (only when both
// m_data and m_deleter are set), then adopts rhs's deleter and clears
// rhs.m_data, mirroring the copy constructor.
93 StringPrinterBufferPointer&
94 operator = (const StringPrinterBufferPointer& rhs)
96 if (m_data && m_deleter)
100 m_deleter = rhs.m_deleter;
101 rhs.m_data = nullptr;
// mutable so that the const-reference copy operations above can reset it.
106 mutable const T* m_data;
111 // we define this for all values of type but only implement it for those we care about
112 // that's good because we get linker errors for any unsupported type
//
// buffer     - position of the element to examine
// buffer_end - end bound of the data being scanned
// next       - out parameter; the specializations in this file set it to the
//              position from which scanning should resume
// Returns the bytes (and their count) that should be printed for the element.
113 template <StringElementType type>
114 static StringPrinterBufferPointer<>
115 GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next);
117 // mimic isprint() for Unicode codepoints
// Each test below singles out one range of codepoints; the statements that
// follow each test are not visible here - presumably each range is reported
// as non-printable (TODO confirm).
119 isprint(char32_t codepoint)
// U+0000..U+001F and U+007F (DEL)
121 if (codepoint <= 0x1F || codepoint == 0x7F) // C0
// U+0080..U+009F
125 if (codepoint >= 0x80 && codepoint <= 0x9F) // C1
129 if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators
// U+200E, U+200F and U+202A..U+202E
133 if (codepoint == 0x200E || codepoint == 0x200F || (codepoint >= 0x202A && codepoint <= 0x202E)) // bidirectional text control
// U+FFF9..U+FFFF
137 if (codepoint >= 0xFFF9 && codepoint <= 0xFFFF) // interlinears and generally specials
// ASCII specialization: decides what to print for the single byte at *buffer.
145 StringPrinterBufferPointer<>
146 GetPrintableImpl<StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
// starts out empty; filled in by one of the branches below
148 StringPrinterBufferPointer<> retval = {nullptr};
// the byte is passed to isprint() to choose between printing it and escaping it
183 if (isprint(*buffer))
// Escape path: a heap buffer of 5 bytes is allocated with a reported size of 4
// and a delete[] deleter; "\x" plus two hex digits is 4 characters, and the
// fifth byte holds sprintf's terminating NUL.
187 retval = { new uint8_t[5],4,[] (const uint8_t* c) {delete[] c;} };
188 sprintf((char*)retval.GetBytes(),"\\x%02x",*buffer);
// Combines the two bytes of a 2-byte UTF-8 sequence into a codepoint value.
// 192 (0xC0) is subtracted from the lead byte and 128 (0x80) from the
// continuation byte; the lead byte's payload is weighted by 64 (2^6) because
// the continuation byte contributes 6 bits.
// The visible expression performs no validation of either byte.
198 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1)
200 return (c0-192)*64+(c1-128);
// Combines the three bytes of a 3-byte UTF-8 sequence into a codepoint value.
// 224 (0xE0) is subtracted from the lead byte and 128 (0x80) from each
// continuation byte; the weights 4096 (2^12) and 64 (2^6) place each 6-bit
// continuation payload below the bits that precede it.
// The visible expression performs no validation of the bytes.
203 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2)
205 return (c0-224)*4096+(c1-128)*64+(c2-128);
// Combines the four bytes of a 4-byte UTF-8 sequence into a codepoint value.
//
// c0 is the lead byte (0xF0 marker, 3 payload bits); c1, c2 and c3 are the
// continuation bytes (0x80 marker, 6 payload bits each). The weights
// 262144 (2^18), 4096 (2^12) and 64 (2^6) shift each payload above the bits
// contributed by the bytes that follow it.
//
// The second term must use c1: using c2 there drops the first continuation
// byte entirely and yields a wrong codepoint for every 4-byte sequence.
//
// No validation is performed; the caller is expected to pass a well-formed
// sequence.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3)
{
    return (c0-240)*262144+(c1-128)*4096+(c2-128)*64+(c3-128);
}
// UTF-8 specialization: decodes the sequence starting at *buffer and returns
// either the original bytes or a "\U" escape for it.
214 StringPrinterBufferPointer<>
215 GetPrintableImpl<StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
217 StringPrinterBufferPointer<> retval {nullptr};
// expected length of the sequence, derived from its first byte
219 unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer);
// NOTE(review): the "1+" treats buffer_end as the last valid byte, whereas the
// callers in this file appear to pass a one-past-the-end pointer - confirm
// which convention is intended.
221 if (1+buffer_end-buffer < utf8_encoded_len)
223 // I don't have enough bytes - print whatever I have left
224 retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)};
229 char32_t codepoint = 0;
// dispatch on the sequence length to the matching decoder
230 switch (utf8_encoded_len)
233 // this is just an ASCII byte - ask ASCII
234 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next);
236 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1));
239 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2));
242 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3));
245 // this is probably some bogus non-character thing
246 // just print it as-is and hope to sync up again soon
// printable codepoints are returned as the original bytes, without a deleter
287 if (isprint(codepoint))
288 retval = {buffer,utf8_encoded_len};
// Escape path: an 11-byte heap buffer with a reported size of 10 and a
// delete[] deleter; "\U" plus eight hex digits is 10 characters, and the
// eleventh byte holds sprintf's terminating NUL.
291 retval = { new uint8_t[11],10,[] (const uint8_t* c) {delete[] c;} };
292 sprintf((char*)retval.GetBytes(),"\\U%08x",codepoint);
// resume scanning right after the sequence just handled
297 next = buffer + utf8_encoded_len;
301 // this should not happen - but just in case.. try to resync at some point
307 // Given a sequence of bytes, this function returns:
308 // a sequence of bytes to actually print out + a length
309 // the following unscanned position of the buffer is in next
//
// Runtime dispatcher: forwards to the GetPrintableImpl specialization that
// matches `type`. Only the ASCII and UTF8 cases are visible here.
310 static StringPrinterBufferPointer<>
311 GetPrintable(StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
318 case StringElementType::ASCII:
319 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next);
320 case StringElementType::UTF8:
321 return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next);
327 // use this call if you already have an LLDB-side buffer for the data
//
// Writes the contents of `data` to `stream`, emitting the prefix token and
// quote characters around it. When a ConvertFunction is used, the source
// elements are first converted to UTF-8 in a temporary buffer; otherwise the
// source bytes are used directly. With escapeNonPrintables set, every element
// goes through GetPrintable() before being written.
328 template<typename SourceDataType>
330 DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**,
331 const SourceDataType*,
335 const DataExtractor& data,
340 bool escapeNonPrintables)
// optional prefix character, written before the opening quote
342 if (prefix_token != 0)
343 stream.Printf("%c",prefix_token);
345 stream.Printf("%c",quote);
// only dump when the extractor actually has bytes and valid bounds
346 if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd())
348 const int bufferSPSize = data.GetByteSize();
// NOTE(review): origin_encoding / 4 evaluates to 2 * sizeof(SourceDataType),
// so this computes half the number of elements that fit in the buffer -
// confirm that this divisor is intended.
351 const int origin_encoding = 8*sizeof(SourceDataType);
352 sourceSize = bufferSPSize/(origin_encoding / 4);
355 SourceDataType *data_ptr = (SourceDataType*)data.GetDataStart();
356 SourceDataType *data_end_ptr = data_ptr + sourceSize;
// scan forward; the end pointer is pulled back to the position where the scan
// stops (the stop condition itself is on lines not shown here)
358 while (data_ptr < data_end_ptr)
362 data_end_ptr = data_ptr;
// rewind to the start of the data after the scan
368 data_ptr = (SourceDataType*)data.GetDataStart();
370 lldb::DataBufferSP utf8_data_buffer_sp;
371 UTF8* utf8_data_ptr = nullptr;
372 UTF8* utf8_data_end_ptr = nullptr;
// conversion path: the destination buffer is sized at four times the source
// byte count
376 utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0));
377 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes();
378 utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
379 ConvertFunction ( (const SourceDataType**)&data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion );
// utf8_data_ptr was passed by address above, so it is reset to the start of
// the destination buffer before printing
380 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr
384 // just copy the pointers - the cast is necessary to make the compiler happy
385 // but this should only happen if we are reading UTF8 data
386 utf8_data_ptr = (UTF8*)data_ptr;
387 utf8_data_end_ptr = (UTF8*)data_end_ptr;
390 // since we tend to accept partial data (and even partially malformed data)
391 // we might end up with no NULL terminator before the end_ptr
392 // hence we need to take a slower route and ensure we stay within boundaries
393 for (;utf8_data_ptr < utf8_data_end_ptr;)
398 if (escapeNonPrintables)
// next_data stays null unless GetPrintable() sets it
400 uint8_t* next_data = nullptr;
401 auto printable = GetPrintable(StringElementType::UTF8, utf8_data_ptr, utf8_data_end_ptr, next_data);
402 auto printable_bytes = printable.GetBytes();
403 auto printable_size = printable.GetSize();
404 if (!printable_bytes || !next_data)
406 // GetPrintable() failed on us - print one byte in a desperate resync attempt
407 printable_bytes = utf8_data_ptr;
409 next_data = utf8_data_ptr+1;
// emit the chosen bytes one at a time, then advance to the resume position
411 for (unsigned c = 0; c < printable_size; c++)
412 stream.Printf("%c", *(printable_bytes+c));
413 utf8_data_ptr = (uint8_t*)next_data;
// non-escaping path: the current byte is written as-is
417 stream.Printf("%c",*utf8_data_ptr);
// closing quote
423 stream.Printf("%c",quote);
// Builds the options with the default constructor, then sets the
// escape-non-printables flag from the debugger reached through valobj's target.
// NOTE(review): valobj.GetTargetSP() is dereferenced without a visible null
// check - confirm callers always pass a value object that has a target.
427 lldb_private::formatters::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) :
428 ReadStringAndDumpToStreamOptions()
430 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
// Builds the options with the default constructor, then sets the
// escape-non-printables flag from the debugger reached through valobj's target.
// NOTE(review): valobj.GetTargetSP() is dereferenced without a visible null
// check - confirm callers always pass a value object that has a target.
433 lldb_private::formatters::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) :
434 ReadBufferAndDumpToStreamOptions()
436 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
440 namespace lldb_private
// ASCII specialization: reads a C string from the process at
// options.GetLocation() into a local buffer and writes it to
// options.GetStream(), surrounded by the prefix token and quote, optionally
// escaping non-printable bytes.
448 ReadStringAndDumpToStream<StringElementType::ASCII> (ReadStringAndDumpToStreamOptions options)
450 assert(options.GetStream() && "need a Stream to print the string to");
454 ProcessSP process_sp(options.GetProcessSP());
// a process and a non-zero location are both required
456 if (process_sp.get() == nullptr || options.GetLocation() == 0)
// Buffer size selection:
//  - no source size given: use the target's maximum string-summary size
//  - size given and the maximum is honoured: the smaller of the two
//  - otherwise: the requested size as-is
461 if (options.GetSourceSize() == 0)
462 size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
463 else if (!options.GetIgnoreMaxLength())
464 size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary());
466 size = options.GetSourceSize();
468 lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0));
470 my_data_read = process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error);
475 char prefix_token = options.GetPrefixToken();
476 char quote = options.GetQuote();
// with a prefix token, prefix and quote are written together; the quote-only
// write below belongs to a branch whose condition is not visible here
478 if (prefix_token != 0)
479 options.GetStream()->Printf("%c%c",prefix_token,quote);
481 options.GetStream()->Printf("%c",quote);
// one-past-the-end of the local buffer
483 uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize();
485 // since we tend to accept partial data (and even partially malformed data)
486 // we might end up with no NULL terminator before the end_ptr
487 // hence we need to take a slower route and ensure we stay within boundaries
// NOTE(review): *data is evaluated before (data < data_end), so when no NUL
// byte occurs inside the buffer the byte at data_end is read before the bounds
// test can fail; swapping the two operands would avoid that read.
488 for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);)
490 if (options.GetEscapeNonPrintables())
// next_data stays null unless GetPrintable() sets it
492 uint8_t* next_data = nullptr;
493 auto printable = GetPrintable(StringElementType::ASCII, data, data_end, next_data);
494 auto printable_bytes = printable.GetBytes();
495 auto printable_size = printable.GetSize();
496 if (!printable_bytes || !next_data)
498 // GetPrintable() failed on us - print one byte in a desperate resync attempt
499 printable_bytes = data;
// emit the chosen bytes one at a time, then advance to the resume position
503 for (unsigned c = 0; c < printable_size; c++)
504 options.GetStream()->Printf("%c", *(printable_bytes+c));
505 data = (uint8_t*)next_data;
// non-escaping path: the current byte is written as-is
509 options.GetStream()->Printf("%c",*data);
// closing quote
515 options.GetStream()->Printf("%c",quote);
// Shared implementation for the UTF-8/16/32 string readers: reads elements of
// SourceDataType from the process at options.GetLocation() into a local
// buffer, wraps it in a DataExtractor and hands it to DumpUTFBufferToStream.
// ConvertFunction converts the source encoding to UTF-8; it is required
// whenever SourceDataType is wider than 8 bits.
520 template<typename SourceDataType>
522 ReadUTFBufferAndDumpToStream (const ReadStringAndDumpToStreamOptions& options,
523 ConversionResult (*ConvertFunction) (const SourceDataType**,
524 const SourceDataType*,
529 assert(options.GetStream() && "need a Stream to print the string to");
// both 0 and LLDB_INVALID_ADDRESS are treated as "no location"
531 if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS)
534 lldb::ProcessSP process_sp(options.GetProcessSP());
// element width in bytes and in bits; only 8-, 16- and 32-bit elements are
// accepted
539 const int type_width = sizeof(SourceDataType);
540 const int origin_encoding = 8 * type_width ;
541 if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32)
543 // if not UTF8, I need a conversion function to return proper UTF8
544 if (origin_encoding != 8 && !ConvertFunction)
// runtime check in addition to the assert above
547 if (!options.GetStream())
550 uint32_t sourceSize = options.GetSourceSize();
551 bool needs_zero_terminator = options.GetNeedsZeroTermination();
// In one branch (condition not visible here) the target's maximum
// string-summary size is used and zero-termination is forced; in the other
// the requested size is capped at that maximum.
555 sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
556 needs_zero_terminator = true;
559 sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary());
// buffer size in bytes: element count times element width
561 const int bufferSPSize = sourceSize * type_width;
563 lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0));
565 if (!buffer_sp->GetBytes())
569 char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes());
// terminated strings are read with ReadStringFromMemory (which is given the
// element width); otherwise the full buffer is read as raw memory
571 size_t data_read = 0;
572 if (needs_zero_terminator)
573 data_read = process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width);
575 data_read = process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error);
// failure message written to the stream (the failing condition is on lines
// not shown here)
579 options.GetStream()->Printf("unable to read data");
583 DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize());
585 return DumpUTFBufferToStream(ConvertFunction, data, *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), sourceSize, options.GetEscapeNonPrintables());
// UTF8 specialization: forwards to ReadUTFBufferAndDumpToStream<UTF8>; the
// remaining call arguments are on a line not shown here.
590 ReadStringAndDumpToStream<StringElementType::UTF8> (ReadStringAndDumpToStreamOptions options)
592 return ReadUTFBufferAndDumpToStream<UTF8>(options,
// UTF16 specialization: forwards to ReadUTFBufferAndDumpToStream<UTF16>; the
// remaining call arguments are on a line not shown here.
598 ReadStringAndDumpToStream<StringElementType::UTF16> (ReadStringAndDumpToStreamOptions options)
600 return ReadUTFBufferAndDumpToStream<UTF16>(options,
// UTF32 specialization: forwards to ReadUTFBufferAndDumpToStream<UTF32>; the
// remaining call arguments are on a line not shown here.
606 ReadStringAndDumpToStream<StringElementType::UTF32> (ReadStringAndDumpToStreamOptions options)
608 return ReadUTFBufferAndDumpToStream<UTF32>(options,
// UTF8 specialization for data that is already in an LLDB-side buffer: the
// data is dumped without a conversion function (nullptr), since it is
// already UTF-8.
614 ReadBufferAndDumpToStream<StringElementType::UTF8> (ReadBufferAndDumpToStreamOptions options)
616 assert(options.GetStream() && "need a Stream to print the string to");
618 return DumpUTFBufferToStream<UTF8>(nullptr, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
// ASCII specialization for buffer data: delegates to the UTF8 specialization.
623 ReadBufferAndDumpToStream<StringElementType::ASCII> (ReadBufferAndDumpToStreamOptions options)
625 // treat ASCII the same as UTF8
626 // FIXME: can we optimize ASCII some more?
627 return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
// UTF16 specialization for buffer data: dumps the buffer using
// ConvertUTF16toUTF8 as the conversion function.
632 ReadBufferAndDumpToStream<StringElementType::UTF16> (ReadBufferAndDumpToStreamOptions options)
634 assert(options.GetStream() && "need a Stream to print the string to");
636 return DumpUTFBufferToStream(ConvertUTF16toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
// UTF32 specialization for buffer data: dumps the buffer using
// ConvertUTF32toUTF8 as the conversion function.
641 ReadBufferAndDumpToStream<StringElementType::UTF32> (ReadBufferAndDumpToStreamOptions options)
643 assert(options.GetStream() && "need a Stream to print the string to");
645 return DumpUTFBufferToStream(ConvertUTF32toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
648 } // namespace formatters
650 } // namespace lldb_private