1 //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "lldb/DataFormatters/StringPrinter.h"
12 #include "lldb/Core/DataExtractor.h"
13 #include "lldb/Core/Debugger.h"
14 #include "lldb/Core/Error.h"
15 #include "lldb/Core/ValueObject.h"
16 #include "lldb/Target/Process.h"
17 #include "lldb/Target/Target.h"
19 #include "llvm/Support/ConvertUTF.h"
26 using namespace lldb_private;
27 using namespace lldb_private::formatters;
29 // I can't use a std::unique_ptr for this because the Deleter is a template argument there
30 // and I want the same type to represent both pointers I want to free and pointers I don't need
31 // to free - which is what this class essentially is
32 // It's very specialized to the needs of this file, and not suggested for general use
// Holder for a pointer to bytes, a size, and an optional Deleter callback.
// T is the element type of the stored pointer, U is an alternate element type
// accepted by one constructor, and S is the type used for the size argument.
// NOTE(review): member initialisers, accessors and several bodies are not
// visible in this listing; the comments below describe only what is shown.
33 template <typename T = uint8_t, typename U = char, typename S = size_t>
34 struct StringPrinterBufferPointer
// Callback type that receives the stored const T* pointer.
38 typedef std::function<void(const T*)> Deleter;
// Construct from nullptr.
40 StringPrinterBufferPointer (std::nullptr_t ptr) :
// Construct from a T buffer and a size; the deleter defaults to nullptr.
46 StringPrinterBufferPointer(const T* bytes, S size, Deleter deleter = nullptr) :
// Same as above, but the bytes are supplied as const U*.
52 StringPrinterBufferPointer(const U* bytes, S size, Deleter deleter = nullptr) :
// Move construction: the deleter is copied from rhs.
58 StringPrinterBufferPointer(StringPrinterBufferPointer&& rhs) :
61 m_deleter(rhs.m_deleter)
// Copy construction: the deleter is copied and rhs.m_data is reset to nullptr,
// so after the copy rhs no longer refers to the buffer even though it is const.
66 StringPrinterBufferPointer(const StringPrinterBufferPointer& rhs) :
69 m_deleter(rhs.m_deleter)
71 rhs.m_data = nullptr; // this is why m_data has to be mutable
// Destructor: acts only when both a data pointer and a deleter are set
// (the guarded statement is elided; presumably it invokes the deleter - confirm).
86 ~StringPrinterBufferPointer ()
88 if (m_data && m_deleter)
// Copy assignment: same guard as the destructor for the currently held buffer,
// then takes rhs's deleter and resets rhs.m_data to nullptr.
93 StringPrinterBufferPointer&
94 operator = (const StringPrinterBufferPointer& rhs)
96 if (m_data && m_deleter)
100 m_deleter = rhs.m_deleter;
101 rhs.m_data = nullptr;
// mutable so that the const-reference copy operations above can null it out.
106 mutable const T* m_data;
111 // we define this for all values of type but only implement it for those we care about
112 // that's good because we get linker errors for any unsupported type
// Primary template, declared only. buffer and buffer_end delimit the input
// bytes; next is an output reference that the specializations set to the
// position where scanning should resume.
113 template <StringElementType type>
114 static StringPrinterBufferPointer<>
115 GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next);
117 // mimic isprint() for Unicode codepoints
// Tests the code point against the ranges labelled on each condition below.
// NOTE(review): the statements guarded by each test and the final return are
// not visible in this listing; presumably each match means "not printable" and
// everything else is printable - confirm against the full source.
119 isprint(char32_t codepoint)
121 if (codepoint <= 0x1F || codepoint == 0x7F) // C0
125 if (codepoint >= 0x80 && codepoint <= 0x9F) // C1
129 if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators
133 if (codepoint == 0x200E || codepoint == 0x200F || (codepoint >= 0x202A && codepoint <= 0x202E)) // bidirectional text control
137 if (codepoint >= 0xFFF9 && codepoint <= 0xFFFF) // interlinears and generally specials
// ASCII specialization of GetPrintableImpl.
// NOTE(review): most of the body is elided in this listing; only the printable
// test and the hex-escape path are visible.
145 StringPrinterBufferPointer<>
146 GetPrintableImpl<StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
// Start from an empty (nullptr) result.
148 StringPrinterBufferPointer<> retval = {nullptr};
183 if (isprint(*buffer))
// Hex-escape path (presumably the else-branch of the test above - confirm):
// "\x" plus two hex digits is 4 characters, and sprintf also writes a
// terminating NUL, hence the 5-byte allocation and the recorded size of 4.
187 uint8_t* data = new uint8_t[5];
188 sprintf((char*)data,"\\x%02x",*buffer);
// The lambda is the deleter that releases the array allocated above.
189 retval = {data, 4, [] (const uint8_t* c) {delete[] c;} };
// Assemble a Unicode code point from the bytes of a multi-byte UTF-8 sequence.
//
// Each overload handles one encoded length. The lead byte's length marker
// (192 = 0xC0, 224 = 0xE0, 240 = 0xF0) and each continuation byte's 0x80 marker
// (128) are subtracted, and the remaining 6-bit groups are weighted by
// 64, 4096 and 262144 (2^6, 2^12, 2^18).
//
// The bytes are not validated: callers must pass a lead byte matching the
// overload's length followed by continuation bytes.

// 2-byte sequence: 110xxxxx 10yyyyyy
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1)
{
    return (c0-192)*64+(c1-128);
}

// 3-byte sequence: 1110xxxx 10yyyyyy 10zzzzzz
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2)
{
    return (c0-224)*4096+(c1-128)*64+(c2-128);
}

// 4-byte sequence: 11110xxx 10yyyyyy 10zzzzzz 10wwwwww
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3)
{
    // c1 carries bits 12..17. The previous code used c2 here, which decoded
    // every code point above U+FFFF incorrectly.
    return (c0-240)*262144+(c1-128)*4096+(c2-128)*64+(c3-128);
}
// UTF-8 specialization of GetPrintableImpl.
// Determines the encoded length of the sequence starting at *buffer, decodes it
// to a code point, and either hands back the original bytes (printable) or a
// freshly allocated "\U%08x" escape (not printable).
// NOTE(review): case labels, braces and several statements are elided in this
// listing.
215 StringPrinterBufferPointer<>
216 GetPrintableImpl<StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
218 StringPrinterBufferPointer<> retval {nullptr};
// Number of bytes the lead byte claims this sequence occupies.
220 unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer);
// NOTE(review): "1+buffer_end-buffer" counts buffer_end itself as a readable
// byte, but the callers visible in this file compute the end pointer as
// start + size (one past the last byte). That looks like it allows one byte
// beyond the data to be consumed - confirm the intended convention.
222 if (1+buffer_end-buffer < utf8_encoded_len)
224 // I don't have enough bytes - print whatever I have left
225 retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)};
230 char32_t codepoint = 0;
// Dispatch on the encoded length (the case labels are elided).
231 switch (utf8_encoded_len)
234 // this is just an ASCII byte - ask ASCII
235 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next);
// 2-, 3- and 4-byte sequences are decoded by the matching overload.
237 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1));
240 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2));
243 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3));
246 // this is probably some bogus non-character thing
247 // just print it as-is and hope to sync up again soon
// Printable code points are returned as the original bytes, without a deleter.
288 if (isprint(codepoint))
289 retval = {buffer,utf8_encoded_len};
// Otherwise (presumably the else-branch - confirm) build an escape:
// "\U" plus eight hex digits is 10 characters, plus the NUL sprintf writes,
// hence 11 bytes allocated and a recorded size of 10.
292 uint8_t* data = new uint8_t[11];
293 sprintf((char*)data,"\\U%08x",codepoint);
294 retval = { data,10,[] (const uint8_t* c) {delete[] c;} };
// Resume scanning right after the sequence just handled.
299 next = buffer + utf8_encoded_len;
303 // this should not happen - but just in case.. try to resync at some point
309 // Given a sequence of bytes, this function returns:
310 // a sequence of bytes to actually print out + a length
311 // the following unscanned position of the buffer is in next
// Runtime dispatcher: forwards to the GetPrintableImpl specialization that
// matches type. Only the ASCII and UTF8 cases are visible in this listing;
// how other element types are handled is not shown.
312 static StringPrinterBufferPointer<>
313 GetPrintable(StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
320 case StringElementType::ASCII:
321 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next);
322 case StringElementType::UTF8:
323 return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next);
329 // use this call if you already have an LLDB-side buffer for the data
// Writes the contents of data to stream as a quoted string: an optional prefix
// character, the opening quote, the (converted and optionally escaped) text,
// and the closing quote.
// ConvertFunction converts SourceDataType units to UTF-8; the UTF-8 callers in
// this file pass nullptr for it.
// NOTE(review): several parameter lines (stream, prefix_token, quote,
// sourceSize) and many braces/conditions are elided in this listing.
330 template<typename SourceDataType>
332 DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**,
333 const SourceDataType*,
337 const DataExtractor& data,
342 bool escapeNonPrintables)
// Optional prefix character, then the opening quote.
344 if (prefix_token != 0)
345 stream.Printf("%c",prefix_token);
347 stream.Printf("%c",quote);
// Only produce text when the extractor actually holds data.
348 if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd())
350 const int bufferSPSize = data.GetByteSize();
// NOTE(review): origin_encoding / 4 equals 2 * sizeof(SourceDataType), so this
// yields half the number of SourceDataType units that fit in the buffer.
// The guarding condition is elided - confirm the divisor is intended.
353 const int origin_encoding = 8*sizeof(SourceDataType);
354 sourceSize = bufferSPSize/(origin_encoding / 4);
357 const SourceDataType *data_ptr = (const SourceDataType*)data.GetDataStart();
358 const SourceDataType *data_end_ptr = data_ptr + sourceSize;
// Scan forward and pull data_end_ptr back to the current position
// (the triggering condition is elided; presumably a zero terminator - confirm).
360 while (data_ptr < data_end_ptr)
364 data_end_ptr = data_ptr;
// Rewind to the start of the data after the scan.
370 data_ptr = (const SourceDataType*)data.GetDataStart();
372 lldb::DataBufferSP utf8_data_buffer_sp;
373 UTF8* utf8_data_ptr = nullptr;
374 UTF8* utf8_data_end_ptr = nullptr;
// Conversion path (presumably taken when ConvertFunction is non-null - confirm):
// allocate a zero-filled buffer four times the source byte size and convert into it.
378 utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0));
379 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes();
380 utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
381 ConvertFunction ( &data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion );
382 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr
386 // just copy the pointers - the cast is necessary to make the compiler happy
387 // but this should only happen if we are reading UTF8 data
388 utf8_data_ptr = (UTF8*)data_ptr;
389 utf8_data_end_ptr = (UTF8*)data_end_ptr;
392 // since we tend to accept partial data (and even partially malformed data)
393 // we might end up with no NULL terminator before the end_ptr
394 // hence we need to take a slower route and ensure we stay within boundaries
395 for (;utf8_data_ptr < utf8_data_end_ptr;)
400 if (escapeNonPrintables)
// Ask GetPrintable for the bytes to emit for the next UTF-8 sequence and for
// the position to continue from.
402 uint8_t* next_data = nullptr;
403 auto printable = GetPrintable(StringElementType::UTF8, utf8_data_ptr, utf8_data_end_ptr, next_data);
404 auto printable_bytes = printable.GetBytes();
405 auto printable_size = printable.GetSize();
406 if (!printable_bytes || !next_data)
408 // GetPrintable() failed on us - print one byte in a desperate resync attempt
409 printable_bytes = utf8_data_ptr;
411 next_data = utf8_data_ptr+1;
// Emit the chosen bytes one character at a time, then advance.
413 for (unsigned c = 0; c < printable_size; c++)
414 stream.Printf("%c", *(printable_bytes+c));
415 utf8_data_ptr = (uint8_t*)next_data;
// Non-escaping path: the current byte is written unchanged.
419 stream.Printf("%c",*utf8_data_ptr);
// Closing quote.
425 stream.Printf("%c",quote);
// Builds default options via the delegating default constructor, then takes the
// escape-non-printables setting from the debugger that owns valobj's target.
// NOTE(review): the result of GetTargetSP() is dereferenced without a check -
// confirm a ValueObject passed here always has a target.
429 lldb_private::formatters::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) :
430 ReadStringAndDumpToStreamOptions()
432 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
// Builds default options via the delegating default constructor, then takes the
// escape-non-printables setting from the debugger that owns valobj's target.
// NOTE(review): the result of GetTargetSP() is dereferenced without a check -
// confirm a ValueObject passed here always has a target.
435 lldb_private::formatters::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) :
436 ReadBufferAndDumpToStreamOptions()
438 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
442 namespace lldb_private
// ASCII specialization: reads a C string from the process at
// options.GetLocation() and prints it to options.GetStream() as a quoted
// string, optionally escaping non-printable bytes.
// NOTE(review): the return type, braces, early returns and some declarations
// are elided in this listing.
450 ReadStringAndDumpToStream<StringElementType::ASCII> (ReadStringAndDumpToStreamOptions options)
452 assert(options.GetStream() && "need a Stream to print the string to");
455 ProcessSP process_sp(options.GetProcessSP());
// Nothing can be read without a process or from address 0.
457 if (process_sp.get() == nullptr || options.GetLocation() == 0)
// Choose how many bytes to read: the target's maximum summary size when no
// size was given, the smaller of the two when the maximum is honoured, or
// (presumably in the elided else-branch) the requested size as-is.
462 if (options.GetSourceSize() == 0)
463 size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
464 else if (!options.GetIgnoreMaxLength())
465 size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary());
467 size = options.GetSourceSize();
// Zero-filled local buffer that receives the string from the process.
469 lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0));
471 process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error);
476 char prefix_token = options.GetPrefixToken();
477 char quote = options.GetQuote();
// Opening: prefix plus quote when a prefix is set; the quote-only Printf is
// presumably the else-branch (the else line is elided - confirm).
479 if (prefix_token != 0)
480 options.GetStream()->Printf("%c%c",prefix_token,quote);
482 options.GetStream()->Printf("%c",quote);
// One past the last byte of the local buffer.
484 uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize();
486 // since we tend to accept partial data (and even partially malformed data)
487 // we might end up with no NULL terminator before the end_ptr
488 // hence we need to take a slower route and ensure we stay within boundaries
// NOTE(review): the condition evaluates *data before (data < data_end), so when
// data reaches data_end the byte one past the buffer is read before the bounds
// test can stop the loop. Testing the bound first would avoid that.
489 for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);)
491 if (options.GetEscapeNonPrintables())
// Ask GetPrintable for the bytes to emit for this byte and where to continue.
493 uint8_t* next_data = nullptr;
494 auto printable = GetPrintable(StringElementType::ASCII, data, data_end, next_data);
495 auto printable_bytes = printable.GetBytes();
496 auto printable_size = printable.GetSize();
497 if (!printable_bytes || !next_data)
499 // GetPrintable() failed on us - print one byte in a desperate resync attempt
500 printable_bytes = data;
// Emit the chosen bytes one character at a time, then advance.
504 for (unsigned c = 0; c < printable_size; c++)
505 options.GetStream()->Printf("%c", *(printable_bytes+c));
506 data = (uint8_t*)next_data;
// Non-escaping path: the current byte is written unchanged.
510 options.GetStream()->Printf("%c",*data);
// Closing quote.
516 options.GetStream()->Printf("%c",quote);
// Reads a string of SourceDataType units from the process at
// options.GetLocation() into a local buffer and hands it to
// DumpUTFBufferToStream for conversion and printing.
// NOTE(review): the return type, braces, early returns and some declarations
// (e.g. error) are elided in this listing.
521 template<typename SourceDataType>
523 ReadUTFBufferAndDumpToStream (const ReadStringAndDumpToStreamOptions& options,
524 ConversionResult (*ConvertFunction) (const SourceDataType**,
525 const SourceDataType*,
530 assert(options.GetStream() && "need a Stream to print the string to");
// Address 0 and LLDB_INVALID_ADDRESS are both rejected.
532 if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS)
535 lldb::ProcessSP process_sp(options.GetProcessSP());
// Width of one source unit in bytes and in bits; only 8, 16 and 32 bits are accepted.
540 const int type_width = sizeof(SourceDataType);
541 const int origin_encoding = 8 * type_width ;
542 if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32)
544 // if not UTF8, I need a conversion function to return proper UTF8
545 if (origin_encoding != 8 && !ConvertFunction)
// Runtime check of the stream, in addition to the assert above.
548 if (!options.GetStream())
551 uint32_t sourceSize = options.GetSourceSize();
552 bool needs_zero_terminator = options.GetNeedsZeroTermination();
// With no explicit size (the guarding condition is elided; presumably
// sourceSize == 0 - confirm) fall back to the target's maximum summary size
// and require a terminator; otherwise clamp unless the maximum is ignored.
556 sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
557 needs_zero_terminator = true;
559 else if (!options.GetIgnoreMaxLength())
560 sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary());
// Buffer size in bytes. NOTE(review): a uint32_t product stored in an int -
// confirm very large sizes cannot reach this point.
562 const int bufferSPSize = sourceSize * type_width;
564 lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0));
566 if (!buffer_sp->GetBytes())
570 char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes());
// Terminated strings are read with ReadStringFromMemory; the raw
// ReadMemoryFromInferior call is presumably the else-branch (elided - confirm).
572 if (needs_zero_terminator)
573 process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width);
575 process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error);
// Message written to the stream (the guarding condition is elided; presumably
// a failed read - confirm).
579 options.GetStream()->Printf("unable to read data");
// Wrap the local buffer using the process byte order and address size, then
// delegate the conversion and printing.
583 DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize());
585 return DumpUTFBufferToStream(ConvertFunction, data, *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), sourceSize, options.GetEscapeNonPrintables());
// UTF-8 specialization: forwards to ReadUTFBufferAndDumpToStream<UTF8>.
// NOTE(review): the remaining call arguments are elided in this listing.
590 ReadStringAndDumpToStream<StringElementType::UTF8> (ReadStringAndDumpToStreamOptions options)
592 return ReadUTFBufferAndDumpToStream<UTF8>(options,
// UTF-16 specialization: forwards to ReadUTFBufferAndDumpToStream<UTF16>
// (remaining call arguments elided).
598 ReadStringAndDumpToStream<StringElementType::UTF16> (ReadStringAndDumpToStreamOptions options)
600 return ReadUTFBufferAndDumpToStream<UTF16>(options,
// UTF-32 specialization: forwards to ReadUTFBufferAndDumpToStream<UTF32>
// (remaining call arguments elided).
606 ReadStringAndDumpToStream<StringElementType::UTF32> (ReadStringAndDumpToStreamOptions options)
608 return ReadUTFBufferAndDumpToStream<UTF32>(options,
// UTF-8 specialization for data that is already in a local buffer
// (options.GetData()). nullptr is passed as the conversion function, so
// DumpUTFBufferToStream is given no converter for this data.
614 ReadBufferAndDumpToStream<StringElementType::UTF8> (ReadBufferAndDumpToStreamOptions options)
616 assert(options.GetStream() && "need a Stream to print the string to");
618 return DumpUTFBufferToStream<UTF8>(nullptr, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
// ASCII specialization: delegates unchanged to the UTF-8 specialization.
623 ReadBufferAndDumpToStream<StringElementType::ASCII> (ReadBufferAndDumpToStreamOptions options)
625 // treat ASCII the same as UTF8
626 // FIXME: can we optimize ASCII some more?
627 return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
// UTF-16 specialization for data already in a local buffer: prints it via
// DumpUTFBufferToStream with ConvertUTF16toUTF8 as the conversion function.
632 ReadBufferAndDumpToStream<StringElementType::UTF16> (ReadBufferAndDumpToStreamOptions options)
634 assert(options.GetStream() && "need a Stream to print the string to");
636 return DumpUTFBufferToStream(ConvertUTF16toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
// UTF-32 specialization for data already in a local buffer: prints it via
// DumpUTFBufferToStream with ConvertUTF32toUTF8 as the conversion function.
641 ReadBufferAndDumpToStream<StringElementType::UTF32> (ReadBufferAndDumpToStreamOptions options)
643 assert(options.GetStream() && "need a Stream to print the string to");
645 return DumpUTFBufferToStream(ConvertUTF32toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
648 } // namespace formatters
650 } // namespace lldb_private