1 //===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===---------------------------------------------------------------------===//
11 /// This file supports working with JSON data.
15 /// - classes which hold dynamically-typed parsed JSON structures
16 /// These are value types that can be composed, inspected, and modified.
17 /// See json::Value, and the related types json::Object and json::Array.
19 /// - functions to parse JSON text into Values, and to serialize Values to text.
20 /// See parse(), operator<<, and format_provider.
22 /// - a convention and helpers for mapping between json::Value and user-defined
23 /// types. See fromJSON(), ObjectMapper, and the class comment on Value.
25 /// Typically, JSON data would be read from an external source, parsed into
26 /// a Value, and then converted into some native data structure before doing
27 /// real work on it. (And vice versa when writing).
29 /// Other serialization mechanisms you may consider:
31 /// - YAML is also text-based, and more human-readable than JSON. It's a more
32 /// complex format and data model, and YAML parsers aren't ubiquitous.
33 /// YAMLParser.h is a streaming parser suitable for parsing large documents
34 /// (including JSON, as YAML is a superset). It can be awkward to use
35 /// directly. YAML I/O (YAMLTraits.h) provides data mapping that is more
36 /// declarative than the toJSON/fromJSON conventions here.
38 /// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it
39 /// encodes LLVM IR ("bitcode"), but it can be a container for other data.
40 /// Low-level reader/writer libraries are in Bitcode/Bitstream*.h
42 //===---------------------------------------------------------------------===//
44 #ifndef LLVM_SUPPORT_JSON_H
45 #define LLVM_SUPPORT_JSON_H
47 #include "llvm/ADT/DenseMap.h"
48 #include "llvm/ADT/SmallVector.h"
49 #include "llvm/ADT/StringRef.h"
50 #include "llvm/Support/Error.h"
51 #include "llvm/Support/FormatVariadic.h"
52 #include "llvm/Support/raw_ostream.h"
58 // === String encodings ===
60 // JSON strings are character sequences (not byte sequences like std::string).
61 // We need to know the encoding, and for simplicity only support UTF-8.
63 // - When parsing, invalid UTF-8 is a syntax error like any other
65 // - When creating Values from strings, callers must ensure they are UTF-8.
66 // with asserts on, invalid UTF-8 will crash the program
67 // with asserts off, we'll substitute the replacement character (U+FFFD)
68 // Callers can use json::isUTF8() and json::fixUTF8() for validation.
70 // - When retrieving strings from Values (e.g. asString()), the result will
71 // always be valid UTF-8.
73 /// Returns true if \p S is valid UTF-8, which is required for use as JSON.
74 /// If it returns false, \p ErrOffset is set to a byte offset near the first error.
75 bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
76 /// Replaces invalid UTF-8 sequences in \p S with the replacement character
77 /// (U+FFFD). The returned string is valid UTF-8.
78 /// This is much slower than isUTF8, so test that first.
79 std::string fixUTF8(llvm::StringRef S);
84 template <typename T> Value toJSON(const llvm::Optional<T> &Opt);
86 /// An Object is a JSON object, which maps strings to heterogeneous JSON values.
87 /// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.
89 using Storage = DenseMap<ObjectKey, Value, llvm::DenseMapInfo<StringRef>>;
93 using key_type = ObjectKey;
94 using mapped_type = Value;
95 using value_type = Storage::value_type;
96 using iterator = Storage::iterator;
97 using const_iterator = Storage::const_iterator;
99 explicit Object() = default;
100 // KV is a trivial key-value struct for list-initialization.
101 // (using std::pair forces extra copies).
103 explicit Object(std::initializer_list<KV> Properties);
105 iterator begin() { return M.begin(); }
106 const_iterator begin() const { return M.begin(); }
107 iterator end() { return M.end(); }
108 const_iterator end() const { return M.end(); }
110 bool empty() const { return M.empty(); }
111 size_t size() const { return M.size(); }
113 void clear() { M.clear(); }
114 std::pair<iterator, bool> insert(KV E);
115 template <typename... Ts>
116 std::pair<iterator, bool> try_emplace(const ObjectKey &K, Ts &&... Args) {
117 return M.try_emplace(K, std::forward<Ts>(Args)...);
119 template <typename... Ts>
120 std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) {
121 return M.try_emplace(std::move(K), std::forward<Ts>(Args)...);
124 iterator find(StringRef K) { return M.find_as(K); }
125 const_iterator find(StringRef K) const { return M.find_as(K); }
126 // operator[] acts as if Value was default-constructible as null.
127 Value &operator[](const ObjectKey &K);
128 Value &operator[](ObjectKey &&K);
129 // Look up a property, returning nullptr if it doesn't exist.
130 Value *get(StringRef K);
131 const Value *get(StringRef K) const;
132 // Typed accessors return None/nullptr if
133 // - the property doesn't exist
134 // - or it has the wrong type
135 llvm::Optional<std::nullptr_t> getNull(StringRef K) const;
136 llvm::Optional<bool> getBoolean(StringRef K) const;
137 llvm::Optional<double> getNumber(StringRef K) const;
138 llvm::Optional<int64_t> getInteger(StringRef K) const;
139 llvm::Optional<llvm::StringRef> getString(StringRef K) const;
140 const json::Object *getObject(StringRef K) const;
141 json::Object *getObject(StringRef K);
142 const json::Array *getArray(StringRef K) const;
143 json::Array *getArray(StringRef K);
145 bool operator==(const Object &LHS, const Object &RHS);
146 inline bool operator!=(const Object &LHS, const Object &RHS) {
147 return !(LHS == RHS);
150 /// An Array is a JSON array, which contains heterogeneous JSON values.
151 /// It simulates std::vector<Value>.
153 std::vector<Value> V;
156 using value_type = Value;
157 using iterator = std::vector<Value>::iterator;
158 using const_iterator = std::vector<Value>::const_iterator;
160 explicit Array() = default;
161 explicit Array(std::initializer_list<Value> Elements);
162 template <typename Collection> explicit Array(const Collection &C) {
163 for (const auto &V : C)
167 Value &operator[](size_t I) { return V[I]; }
168 const Value &operator[](size_t I) const { return V[I]; }
169 Value &front() { return V.front(); }
170 const Value &front() const { return V.front(); }
171 Value &back() { return V.back(); }
172 const Value &back() const { return V.back(); }
173 Value *data() { return V.data(); }
174 const Value *data() const { return V.data(); }
176 iterator begin() { return V.begin(); }
177 const_iterator begin() const { return V.begin(); }
178 iterator end() { return V.end(); }
179 const_iterator end() const { return V.end(); }
181 bool empty() const { return V.empty(); }
182 size_t size() const { return V.size(); }
184 void clear() { V.clear(); }
185 void push_back(const Value &E) { V.push_back(E); }
186 void push_back(Value &&E) { V.push_back(std::move(E)); }
187 template <typename... Args> void emplace_back(Args &&... A) {
188 V.emplace_back(std::forward<Args>(A)...);
190 void pop_back() { V.pop_back(); }
191 // FIXME: insert() takes const_iterator since C++11, old libstdc++ disagrees.
192 iterator insert(iterator P, const Value &E) { return V.insert(P, E); }
193 iterator insert(iterator P, Value &&E) {
194 return V.insert(P, std::move(E));
196 template <typename It> iterator insert(iterator P, It A, It Z) {
197 return V.insert(P, A, Z);
199 template <typename... Args> iterator emplace(const_iterator P, Args &&... A) {
200 return V.emplace(P, std::forward<Args>(A)...);
203 friend bool operator==(const Array &L, const Array &R) { return L.V == R.V; }
205 inline bool operator!=(const Array &L, const Array &R) { return !(L == R); }
207 /// A Value is a JSON value of unknown type.
208 /// They can be copied, but should generally be moved.
210 /// === Composing values ===
212 /// You can implicitly construct Values from:
213 /// - strings: std::string, SmallString, formatv, StringRef, char*
214 /// (char* and StringRef are references, not copies!)
218 /// - arrays: {"foo", 42.0, false}
219 /// - serializable things: types with toJSON(const T&)->Value, found by ADL
221 /// They can also be constructed from object/array helpers:
222 /// - json::Object is a type like map<ObjectKey, Value>
223 /// - json::Array is a type like vector<Value>
224 /// These can be list-initialized, or used to build up collections in a loop.
225 /// json::Array(Collection) converts all items in a collection to Values.
227 /// === Inspecting values ===
229 /// Each Value is one of the JSON kinds:
232 /// number (double or int64)
233 /// string (StringRef)
234 /// array (json::Array)
235 /// object (json::Object)
237 /// The kind can be queried directly, or implicitly via the typed accessors:
238 /// if (Optional<StringRef> S = E.getAsString())
239 /// assert(E.kind() == Value::String);
241 /// Array and Object also have typed indexing accessors for easy traversal:
242 /// Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )");
243 /// if (Object* O = E->getAsObject())
244 /// if (Object* Opts = O->getObject("options"))
245 /// if (Optional<StringRef> Font = Opts->getString("font"))
246 /// assert(Opts->at("font").kind() == Value::String);
248 /// === Converting JSON values to C++ types ===
250 /// The convention is to have a deserializer function findable via ADL:
251 /// fromJSON(const json::Value&, T&)->bool
252 /// Deserializers are provided for:
254 /// - int and int64_t
257 /// - vector<T>, where T is deserializable
258 /// - map<string, T>, where T is deserializable
259 /// - Optional<T>, where T is deserializable
260 /// ObjectMapper can help writing fromJSON() functions for object types.
262 /// For conversion in the other direction, the serializer function is:
263 /// toJSON(const T&) -> json::Value
264 /// If this exists, then it also allows constructing Value from T, and can
265 /// be used to serialize vector<T>, map<string, T>, and Optional<T>.
267 /// === Serialization ===
269 /// Values can be serialized to JSON:
270 /// 1) raw_ostream << Value // Basic formatting.
271 /// 2) raw_ostream << formatv("{0}", Value) // Basic formatting.
272 /// 3) raw_ostream << formatv("{0:2}", Value) // Pretty-print with indent 2.
275 /// Expected<Value> E = json::parse("[1, 2, null]");
276 /// assert(E && E->kind() == Value::Array);
282 /// Number values can store both int64s and doubles at full precision,
283 /// depending on what they were constructed/parsed from.
290 // It would be nice to have Value() be null. But that would make {} null too.
291 Value(const Value &M) { copyFrom(M); }
292 Value(Value &&M) { moveFrom(std::move(M)); }
293 Value(std::initializer_list<Value> Elements);
294 Value(json::Array &&Elements) : Type(T_Array) {
295 create<json::Array>(std::move(Elements));
297 Value(json::Object &&Properties) : Type(T_Object) {
298 create<json::Object>(std::move(Properties));
300 // Strings: types with value semantics. Must be valid UTF-8.
301 Value(std::string V) : Type(T_String) {
302 if (LLVM_UNLIKELY(!isUTF8(V))) {
303 assert(false && "Invalid UTF-8 in value used as JSON");
304 V = fixUTF8(std::move(V));
306 create<std::string>(std::move(V));
308 Value(const llvm::SmallVectorImpl<char> &V)
309 : Value(std::string(V.begin(), V.end())) {} // Copies V's bytes into an owned std::string.
310 Value(const llvm::formatv_object_base &V) : Value(V.str()) {} // Delegates to the std::string constructor.
311 // Strings: types with reference semantics. Must be valid UTF-8.
312 Value(StringRef V) : Type(T_StringRef) {
313 create<llvm::StringRef>(V);
314 if (LLVM_UNLIKELY(!isUTF8(V))) {
315 assert(false && "Invalid UTF-8 in value used as JSON");
316 *this = Value(fixUTF8(V));
319 Value(const char *V) : Value(StringRef(V)) {}
320 Value(std::nullptr_t) : Type(T_Null) {}
321 // Boolean (disallow implicit conversions).
322 // (The last template parameter is a dummy to keep templates distinct.)
325 typename = typename std::enable_if<std::is_same<T, bool>::value>::type,
327 Value(T B) : Type(T_Boolean) {
330 // Integers (except boolean). Must be non-narrowing convertible to int64_t.
333 typename = typename std::enable_if<std::is_integral<T>::value>::type,
334 typename = typename std::enable_if<!std::is_same<T, bool>::value>::type>
335 Value(T I) : Type(T_Integer) {
336 create<int64_t>(int64_t{I});
338 // Floating point. Must be non-narrowing convertible to double.
339 template <typename T,
341 typename std::enable_if<std::is_floating_point<T>::value>::type,
343 Value(T D) : Type(T_Double) {
344 create<double>(double{D});
346 // Serializable types: with a toJSON(const T&)->Value function, found by ADL.
347 template <typename T,
348 typename = typename std::enable_if<std::is_same<
349 Value, decltype(toJSON(*(const T *)nullptr))>::value>,
351 Value(const T &V) : Value(toJSON(V)) {}
353 Value &operator=(const Value &M) {
358 Value &operator=(Value &&M) {
360 moveFrom(std::move(M));
363 ~Value() { destroy(); }
382 llvm_unreachable("Unknown kind");
385 // Typed accessors return None/nullptr if the Value is not of this type.
386 llvm::Optional<std::nullptr_t> getAsNull() const {
387 if (LLVM_LIKELY(Type == T_Null))
391 llvm::Optional<bool> getAsBoolean() const {
392 if (LLVM_LIKELY(Type == T_Boolean))
396 llvm::Optional<double> getAsNumber() const {
397 if (LLVM_LIKELY(Type == T_Double))
399 if (LLVM_LIKELY(Type == T_Integer))
400 return as<int64_t>();
403 // Succeeds if the Value is a Number, and exactly representable as int64_t.
404 llvm::Optional<int64_t> getAsInteger() const {
405 if (LLVM_LIKELY(Type == T_Integer))
406 return as<int64_t>();
407 if (LLVM_LIKELY(Type == T_Double)) {
408 double D = as<double>();
409 if (LLVM_LIKELY(std::modf(D, &D) == 0.0 &&
410 D >= double(std::numeric_limits<int64_t>::min()) &&
411 D <= double(std::numeric_limits<int64_t>::max())))
416 llvm::Optional<llvm::StringRef> getAsString() const {
417 if (Type == T_String)
418 return llvm::StringRef(as<std::string>());
419 if (LLVM_LIKELY(Type == T_StringRef))
420 return as<llvm::StringRef>();
423 const json::Object *getAsObject() const {
424 return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
426 json::Object *getAsObject() {
427 return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
429 const json::Array *getAsArray() const {
430 return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
432 json::Array *getAsArray() {
433 return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
436 /// Serializes this Value to JSON, writing it to the provided stream.
437 /// The formatting is compact (no extra whitespace) and deterministic.
438 /// For pretty-printing, use the formatv() format_provider below.
439 friend llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Value &);
443 void copyFrom(const Value &M);
444 // We allow moving from *const* Values, by marking all members as mutable!
445 // This hack is needed to support initializer-list syntax efficiently.
446 // (std::initializer_list<T> is a container of const T).
447 void moveFrom(const Value &&M);
451 template <typename T, typename... U> void create(U &&... V) {
452 new (reinterpret_cast<T *>(Union.buffer)) T(std::forward<U>(V)...);
454 template <typename T> T &as() const {
455 return *reinterpret_cast<T *>(Union.buffer);
458 template <typename Indenter>
459 void print(llvm::raw_ostream &, const Indenter &) const;
460 friend struct llvm::format_provider<llvm::json::Value>;
462 enum ValueType : char {
472 // All members mutable, see moveFrom().
473 mutable ValueType Type;
474 mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, llvm::StringRef,
475 std::string, json::Array, json::Object>
479 bool operator==(const Value &, const Value &);
480 inline bool operator!=(const Value &L, const Value &R) { return !(L == R); }
481 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Value &);
483 /// ObjectKey is used to capture keys in Object. Like Value but:
484 /// - only strings are allowed
485 /// - it's optimized for the string literal case (Owned == nullptr)
486 /// Like Value, strings must be UTF-8. See isUTF8 documentation for details.
489 ObjectKey(const char *S) : ObjectKey(StringRef(S)) {}
490 ObjectKey(std::string S) : Owned(new std::string(std::move(S))) {
491 if (LLVM_UNLIKELY(!isUTF8(*Owned))) {
492 assert(false && "Invalid UTF-8 in value used as JSON");
493 *Owned = fixUTF8(std::move(*Owned));
497 ObjectKey(llvm::StringRef S) : Data(S) {
498 if (LLVM_UNLIKELY(!isUTF8(Data))) {
499 assert(false && "Invalid UTF-8 in value used as JSON");
500 *this = ObjectKey(fixUTF8(S));
503 ObjectKey(const llvm::SmallVectorImpl<char> &V)
504 : ObjectKey(std::string(V.begin(), V.end())) {}
505 ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {}
507 ObjectKey(const ObjectKey &C) { *this = C; }
508 ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {}
509 ObjectKey &operator=(const ObjectKey &C) {
511 Owned.reset(new std::string(*C.Owned));
518 ObjectKey &operator=(ObjectKey &&) = default;
520 operator llvm::StringRef() const { return Data; }
521 std::string str() const { return Data.str(); }
524 // FIXME: this is unnecessarily large (3 pointers). Pointer + length + owned
525 // could be 2 pointers at most.
526 std::unique_ptr<std::string> Owned;
527 llvm::StringRef Data;
530 inline bool operator==(const ObjectKey &L, const ObjectKey &R) {
531 return llvm::StringRef(L) == llvm::StringRef(R);
533 inline bool operator!=(const ObjectKey &L, const ObjectKey &R) {
536 inline bool operator<(const ObjectKey &L, const ObjectKey &R) {
537 return StringRef(L) < StringRef(R);
545 inline Object::Object(std::initializer_list<KV> Properties) {
546 for (const auto &P : Properties) {
547 auto R = try_emplace(P.K, nullptr);
549 R.first->getSecond().moveFrom(std::move(P.V));
552 inline std::pair<Object::iterator, bool> Object::insert(KV E) {
553 return try_emplace(std::move(E.K), std::move(E.V));
556 // Standard deserializers are provided for primitive types.
557 // See comments on Value.
558 inline bool fromJSON(const Value &E, std::string &Out) {
559 if (auto S = E.getAsString()) {
565 inline bool fromJSON(const Value &E, int &Out) {
566 if (auto S = E.getAsInteger()) {
572 inline bool fromJSON(const Value &E, int64_t &Out) {
573 if (auto S = E.getAsInteger()) {
579 inline bool fromJSON(const Value &E, double &Out) {
580 if (auto S = E.getAsNumber()) {
586 inline bool fromJSON(const Value &E, bool &Out) {
587 if (auto S = E.getAsBoolean()) {
593 template <typename T> bool fromJSON(const Value &E, llvm::Optional<T> &Out) {
599 if (!fromJSON(E, Result))
601 Out = std::move(Result);
604 template <typename T> bool fromJSON(const Value &E, std::vector<T> &Out) {
605 if (auto *A = E.getAsArray()) {
607 Out.resize(A->size());
608 for (size_t I = 0; I < A->size(); ++I)
609 if (!fromJSON((*A)[I], Out[I]))
615 template <typename T>
616 bool fromJSON(const Value &E, std::map<std::string, T> &Out) {
617 if (auto *O = E.getAsObject()) {
619 for (const auto &KV : *O)
620 if (!fromJSON(KV.second, Out[llvm::StringRef(KV.first)]))
627 // Allow serialization of Optional<T> for supported T.
628 template <typename T> Value toJSON(const llvm::Optional<T> &Opt) {
629 return Opt ? Value(*Opt) : Value(nullptr);
632 /// Helper for mapping JSON objects onto protocol structs.
636 /// bool fromJSON(const Value &E, MyStruct &R) {
637 /// ObjectMapper O(E);
638 /// if (!O || !O.map("mandatory_field", R.MandatoryField))
640 /// O.map("optional_field", R.OptionalField);
646 ObjectMapper(const Value &E) : O(E.getAsObject()) {}
648 /// True if the mapped Value is an object, i.e. getAsObject() returned non-null.
649 /// Must be checked before calling map(), which dereferences the object pointer.
650 operator bool() const { return O; } // const: only reads O, so const mappers can be tested too.
652 /// Maps a property to a field, if it exists.
653 template <typename T> bool map(StringRef Prop, T &Out) {
654 assert(*this && "Must check this is an object before calling map()");
655 if (const Value *E = O->get(Prop))
656 return fromJSON(*E, Out);
660 /// Maps a property to a field, if it exists.
661 /// (Optional requires special handling, because missing keys are OK).
662 template <typename T> bool map(StringRef Prop, llvm::Optional<T> &Out) {
663 assert(*this && "Must check this is an object before calling map()");
664 if (const Value *E = O->get(Prop))
665 return fromJSON(*E, Out);
674 /// Parses the provided JSON source, or returns a ParseError.
675 /// The returned Value is self-contained and owns its strings (they do not refer
676 /// to the original source).
677 llvm::Expected<Value> parse(llvm::StringRef JSON);
679 class ParseError : public llvm::ErrorInfo<ParseError> {
681 unsigned Line, Column, Offset;
685 ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset)
686 : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {}
687 void log(llvm::raw_ostream &OS) const override {
688 OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg);
690 std::error_code convertToErrorCode() const override {
691 return llvm::inconvertibleErrorCode();
696 /// Allow printing json::Value with formatv().
697 /// The default style is basic/compact formatting, like operator<<.
698 /// A format string like formatv("{0:2}", Value) pretty-prints with indent 2.
699 template <> struct format_provider<llvm::json::Value> {
700 static void format(const llvm::json::Value &, raw_ostream &, StringRef);