1 // Copyright 2014 The Kyua Authors.
2 // All rights reserved.
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of Google Inc. nor the names of its contributors
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include "utils/text/regex.hpp"
32 #include <sys/types.h>
37 #include "utils/auto_array.ipp"
38 #include "utils/defs.hpp"
39 #include "utils/format/macros.hpp"
40 #include "utils/noncopyable.hpp"
41 #include "utils/sanity.hpp"
42 #include "utils/text/exceptions.hpp"
// Shorthand for the utils::text namespace used by the definitions below.
44 namespace text = utils::text;
// Forward declaration of the error-raising helper documented and defined
// below.
// NOTE(review): the tail of this declaration is not visible here -- confirm
// that it is properly terminated in the full file.
50 static void throw_regex_error(const int, const ::regex_t*, const std::string&)
54 /// Constructs and raises a regex_error.
///
/// The exception message is the caller-supplied prefix followed by a colon,
/// a space and the description that regerror(3) writes for the error code.
///
56 /// \param error The error code returned by regcomp(3) or regexec(3).
57 /// \param preg The native regex object that caused this error.
58 /// \param prefix Error message prefix string.
///
60 /// \throw regex_error The constructed exception.
62 throw_regex_error(const int error, const ::regex_t* preg,
63 const std::string& prefix)
67 // TODO(jmmv): Would be nice to handle the case where the message does
68 // not fit in the temporary buffer.
// The return value of regerror(3) is explicitly discarded by the (void)
// cast, so a message that does not fit in the buffer is not detected here.
// NOTE(review): the declaration of buffer is not visible here; sizeof(buffer)
// is only the capacity if buffer is a real array -- confirm.
69 (void)::regerror(error, preg, buffer, sizeof(buffer));
// Unconditionally raise the exception built from the prefix and the message.
71 throw text::regex_error(F("%s: %s") % prefix % buffer);
75 } // anonymous namespace
78 /// Internal implementation for regex_matches.
///
/// Keeps the subject string together with the native match array that
/// regexec(3) fills in.  The type derives from utils::noncopyable.
79 struct utils::text::regex_matches::impl : utils::noncopyable {
80 /// String on which we are matching.
82 /// In theory, we could take a reference here instead of a copy, and make
83 /// it a requirement for the caller to ensure that the lifecycle of the
84 /// input string outlasts the lifecycle of the regex_matches. However, that
85 /// contract is very easy to break with hardcoded strings (as we do in
86 /// tests). Just go for the safer case here.
87 const std::string _string;
89 /// Maximum number of matching groups we expect, including the full match.
91 /// In other words, this is the size of the _matches array.
92 const std::size_t _nmatches;
94 /// Native regular expression match representation.
///
/// Allocated with _nmatches entries by the constructor and reset to NULL
/// when regexec(3) reports REG_NOMATCH, so a NULL array means that the
/// regex did not match.
95 utils::auto_array< ::regmatch_t > _matches;
99 /// This executes the regex on the given string and sets up the internal
100 /// class state based on the results.
///
/// A string that does not match is not treated as an error; it only leaves
/// the match array set to NULL.
///
102 /// \param preg The native regex object.
103 /// \param str The string on which to execute the regex.
104 /// \param ngroups Number of capture groups in the regex. This is an upper
105 /// bound and may be greater than the actual matches.
///
107 /// \throw regex_error If the call to regexec(3) fails for any reason other than REG_NOMATCH.
108 impl(const ::regex_t* preg, const std::string& str,
109 const std::size_t ngroups) :
// NOTE(review): the initializer for _string is not visible here; presumably
// it copies str -- confirm.
// One extra slot because entry 0 of the match array is the whole-regex match.
111 _nmatches(ngroups + 1),
// Reading _nmatches here relies on it being declared before _matches, because
// members are initialized in declaration order.
112 _matches(new ::regmatch_t[_nmatches])
// The regex is executed against the stored _string rather than against str.
// NOTE(review): the remaining arguments of this call are not visible here.
114 const int error = ::regexec(preg, _string.c_str(), _nmatches,
116 if (error == REG_NOMATCH) {
// Not matching is not a failure: drop the array so that the no-match case can
// be detected by checking it against NULL.
117 _matches.reset(NULL);
118 } else if (error != 0) {
// Any other nonzero status is a genuine regexec(3) failure.
119 throw_regex_error(error, preg,
120 F("regexec on '%s' failed") % _string);
/// Builds a regex_matches around an already-constructed implementation.
///
133 /// \param pimpl Constructed implementation of the object.
134 text::regex_matches::regex_matches(std::shared_ptr< impl > pimpl) :
// NOTE(review): the member initializer list and body are not visible here;
// presumably pimpl is stored in _pimpl, which the accessors dereference --
// confirm.
/// Destructor.
// NOTE(review): the destructor body is not visible here.
141 text::regex_matches::~regex_matches(void)
146 /// Returns the number of matches in this object.
148 /// Note that this does not correspond to the number of groups provided at
149 /// construction time. The returned value here accounts for only the returned
/// valid matches, that is, the slots of the native match array whose start
/// offset (rm_so) is not -1.
152 /// \return Number of matches, including the full match.
154 text::regex_matches::count(void) const
156 std::size_t total = 0;
// A NULL match array means that the regex did not match at all; the scan is
// skipped in that case and total stays at zero.
157 if (_pimpl->_matches.get() != NULL) {
158 for (std::size_t i = 0; i < _pimpl->_nmatches; ++i) {
// Slots with an rm_so of -1 are not treated as matches.
// NOTE(review): the statement guarded by this test is not visible here;
// presumably it increments total -- confirm.
159 if (_pimpl->_matches[i].rm_so != -1)
// Sanity check: the count can never exceed the number of slots in the array.
162 INV(total <= _pimpl->_nmatches);
/// Returns the text of a specific match.
///
170 /// \param index Number of the match to get. Index 0 always contains the match
171 /// of the whole regex.
173 /// \pre The regex must have matched the input string.
174 /// \pre index must be lower than count().
176 /// \return The textual match.
178 text::regex_matches::get(const std::size_t index) const
// NOTE(review): count() only counts the slots whose rm_so is not -1, so this
// check does not by itself prove that the slot at index is one of them (for
// example when an earlier optional group did not participate in the match) --
// confirm that callers cannot hit that case.
181 PRE(index < count());
183 const ::regmatch_t* match = &_pimpl->_matches[index];
// Copy the bytes from offset rm_so up to (but excluding) offset rm_eo out of
// the stored subject string.
185 return std::string(_pimpl->_string.c_str() + match->rm_so,
186 match->rm_eo - match->rm_so);
190 /// Checks if there are any matches.
///
/// This only tests whether the native match array is non-NULL; the
/// constructor of the implementation resets that array to NULL when
/// regexec(3) reports REG_NOMATCH.
///
192 /// \return True if the object contains one or more matches; false otherwise.
193 text::regex_matches::operator bool(void) const
195 return _pimpl->_matches.get() != NULL;
199 /// Internal implementation for regex.
///
/// Owns the native regex that regcomp(3) fills in and remembers the
/// capture-group bound.  The type derives from utils::noncopyable.
200 struct utils::text::regex::impl : utils::noncopyable {
201 /// Native regular expression representation.
// NOTE(review): the member that the comment above documents is not visible
// here; the constructor below refers to it as _preg.
204 /// Number of capture groups in the regular expression. This is an upper
205 /// bound and does NOT include the default full string match.
206 std::size_t _ngroups;
210 /// This compiles the given regular expression.
///
/// The expression is always compiled with REG_EXTENDED; REG_ICASE is added
/// when ignore_case is true.
///
212 /// \param regex_ The regular expression to compile.
213 /// \param ngroups Number of capture groups in the regular expression. This
214 /// is an upper bound and does NOT include the default full string match.
216 /// \param ignore_case Whether to ignore case during matching.
///
218 /// \throw regex_error If the call to regcomp(3) fails.
219 impl(const std::string& regex_, const std::size_t ngroups,
220 const bool ignore_case) :
// NOTE(review): the member initializer list is not visible here; presumably
// it stores ngroups in _ngroups -- confirm.
// Extended syntax is always requested; case-insensitive matching is opt-in.
223 const int flags = REG_EXTENDED | (ignore_case ? REG_ICASE : 0);
224 const int error = ::regcomp(&_preg, regex_.c_str(), flags);
// NOTE(review): the check on error that guards this call and the end of the
// statement are not visible here.
226 throw_regex_error(error, &_preg, F("regcomp on '%s' failed")
/// Builds a regex around an already-constructed implementation.
///
/// The shared pointer is copied into _pimpl.
///
240 /// \param pimpl Constructed implementation of the object.
241 text::regex::regex(std::shared_ptr< impl > pimpl) : _pimpl(pimpl)
/// Destructor.
// NOTE(review): the destructor body is not visible here.
247 text::regex::~regex(void)
252 /// Compiles a new regular expression.
254 /// \param regex_ The regular expression to compile.
255 /// \param ngroups Number of capture groups in the regular expression. This is
256 /// an upper bound and does NOT include the default full string match.
257 /// \param ignore_case Whether to ignore case during matching.
259 /// \return A new regular expression, ready to match strings.
261 /// \throw regex_error If the regular expression is invalid and cannot be compiled.
264 text::regex::compile(const std::string& regex_, const std::size_t ngroups,
265 const bool ignore_case)
// The implementation is allocated here and handed to a shared_ptr, which is
// then passed to the regex constructor.
// NOTE(review): the end of this statement is not visible here.
267 return regex(std::shared_ptr< impl >(new impl(regex_, ngroups,
272 /// Matches the regular expression against a string.
///
/// A string that does not match is not an error: the returned object then
/// converts to false.
///
274 /// \param str String to match the regular expression against.
276 /// \return A new regex_matches object with the results of the match.
///
/// \throw regex_error If regexec(3) fails for a reason other than the string
///     not matching.
278 text::regex::match(const std::string& str) const
// The match runs right here: the constructor of regex_matches::impl is what
// calls regexec(3).
280 std::shared_ptr< regex_matches::impl > pimpl(new regex_matches::impl(
281 &_pimpl->_preg, str, _pimpl->_ngroups));
282 return regex_matches(pimpl);
286 /// Compiles and matches a regular expression once.
288 /// This is syntactic sugar to simplify the instantiation of a new regex object
289 /// and its subsequent match on a string.
291 /// \param regex_ The regular expression to compile and match.
292 /// \param str String to match the regular expression against.
293 /// \param ngroups Number of capture groups in the regular expression.
294 /// \param ignore_case Whether to ignore case during matching.
296 /// \return A new regex_matches object with the results of the match.
///
/// \throw regex_error If the regular expression cannot be compiled or if the
///     match itself fails.
298 text::match_regex(const std::string& regex_, const std::string& str,
299 const std::size_t ngroups, const bool ignore_case)
// The compiled regex is a temporary that is discarded once the match has run.
301 return regex::compile(regex_, ngroups, ignore_case).match(str);