/*
 * base64.c:  base64 encoding and decoding functions
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */
#include <string.h>             /* for memcpy() and memset() */

#include <apr_pools.h>
#include <apr_general.h>        /* for APR_INLINE */

#include "svn_pools.h"
#include "svn_error.h"
#include "svn_base64.h"
#include "private/svn_string_private.h"
#include "private/svn_subr_private.h"
/* When asked to format the base64-encoded output as multiple lines,
   we put this many chars in each line (plus one new line char) unless
   we run out of data.
   It is vital for some of the optimizations below that this value is
   a multiple of 4, i.e. that a line always holds whole 4-char groups. */
#define BASE64_LINELEN 76

/* This number of bytes is encoded in a line of base64 chars. */
#define BYTES_PER_LINE (BASE64_LINELEN / 4 * 3)

/* Value -> base64 char mapping table (2^6 entries) */
static const char base64tab[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                "abcdefghijklmnopqrstuvwxyz0123456789+/";
54 /* Binary input --> base64-encoded output */
58 unsigned char buf[3]; /* Bytes waiting to be encoded */
59 size_t buflen; /* Number of bytes waiting */
60 size_t linelen; /* Bytes output so far on this line */
61 apr_pool_t *scratch_pool;
65 /* Base64-encode a group. IN needs to have three bytes and OUT needs
66 to have room for four bytes. The input group is treated as four
67 six-bit units which are treated as lookups into base64tab for the
68 bytes of the output group. */
69 static APR_INLINE void
70 encode_group(const unsigned char *in, char *out)
72 /* Expand input bytes to machine word length (with zero extra cost
74 apr_size_t part0 = in[0];
75 apr_size_t part1 = in[1];
76 apr_size_t part2 = in[2];
78 /* ... to prevent these arithmetic operations from being limited to
79 byte size. This saves non-zero cost conversions of the result when
80 calculating the addresses within base64tab. */
81 out[0] = base64tab[part0 >> 2];
82 out[1] = base64tab[((part0 & 3) << 4) | (part1 >> 4)];
83 out[2] = base64tab[((part1 & 0xf) << 2) | (part2 >> 6)];
84 out[3] = base64tab[part2 & 0x3f];
87 /* Base64-encode a line, i.e. BYTES_PER_LINE bytes from DATA into
88 BASE64_LINELEN chars and append it to STR. It does not assume that
89 a new line char will be appended, though.
90 The code in this function will simply transform the data without
91 performing any boundary checks. Therefore, DATA must have at least
92 BYTES_PER_LINE left and space for at least another BASE64_LINELEN
93 chars must have been pre-allocated in STR before calling this
96 encode_line(svn_stringbuf_t *str, const char *data)
98 /* Translate directly from DATA to STR->DATA. */
99 const unsigned char *in = (const unsigned char *)data;
100 char *out = str->data + str->len;
101 char *end = out + BASE64_LINELEN;
103 /* We assume that BYTES_PER_LINE is a multiple of 3 and BASE64_LINELEN
105 for ( ; out != end; in += 3, out += 4)
106 encode_group(in, out);
108 /* Expand and terminate the string. */
110 str->len += BASE64_LINELEN;
113 /* (Continue to) Base64-encode the byte string DATA (of length LEN)
114 into STR. Include newlines every so often if BREAK_LINES is true.
115 INBUF, INBUFLEN, and LINELEN are used internally; the caller shall
116 make INBUF have room for three characters and initialize *INBUFLEN
119 INBUF and *INBUFLEN carry the leftover data from call to call, and
120 *LINELEN carries the length of the current output line. */
122 encode_bytes(svn_stringbuf_t *str, const void *data, apr_size_t len,
123 unsigned char *inbuf, size_t *inbuflen, size_t *linelen,
124 svn_boolean_t break_lines)
127 const char *p = data, *end = p + len;
130 /* Resize the stringbuf to make room for the (approximate) size of
131 output, to avoid repeated resizes later.
132 Please note that our optimized code relies on the fact that STR
133 never needs to be resized until we leave this function. */
134 buflen = len * 4 / 3 + 4;
137 /* Add an extra space for line breaks. */
138 buflen += buflen / BASE64_LINELEN;
140 svn_stringbuf_ensure(str, str->len + buflen);
142 /* Keep encoding three-byte groups until we run out. */
143 while (*inbuflen + (end - p) >= 3)
145 /* May we encode BYTES_PER_LINE bytes without caring about
146 line breaks, data in the temporary INBUF or running out
149 && (*linelen == 0 || !break_lines)
150 && (end - p >= BYTES_PER_LINE))
152 /* Yes, we can encode a whole chunk of data at once. */
155 *linelen += BASE64_LINELEN;
159 /* No, this is one of a number of special cases.
160 Encode the data byte by byte. */
161 memcpy(inbuf + *inbuflen, p, 3 - *inbuflen);
162 p += (3 - *inbuflen);
163 encode_group(inbuf, group);
164 svn_stringbuf_appendbytes(str, group, 4);
169 /* Add line breaks as necessary. */
170 if (break_lines && *linelen == BASE64_LINELEN)
172 svn_stringbuf_appendbyte(str, '\n');
177 /* Tack any extra input onto *INBUF. */
178 memcpy(inbuf + *inbuflen, p, end - p);
179 *inbuflen += (end - p);
183 /* Encode leftover data, if any, and possibly a final newline (if
184 there has been any data and BREAK_LINES is set), appending to STR.
185 LEN must be in the range 0..2. */
187 encode_partial_group(svn_stringbuf_t *str, const unsigned char *extra,
188 size_t len, size_t linelen, svn_boolean_t break_lines)
190 unsigned char ingroup[3];
195 memcpy(ingroup, extra, len);
196 memset(ingroup + len, 0, 3 - len);
197 encode_group(ingroup, outgroup);
198 memset(outgroup + (len + 1), '=', 4 - (len + 1));
199 svn_stringbuf_appendbytes(str, outgroup, 4);
202 if (break_lines && linelen > 0)
203 svn_stringbuf_appendbyte(str, '\n');
207 /* Write handler for svn_base64_encode. */
209 encode_data(void *baton, const char *data, apr_size_t *len)
211 struct encode_baton *eb = baton;
212 svn_stringbuf_t *encoded = svn_stringbuf_create_empty(eb->scratch_pool);
214 svn_error_t *err = SVN_NO_ERROR;
216 /* Encode this block of data and write it out. */
217 encode_bytes(encoded, data, *len, eb->buf, &eb->buflen, &eb->linelen, TRUE);
218 enclen = encoded->len;
220 err = svn_stream_write(eb->output, encoded->data, &enclen);
221 svn_pool_clear(eb->scratch_pool);
226 /* Close handler for svn_base64_encode(). */
228 finish_encoding_data(void *baton)
230 struct encode_baton *eb = baton;
231 svn_stringbuf_t *encoded = svn_stringbuf_create_empty(eb->scratch_pool);
233 svn_error_t *err = SVN_NO_ERROR;
235 /* Encode a partial group at the end if necessary, and write it out. */
236 encode_partial_group(encoded, eb->buf, eb->buflen, eb->linelen, TRUE);
237 enclen = encoded->len;
239 err = svn_stream_write(eb->output, encoded->data, &enclen);
241 /* Pass on the close request and clean up the baton. */
242 if (err == SVN_NO_ERROR)
243 err = svn_stream_close(eb->output);
244 svn_pool_destroy(eb->scratch_pool);
250 svn_base64_encode(svn_stream_t *output, apr_pool_t *pool)
252 struct encode_baton *eb = apr_palloc(pool, sizeof(*eb));
253 svn_stream_t *stream;
258 eb->scratch_pool = svn_pool_create(pool);
259 stream = svn_stream_create(eb, pool);
260 svn_stream_set_write(stream, encode_data);
261 svn_stream_set_close(stream, finish_encoding_data);
267 svn_base64_encode_string2(const svn_string_t *str,
268 svn_boolean_t break_lines,
271 svn_stringbuf_t *encoded = svn_stringbuf_create_empty(pool);
272 unsigned char ingroup[3];
273 size_t ingrouplen = 0;
276 encode_bytes(encoded, str->data, str->len, ingroup, &ingrouplen, &linelen,
278 encode_partial_group(encoded, ingroup, ingrouplen, linelen,
280 return svn_stringbuf__morph_into_string(encoded);
284 svn_base64_encode_string(const svn_string_t *str, apr_pool_t *pool)
286 return svn_base64_encode_string2(str, TRUE, pool);
291 /* Base64-encoded input --> binary output */
293 struct decode_baton {
294 svn_stream_t *output;
295 unsigned char buf[4]; /* Bytes waiting to be decoded */
296 int buflen; /* Number of bytes waiting */
297 svn_boolean_t done; /* True if we already saw an '=' */
298 apr_pool_t *scratch_pool;
302 /* Base64-decode a group. IN needs to have four bytes and OUT needs
303 to have room for three bytes. The input bytes must already have
304 been decoded from base64tab into the range 0..63. The four
305 six-bit values are pasted together to form three eight-bit bytes. */
306 static APR_INLINE void
307 decode_group(const unsigned char *in, char *out)
309 out[0] = (char)((in[0] << 2) | (in[1] >> 4));
310 out[1] = (char)(((in[1] & 0xf) << 4) | (in[2] >> 2));
311 out[2] = (char)(((in[2] & 0x3) << 6) | in[3]);
/* Lookup table for base64 characters; reverse_base64[ch] gives a
   negative value if ch is not a valid base64 character, or otherwise
   the value of the byte represented; 'A' => 0 etc.  The padding char
   '=' and all white space count as invalid here. */
static const signed char reverse_base64[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
336 /* Similar to decode_group but this function also translates the
337 6-bit values from the IN buffer before translating them.
338 Return FALSE if a non-base64 char (e.g. '=' or new line)
339 has been encountered. */
340 static APR_INLINE svn_boolean_t
341 decode_group_directly(const unsigned char *in, char *out)
343 /* Translate the base64 chars in values [0..63, 0xff] */
344 apr_size_t part0 = (unsigned char)reverse_base64[(unsigned char)in[0]];
345 apr_size_t part1 = (unsigned char)reverse_base64[(unsigned char)in[1]];
346 apr_size_t part2 = (unsigned char)reverse_base64[(unsigned char)in[2]];
347 apr_size_t part3 = (unsigned char)reverse_base64[(unsigned char)in[3]];
349 /* Pack 4x6 bits into 3x8.*/
350 out[0] = (char)((part0 << 2) | (part1 >> 4));
351 out[1] = (char)(((part1 & 0xf) << 4) | (part2 >> 2));
352 out[2] = (char)(((part2 & 0x3) << 6) | part3);
354 /* FALSE, iff any part is 0xff. */
355 return (part0 | part1 | part2 | part3) != (unsigned char)(-1);
358 /* Base64-encode up to BASE64_LINELEN chars from *DATA and append it to
359 STR. After the function returns, *DATA will point to the first char
360 that has not been translated, yet. Returns TRUE if all BASE64_LINELEN
361 chars could be translated, i.e. no special char has been encountered
363 The code in this function will simply transform the data without
364 performing any boundary checks. Therefore, DATA must have at least
365 BASE64_LINELEN left and space for at least another BYTES_PER_LINE
366 chars must have been pre-allocated in STR before calling this
369 decode_line(svn_stringbuf_t *str, const char **data)
371 /* Decode up to BYTES_PER_LINE bytes directly from *DATA into STR->DATA. */
372 const unsigned char *p = *(const unsigned char **)data;
373 char *out = str->data + str->len;
374 char *end = out + BYTES_PER_LINE;
376 /* We assume that BYTES_PER_LINE is a multiple of 3 and BASE64_LINELEN
377 a multiple of 4. Stop translation as soon as we encounter a special
378 char. Leave the entire group untouched in that case. */
379 for (; out < end; p += 4, out += 3)
380 if (!decode_group_directly(p, out))
383 /* Update string sizes and positions. */
384 str->len = out - str->data;
386 *data = (const char *)p;
388 /* Return FALSE, if the caller should continue the decoding process
389 using the slow standard method. */
394 /* (Continue to) Base64-decode the byte string DATA (of length LEN)
395 into STR. INBUF, INBUFLEN, and DONE are used internally; the
396 caller shall have room for four bytes in INBUF and initialize
397 *INBUFLEN to 0 and *DONE to FALSE.
399 INBUF and *INBUFLEN carry the leftover bytes from call to call, and
400 *DONE keeps track of whether we've seen an '=' which terminates the
403 decode_bytes(svn_stringbuf_t *str, const char *data, apr_size_t len,
404 unsigned char *inbuf, int *inbuflen, svn_boolean_t *done)
406 const char *p = data;
409 const char *end = data + len;
411 /* Resize the stringbuf to make room for the maximum size of output,
412 to avoid repeated resizes later. The optimizations in
413 decode_line rely on no resizes being necessary!
415 (*inbuflen+len) is encoded data length
416 (*inbuflen+len)/4 is the number of complete 4-bytes sets
417 (*inbuflen+len)/4*3 is the number of decoded bytes
418 svn_stringbuf_ensure will add an additional byte for the terminating 0.
420 svn_stringbuf_ensure(str, str->len + ((*inbuflen + len) / 4) * 3);
422 while ( !*done && p < end )
424 /* If no data is left in temporary INBUF and there is at least
425 one line-sized chunk left to decode, we may use the optimized
427 if ((*inbuflen == 0) && (p + BASE64_LINELEN <= end))
428 if (decode_line(str, &p))
431 /* A special case or decode_line encountered a special char. */
434 /* We are at the end and have to decode a partial group. */
437 memset(inbuf + *inbuflen, 0, 4 - *inbuflen);
438 decode_group(inbuf, group);
439 svn_stringbuf_appendbytes(str, group, *inbuflen - 1);
445 find = reverse_base64[(unsigned char)*p];
449 inbuf[(*inbuflen)++] = find;
452 decode_group(inbuf, group);
453 svn_stringbuf_appendbytes(str, group, 3);
461 /* Write handler for svn_base64_decode. */
463 decode_data(void *baton, const char *data, apr_size_t *len)
465 struct decode_baton *db = baton;
466 svn_stringbuf_t *decoded;
468 svn_error_t *err = SVN_NO_ERROR;
470 /* Decode this block of data. */
471 decoded = svn_stringbuf_create_empty(db->scratch_pool);
472 decode_bytes(decoded, data, *len, db->buf, &db->buflen, &db->done);
474 /* Write the output, clean up, go home. */
475 declen = decoded->len;
477 err = svn_stream_write(db->output, decoded->data, &declen);
478 svn_pool_clear(db->scratch_pool);
483 /* Close handler for svn_base64_decode(). */
485 finish_decoding_data(void *baton)
487 struct decode_baton *db = baton;
490 /* Pass on the close request and clean up the baton. */
491 err = svn_stream_close(db->output);
492 svn_pool_destroy(db->scratch_pool);
498 svn_base64_decode(svn_stream_t *output, apr_pool_t *pool)
500 struct decode_baton *db = apr_palloc(pool, sizeof(*db));
501 svn_stream_t *stream;
506 db->scratch_pool = svn_pool_create(pool);
507 stream = svn_stream_create(db, pool);
508 svn_stream_set_write(stream, decode_data);
509 svn_stream_set_close(stream, finish_decoding_data);
515 svn_base64_decode_string(const svn_string_t *str, apr_pool_t *pool)
517 svn_stringbuf_t *decoded = svn_stringbuf_create_empty(pool);
518 unsigned char ingroup[4];
520 svn_boolean_t done = FALSE;
522 decode_bytes(decoded, str->data, str->len, ingroup, &ingrouplen, &done);
523 return svn_stringbuf__morph_into_string(decoded);
527 /* Return a base64-encoded representation of CHECKSUM, allocated in POOL.
528 If CHECKSUM->kind is not recognized, return NULL.
529 ### That 'NULL' claim was in the header file when this was public, but
530 doesn't look true in the implementation.
532 ### This is now only used as a new implementation of svn_base64_from_md5();
533 it would probably be safer to revert that to its old implementation. */
534 static svn_stringbuf_t *
535 base64_from_checksum(const svn_checksum_t *checksum, apr_pool_t *pool)
537 svn_stringbuf_t *checksum_str;
538 unsigned char ingroup[3];
539 size_t ingrouplen = 0;
541 checksum_str = svn_stringbuf_create_empty(pool);
543 encode_bytes(checksum_str, checksum->digest,
544 svn_checksum_size(checksum), ingroup, &ingrouplen,
546 encode_partial_group(checksum_str, ingroup, ingrouplen, linelen, TRUE);
548 /* Our base64-encoding routines append a final newline if any data
549 was created at all, so let's hack that off. */
550 if (checksum_str->len)
553 checksum_str->data[checksum_str->len] = 0;
561 svn_base64_from_md5(unsigned char digest[], apr_pool_t *pool)
563 svn_checksum_t *checksum
564 = svn_checksum__from_digest_md5(digest, pool);
566 return base64_from_checksum(checksum, pool);