/*
 * base64.c:  base64 encoding and decoding functions
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */
#include <string.h>             /* for memcpy() and memset() */

#include <apr_pools.h>
#include <apr_general.h>        /* for APR_INLINE */

#include "svn_pools.h"
#include "svn_error.h"
#include "svn_base64.h"
#include "private/svn_string_private.h"
#include "private/svn_subr_private.h"
/* When asked to format the base64-encoded output as multiple lines,
   we put this many chars in each line (plus one new line char) unless
   the input runs out first.
   It is vital for some of the optimizations below that this value is
   a multiple of 4 (one complete base64 group is 4 chars). */
#define BASE64_LINELEN 76

/* This number of bytes is encoded in a line of base64 chars. */
#define BYTES_PER_LINE (BASE64_LINELEN / 4 * 3)

/* Compile-time guard (C89-compatible): the array size becomes negative,
   and compilation fails, if BASE64_LINELEN stops being a multiple of 4. */
typedef char base64_linelen_is_multiple_of_4[(BASE64_LINELEN % 4 == 0)
                                             ? 1 : -1];
/* Value -> base64 char mapping table (2^6 entries, followed by the
   terminating NUL of the string literal). */
static const char base64tab[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                "abcdefghijklmnopqrstuvwxyz0123456789+/";
54 /* Binary input --> base64-encoded output */
58 unsigned char buf[3]; /* Bytes waiting to be encoded */
59 size_t buflen; /* Number of bytes waiting */
60 size_t linelen; /* Bytes output so far on this line */
61 svn_boolean_t break_lines;
62 apr_pool_t *scratch_pool;
66 /* Base64-encode a group. IN needs to have three bytes and OUT needs
67 to have room for four bytes. The input group is treated as four
68 six-bit units which are treated as lookups into base64tab for the
69 bytes of the output group. */
70 static APR_INLINE void
71 encode_group(const unsigned char *in, char *out)
73 /* Expand input bytes to machine word length (with zero extra cost
75 apr_size_t part0 = in[0];
76 apr_size_t part1 = in[1];
77 apr_size_t part2 = in[2];
79 /* ... to prevent these arithmetic operations from being limited to
80 byte size. This saves non-zero cost conversions of the result when
81 calculating the addresses within base64tab. */
82 out[0] = base64tab[part0 >> 2];
83 out[1] = base64tab[((part0 & 3) << 4) | (part1 >> 4)];
84 out[2] = base64tab[((part1 & 0xf) << 2) | (part2 >> 6)];
85 out[3] = base64tab[part2 & 0x3f];
88 /* Base64-encode a line, i.e. BYTES_PER_LINE bytes from DATA into
89 BASE64_LINELEN chars and append it to STR. It does not assume that
90 a new line char will be appended, though.
91 The code in this function will simply transform the data without
92 performing any boundary checks. Therefore, DATA must have at least
93 BYTES_PER_LINE left and space for at least another BASE64_LINELEN
94 chars must have been pre-allocated in STR before calling this
97 encode_line(svn_stringbuf_t *str, const char *data)
99 /* Translate directly from DATA to STR->DATA. */
100 const unsigned char *in = (const unsigned char *)data;
101 char *out = str->data + str->len;
102 char *end = out + BASE64_LINELEN;
104 /* We assume that BYTES_PER_LINE is a multiple of 3 and BASE64_LINELEN
106 for ( ; out != end; in += 3, out += 4)
107 encode_group(in, out);
109 /* Expand and terminate the string. */
111 str->len += BASE64_LINELEN;
114 /* (Continue to) Base64-encode the byte string DATA (of length LEN)
115 into STR. Include newlines every so often if BREAK_LINES is true.
116 INBUF, INBUFLEN, and LINELEN are used internally; the caller shall
117 make INBUF have room for three characters and initialize *INBUFLEN
120 INBUF and *INBUFLEN carry the leftover data from call to call, and
121 *LINELEN carries the length of the current output line. */
123 encode_bytes(svn_stringbuf_t *str, const void *data, apr_size_t len,
124 unsigned char *inbuf, size_t *inbuflen, size_t *linelen,
125 svn_boolean_t break_lines)
128 const char *p = data, *end = p + len;
131 /* Resize the stringbuf to make room for the (approximate) size of
132 output, to avoid repeated resizes later.
133 Please note that our optimized code relies on the fact that STR
134 never needs to be resized until we leave this function. */
135 buflen = len * 4 / 3 + 4;
138 /* Add an extra space for line breaks. */
139 buflen += buflen / BASE64_LINELEN;
141 svn_stringbuf_ensure(str, str->len + buflen);
143 /* Keep encoding three-byte groups until we run out. */
144 while ((end - p) >= (3 - *inbuflen))
146 /* May we encode BYTES_PER_LINE bytes without caring about
147 line breaks, data in the temporary INBUF or running out
150 && (*linelen == 0 || !break_lines)
151 && (end - p >= BYTES_PER_LINE))
153 /* Yes, we can encode a whole chunk of data at once. */
156 *linelen += BASE64_LINELEN;
160 /* No, this is one of a number of special cases.
161 Encode the data byte by byte. */
162 memcpy(inbuf + *inbuflen, p, 3 - *inbuflen);
163 p += (3 - *inbuflen);
164 encode_group(inbuf, group);
165 svn_stringbuf_appendbytes(str, group, 4);
170 /* Add line breaks as necessary. */
171 if (break_lines && *linelen == BASE64_LINELEN)
173 svn_stringbuf_appendbyte(str, '\n');
178 /* Tack any extra input onto *INBUF. */
179 memcpy(inbuf + *inbuflen, p, end - p);
180 *inbuflen += (end - p);
184 /* Encode leftover data, if any, and possibly a final newline (if
185 there has been any data and BREAK_LINES is set), appending to STR.
186 LEN must be in the range 0..2. */
188 encode_partial_group(svn_stringbuf_t *str, const unsigned char *extra,
189 size_t len, size_t linelen, svn_boolean_t break_lines)
191 unsigned char ingroup[3];
196 memcpy(ingroup, extra, len);
197 memset(ingroup + len, 0, 3 - len);
198 encode_group(ingroup, outgroup);
199 memset(outgroup + (len + 1), '=', 4 - (len + 1));
200 svn_stringbuf_appendbytes(str, outgroup, 4);
203 if (break_lines && linelen > 0)
204 svn_stringbuf_appendbyte(str, '\n');
208 /* Write handler for svn_base64_encode. */
210 encode_data(void *baton, const char *data, apr_size_t *len)
212 struct encode_baton *eb = baton;
213 svn_stringbuf_t *encoded = svn_stringbuf_create_empty(eb->scratch_pool);
215 svn_error_t *err = SVN_NO_ERROR;
217 /* Encode this block of data and write it out. */
218 encode_bytes(encoded, data, *len, eb->buf, &eb->buflen, &eb->linelen,
220 enclen = encoded->len;
222 err = svn_stream_write(eb->output, encoded->data, &enclen);
223 svn_pool_clear(eb->scratch_pool);
228 /* Close handler for svn_base64_encode(). */
230 finish_encoding_data(void *baton)
232 struct encode_baton *eb = baton;
233 svn_stringbuf_t *encoded = svn_stringbuf_create_empty(eb->scratch_pool);
235 svn_error_t *err = SVN_NO_ERROR;
237 /* Encode a partial group at the end if necessary, and write it out. */
238 encode_partial_group(encoded, eb->buf, eb->buflen, eb->linelen,
240 enclen = encoded->len;
242 err = svn_stream_write(eb->output, encoded->data, &enclen);
244 /* Pass on the close request and clean up the baton. */
245 if (err == SVN_NO_ERROR)
246 err = svn_stream_close(eb->output);
247 svn_pool_destroy(eb->scratch_pool);
253 svn_base64_encode2(svn_stream_t *output,
254 svn_boolean_t break_lines,
257 struct encode_baton *eb = apr_palloc(pool, sizeof(*eb));
258 svn_stream_t *stream;
263 eb->break_lines = break_lines;
264 eb->scratch_pool = svn_pool_create(pool);
265 stream = svn_stream_create(eb, pool);
266 svn_stream_set_write(stream, encode_data);
267 svn_stream_set_close(stream, finish_encoding_data);
273 svn_base64_encode_string2(const svn_string_t *str,
274 svn_boolean_t break_lines,
277 svn_stringbuf_t *encoded = svn_stringbuf_create_empty(pool);
278 unsigned char ingroup[3];
279 size_t ingrouplen = 0;
282 encode_bytes(encoded, str->data, str->len, ingroup, &ingrouplen, &linelen,
284 encode_partial_group(encoded, ingroup, ingrouplen, linelen,
286 return svn_stringbuf__morph_into_string(encoded);
290 svn_base64_encode_string(const svn_string_t *str, apr_pool_t *pool)
292 return svn_base64_encode_string2(str, TRUE, pool);
297 /* Base64-encoded input --> binary output */
299 struct decode_baton {
300 svn_stream_t *output;
301 unsigned char buf[4]; /* Bytes waiting to be decoded */
302 int buflen; /* Number of bytes waiting */
303 svn_boolean_t done; /* True if we already saw an '=' */
304 apr_pool_t *scratch_pool;
308 /* Base64-decode a group. IN needs to have four bytes and OUT needs
309 to have room for three bytes. The input bytes must already have
310 been decoded from base64tab into the range 0..63. The four
311 six-bit values are pasted together to form three eight-bit bytes. */
312 static APR_INLINE void
313 decode_group(const unsigned char *in, char *out)
315 out[0] = (char)((in[0] << 2) | (in[1] >> 4));
316 out[1] = (char)(((in[1] & 0xf) << 4) | (in[2] >> 2));
317 out[2] = (char)(((in[2] & 0x3) << 6) | in[3]);
/* Lookup table for base64 characters; reverse_base64[ch] gives a
   negative value if ch is not a valid base64 character, or otherwise
   the value of the byte represented; 'A' => 0 etc.  Index it with an
   unsigned char value. */
static const signed char reverse_base64[256] = {
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
  -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
342 /* Similar to decode_group but this function also translates the
343 6-bit values from the IN buffer before translating them.
344 Return FALSE if a non-base64 char (e.g. '=' or new line)
345 has been encountered. */
346 static APR_INLINE svn_boolean_t
347 decode_group_directly(const unsigned char *in, char *out)
349 /* Translate the base64 chars in values [0..63, 0xff] */
350 apr_size_t part0 = (unsigned char)reverse_base64[(unsigned char)in[0]];
351 apr_size_t part1 = (unsigned char)reverse_base64[(unsigned char)in[1]];
352 apr_size_t part2 = (unsigned char)reverse_base64[(unsigned char)in[2]];
353 apr_size_t part3 = (unsigned char)reverse_base64[(unsigned char)in[3]];
355 /* Pack 4x6 bits into 3x8.*/
356 out[0] = (char)((part0 << 2) | (part1 >> 4));
357 out[1] = (char)(((part1 & 0xf) << 4) | (part2 >> 2));
358 out[2] = (char)(((part2 & 0x3) << 6) | part3);
360 /* FALSE, iff any part is 0xff. */
361 return (part0 | part1 | part2 | part3) != (unsigned char)(-1);
364 /* Base64-encode up to BASE64_LINELEN chars from *DATA and append it to
365 STR. After the function returns, *DATA will point to the first char
366 that has not been translated, yet. Returns TRUE if all BASE64_LINELEN
367 chars could be translated, i.e. no special char has been encountered
369 The code in this function will simply transform the data without
370 performing any boundary checks. Therefore, DATA must have at least
371 BASE64_LINELEN left and space for at least another BYTES_PER_LINE
372 chars must have been pre-allocated in STR before calling this
375 decode_line(svn_stringbuf_t *str, const char **data)
377 /* Decode up to BYTES_PER_LINE bytes directly from *DATA into STR->DATA. */
378 const unsigned char *p = *(const unsigned char **)data;
379 char *out = str->data + str->len;
380 char *end = out + BYTES_PER_LINE;
382 /* We assume that BYTES_PER_LINE is a multiple of 3 and BASE64_LINELEN
383 a multiple of 4. Stop translation as soon as we encounter a special
384 char. Leave the entire group untouched in that case. */
385 for (; out < end; p += 4, out += 3)
386 if (!decode_group_directly(p, out))
389 /* Update string sizes and positions. */
390 str->len = out - str->data;
392 *data = (const char *)p;
394 /* Return FALSE, if the caller should continue the decoding process
395 using the slow standard method. */
400 /* (Continue to) Base64-decode the byte string DATA (of length LEN)
401 into STR. INBUF, INBUFLEN, and DONE are used internally; the
402 caller shall have room for four bytes in INBUF and initialize
403 *INBUFLEN to 0 and *DONE to FALSE.
405 INBUF and *INBUFLEN carry the leftover bytes from call to call, and
406 *DONE keeps track of whether we've seen an '=' which terminates the
409 decode_bytes(svn_stringbuf_t *str, const char *data, apr_size_t len,
410 unsigned char *inbuf, int *inbuflen, svn_boolean_t *done)
412 const char *p = data;
415 const char *end = data + len;
417 /* Resize the stringbuf to make room for the maximum size of output,
418 to avoid repeated resizes later. The optimizations in
419 decode_line rely on no resizes being necessary!
421 (*inbuflen+len) is encoded data length
422 (*inbuflen+len)/4 is the number of complete 4-bytes sets
423 (*inbuflen+len)/4*3 is the number of decoded bytes
424 svn_stringbuf_ensure will add an additional byte for the terminating 0.
426 svn_stringbuf_ensure(str, str->len + ((*inbuflen + len) / 4) * 3);
428 while ( !*done && p < end )
430 /* If no data is left in temporary INBUF and there is at least
431 one line-sized chunk left to decode, we may use the optimized
433 if ((*inbuflen == 0) && (end - p >= BASE64_LINELEN))
434 if (decode_line(str, &p))
437 /* A special case or decode_line encountered a special char. */
440 /* We are at the end and have to decode a partial group. */
443 memset(inbuf + *inbuflen, 0, 4 - *inbuflen);
444 decode_group(inbuf, group);
445 svn_stringbuf_appendbytes(str, group, *inbuflen - 1);
451 find = reverse_base64[(unsigned char)*p];
455 inbuf[(*inbuflen)++] = find;
458 decode_group(inbuf, group);
459 svn_stringbuf_appendbytes(str, group, 3);
467 /* Write handler for svn_base64_decode. */
469 decode_data(void *baton, const char *data, apr_size_t *len)
471 struct decode_baton *db = baton;
472 svn_stringbuf_t *decoded;
474 svn_error_t *err = SVN_NO_ERROR;
476 /* Decode this block of data. */
477 decoded = svn_stringbuf_create_empty(db->scratch_pool);
478 decode_bytes(decoded, data, *len, db->buf, &db->buflen, &db->done);
480 /* Write the output, clean up, go home. */
481 declen = decoded->len;
483 err = svn_stream_write(db->output, decoded->data, &declen);
484 svn_pool_clear(db->scratch_pool);
489 /* Close handler for svn_base64_decode(). */
491 finish_decoding_data(void *baton)
493 struct decode_baton *db = baton;
496 /* Pass on the close request and clean up the baton. */
497 err = svn_stream_close(db->output);
498 svn_pool_destroy(db->scratch_pool);
504 svn_base64_decode(svn_stream_t *output, apr_pool_t *pool)
506 struct decode_baton *db = apr_palloc(pool, sizeof(*db));
507 svn_stream_t *stream;
512 db->scratch_pool = svn_pool_create(pool);
513 stream = svn_stream_create(db, pool);
514 svn_stream_set_write(stream, decode_data);
515 svn_stream_set_close(stream, finish_decoding_data);
521 svn_base64_decode_string(const svn_string_t *str, apr_pool_t *pool)
523 svn_stringbuf_t *decoded = svn_stringbuf_create_empty(pool);
524 unsigned char ingroup[4];
526 svn_boolean_t done = FALSE;
528 decode_bytes(decoded, str->data, str->len, ingroup, &ingrouplen, &done);
529 return svn_stringbuf__morph_into_string(decoded);
533 /* Return a base64-encoded representation of CHECKSUM, allocated in POOL.
534 If CHECKSUM->kind is not recognized, return NULL.
535 ### That 'NULL' claim was in the header file when this was public, but
536 doesn't look true in the implementation.
538 ### This is now only used as a new implementation of svn_base64_from_md5();
539 it would probably be safer to revert that to its old implementation. */
540 static svn_stringbuf_t *
541 base64_from_checksum(const svn_checksum_t *checksum, apr_pool_t *pool)
543 svn_stringbuf_t *checksum_str;
544 unsigned char ingroup[3];
545 size_t ingrouplen = 0;
547 checksum_str = svn_stringbuf_create_empty(pool);
549 encode_bytes(checksum_str, checksum->digest,
550 svn_checksum_size(checksum), ingroup, &ingrouplen,
552 encode_partial_group(checksum_str, ingroup, ingrouplen, linelen, TRUE);
554 /* Our base64-encoding routines append a final newline if any data
555 was created at all, so let's hack that off. */
556 if (checksum_str->len)
559 checksum_str->data[checksum_str->len] = 0;
567 svn_base64_from_md5(unsigned char digest[], apr_pool_t *pool)
569 svn_checksum_t *checksum
570 = svn_checksum__from_digest_md5(digest, pool);
572 return base64_from_checksum(checksum, pool);