/*
 * Copyright (c) 2016 Thomas Pornin
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "inner.h"

/*
 * Initialise a CBC record decryption context.
 *
 * The context vtable is set, the record sequence number is reset to 0,
 * the block cipher is keyed through bc_impl->init(), the HMAC key
 * context is set up with br_hmac_key_init(), and 'mac_out_len' is
 * recorded as the MAC length.
 *
 * If 'iv' is NULL, the stored IV is cleared and the context is flagged
 * as using an explicit per-record IV. Otherwise, bc_impl->block_size
 * bytes are copied from 'iv' and the explicit-IV flag is cleared.
 */
static void
in_cbc_init(br_sslrec_in_cbc_context *cc,
	const br_block_cbcdec_class *bc_impl,
	const void *bc_key, size_t bc_key_len,
	const br_hash_class *dig_impl,
	const void *mac_key, size_t mac_key_len, size_t mac_out_len,
	const void *iv)
{
	cc->vtable = &br_sslrec_in_cbc_vtable;
	cc->seq = 0;
	bc_impl->init(&cc->bc.vtable, bc_key, bc_key_len);
	br_hmac_key_init(&cc->mac, dig_impl, mac_key, mac_key_len);
	cc->mac_len = mac_out_len;
	if (iv == NULL) {
		memset(cc->iv, 0, sizeof cc->iv);
		cc->explicit_IV = 1;
	} else {
		memcpy(cc->iv, iv, bc_impl->block_size);
		cc->explicit_IV = 0;
	}
}

/*
 * Check whether 'rlen' is an acceptable length for an incoming
 * encrypted record. Returned value is 1 if the length is acceptable,
 * 0 otherwise.
 */
static int
cbc_check_length(const br_sslrec_in_cbc_context *cc, size_t rlen)
{
	/*
	 * Plaintext size: at most 16384 bytes
	 * Padding: at most 256 bytes
	 * MAC: mac_len extra bytes
	 * TLS 1.1+: each record has an explicit IV
	 *
	 * Minimum length includes at least one byte of padding, and the
	 * MAC.
	 *
	 * Total length must be a multiple of the block size.
	 */
	size_t blen;
	size_t min_len, max_len;

	blen = cc->bc.vtable->block_size;

	/*
	 * The masking below (and the alignment test in the return
	 * expression) relies on the block size being a power of two.
	 */
	min_len = (blen + cc->mac_len) & ~(blen - 1);
	max_len = (16384 + 256 + cc->mac_len) & ~(blen - 1);
	if (cc->explicit_IV) {
		min_len += blen;
		max_len += blen;
	}

	/*
	 * The range test alone is not sufficient: a length within range
	 * but not block-aligned would be handed as is to the block
	 * cipher by cbc_decrypt(), which relies on this function for
	 * length validation. Such lengths are rejected here.
	 */
	return min_len <= rlen && rlen <= max_len
		&& (rlen & (blen - 1)) == 0;
}

/*
 * Rotate array buf[] of length 'len' to the left (towards low indices)
 * by 'num' bytes if ctl is 1; otherwise, leave it unchanged. This is
 * constant-time. 'num' MUST be lower than 'len'. 'len' MUST be lower
 * than or equal to 64.
 *
 * The result is first assembled in a local 64-byte array, then copied
 * back over buf[].
 */
static void
cond_rotate(uint32_t ctl, unsigned char *buf, size_t len, size_t num)
{
	unsigned char tmp[64];
	size_t u, v;

	/*
	 * 'u' is the destination index; 'v' is the matching source index
	 * for the rotated case, wrapping around at 'len'.
	 */
	for (u = 0, v = num; u < len; u ++) {
		tmp[u] = MUX(ctl, buf[v], buf[u]);
		if (++ v == len) {
			v = 0;
		}
	}
	memcpy(buf, tmp, len);
}

/*
 * Decrypt and verify an incoming record, in place.
 *
 * On input, 'data' points to the encrypted record contents and
 * '*data_len' is their length. The record length is expected to have
 * been validated beforehand (see cbc_check_length()).
 *
 * On success, the returned pointer designates the plaintext (within
 * the 'data' buffer) and '*data_len' is set to the plaintext length.
 * On failure (bad padding, bad MAC, or plaintext longer than 16384
 * bytes), 0 is returned and '*data_len' is not modified.
 *
 * The record sequence number in the context is incremented in all
 * cases.
 */
static unsigned char *
cbc_decrypt(br_sslrec_in_cbc_context *cc,
	int record_type, unsigned version, void *data, size_t *data_len)
{
	/*
	 * We represent all lengths on 32-bit integers, because:
	 * -- SSL record lengths always fit in 32 bits;
	 * -- our constant-time primitives operate on 32-bit integers.
	 */
	unsigned char *buf;
	uint32_t u, v, len, blen, min_len, max_len;
	uint32_t good, pad_len, rot_count, len_withmac, len_nomac;
	unsigned char tmp1[64], tmp2[64];
	int i;
	br_hmac_context hc;

	buf = data;
	len = *data_len;
	blen = cc->bc.vtable->block_size;

	/*
	 * Decrypt data, and skip the explicit IV (if applicable). Note
	 * that the total length is supposed to have been verified by
	 * the caller. If there is an explicit IV, then we actually
	 * "decrypt" it using the implicit IV (from previous record),
	 * which is useless but harmless.
	 */
	cc->bc.vtable->run(&cc->bc.vtable, cc->iv, data, len);
	if (cc->explicit_IV) {
		buf += blen;
		len -= blen;
	}

	/*
	 * Compute minimum and maximum length of plaintext + MAC. These
	 * lengths can be inferred from the outside: they are not secret.
	 */
	min_len = (cc->mac_len + 256 < len) ? len - 256 : cc->mac_len;
	max_len = len - 1;

	/*
	 * Use the last decrypted byte to compute the actual payload
	 * length. Take care not to underflow (we use unsigned types).
	 */
	pad_len = buf[max_len];
	good = LE(pad_len, (uint32_t)(max_len - min_len));
	len = MUX(good, (uint32_t)(max_len - pad_len), min_len);

	/*
	 * Check padding contents: all padding bytes must be equal to
	 * the value of pad_len.
	 */
	for (u = min_len; u < max_len; u ++) {
		good &= LT(u, len) | EQ(buf[u], pad_len);
	}

	/*
	 * Extract the MAC value. This is done in one pass, but results
	 * in a "rotated" MAC value depending on where it actually
	 * occurs. The 'rot_count' value is set to the offset of the
	 * first MAC byte within tmp1[].
	 *
	 * min_len and max_len are also adjusted to the minimum and
	 * maximum lengths of the plaintext alone (without the MAC).
	 */
	len_withmac = (uint32_t)len;
	len_nomac = len_withmac - cc->mac_len;
	min_len -= cc->mac_len;
	rot_count = 0;
	memset(tmp1, 0, cc->mac_len);
	v = 0;
	for (u = min_len; u < max_len; u ++) {
		tmp1[v] |= MUX(GE(u, len_nomac) & LT(u, len_withmac),
			buf[u], 0x00);
		rot_count = MUX(EQ(u, len_nomac), v, rot_count);
		if (++ v == cc->mac_len) {
			v = 0;
		}
	}
	max_len -= cc->mac_len;

	/*
	 * Rotate back the MAC value. The loop below does the constant-time
	 * rotation in time n*log n for a MAC output of length n. We assume
	 * that the MAC output length is no more than 64 bytes, so the
	 * rotation count fits on 6 bits.
	 */
	for (i = 5; i >= 0; i --) {
		uint32_t rc;

		rc = (uint32_t)1 << i;
		cond_rotate(rot_count >> i, tmp1, cc->mac_len, rc);
		rot_count &= ~rc;
	}

	/*
	 * Recompute the HMAC value. The input is the concatenation of
	 * the sequence number (8 bytes), the record header (5 bytes),
	 * and the payload.
	 *
	 * At that point, both min_len and max_len have been reduced by
	 * the MAC length: they are the minimum and maximum plaintext
	 * lengths.
	 */
	br_enc64be(tmp2, cc->seq ++);
	tmp2[8] = (unsigned char)record_type;
	br_enc16be(tmp2 + 9, version);
	br_enc16be(tmp2 + 11, len_nomac);
	br_hmac_init(&hc, &cc->mac, cc->mac_len);
	br_hmac_update(&hc, tmp2, 13);
	br_hmac_outCT(&hc, buf, len_nomac, min_len, max_len, tmp2);

	/*
	 * Compare the extracted and recomputed MAC values.
	 */
	for (u = 0; u < cc->mac_len; u ++) {
		good &= EQ0(tmp1[u] ^ tmp2[u]);
	}

	/*
	 * Check that the plaintext length is valid. The previous
	 * check was on the encrypted length, but the padding may have
	 * turned shorter than expected.
	 *
	 * Once this final test is done, the critical "constant-time"
	 * section ends and we can make conditional jumps again.
	 */
	good &= LE(len_nomac, 16384);

	if (!good) {
		return 0;
	}
	*data_len = len_nomac;
	return buf;
}

/* see bearssl_ssl.h */
/*
 * Method table for CBC record decryption. The function pointers are
 * cast from the context-specific signatures used in this file to the
 * generic signatures of the class structures.
 */
const br_sslrec_in_cbc_class br_sslrec_in_cbc_vtable = {
	{
		sizeof(br_sslrec_in_cbc_context),
		(int (*)(const br_sslrec_in_class *const *, size_t))
			&cbc_check_length,
		(unsigned char *(*)(const br_sslrec_in_class **,
			int, unsigned, void *, size_t *))
			&cbc_decrypt
	},
	(void (*)(const br_sslrec_in_cbc_class **,
		const br_block_cbcdec_class *, const void *, size_t,
		const br_hash_class *, const void *, size_t, size_t,
		const void *))
		&in_cbc_init
};

/*
 * For CBC output:
 *
 * -- With TLS 1.1+, there is an explicit IV. Generation method uses
 * HMAC, computed over the current sequence number, and the current MAC
 * key. The resulting value is truncated to the size of a block, and
 * added at the head of the plaintext; it will get encrypted along with
 * the data. This custom generation mechanism is "safe" under the
 * assumption that HMAC behaves like a random oracle; since the MAC for
 * a record is computed over the concatenation of the sequence number,
 * the record header and the plaintext, the HMAC-for-IV will not collide
 * with the normal HMAC.
 *
 * -- With TLS 1.0, for application data, we want to enforce a 1/n-1
 * split, as a countermeasure against chosen-plaintext attacks. We thus
 * need to leave some room in the buffer for that extra record.
 */

/*
 * Initialise a CBC record encryption context.
 *
 * The context vtable is set, the record sequence number is reset to 0,
 * the block cipher is keyed through bc_impl->init(), the HMAC key
 * context is set up with br_hmac_key_init(), and 'mac_out_len' is
 * recorded as the MAC length.
 *
 * If 'iv' is NULL, the stored IV is cleared and the context is flagged
 * as using an explicit per-record IV. Otherwise, bc_impl->block_size
 * bytes are copied from 'iv' and the explicit-IV flag is cleared.
 */
static void
out_cbc_init(br_sslrec_out_cbc_context *cc,
	const br_block_cbcenc_class *bc_impl,
	const void *bc_key, size_t bc_key_len,
	const br_hash_class *dig_impl,
	const void *mac_key, size_t mac_key_len, size_t mac_out_len,
	const void *iv)
{
	cc->vtable = &br_sslrec_out_cbc_vtable;
	cc->seq = 0;
	bc_impl->init(&cc->bc.vtable, bc_key, bc_key_len);
	br_hmac_key_init(&cc->mac, dig_impl, mac_key, mac_key_len);
	cc->mac_len = mac_out_len;
	if (iv == NULL) {
		memset(cc->iv, 0, sizeof cc->iv);
		cc->explicit_IV = 1;
	} else {
		memcpy(cc->iv, iv, bc_impl->block_size);
		cc->explicit_IV = 0;
	}
}

/*
 * Adjust the '*start' and '*end' offsets, in place, so that they
 * delimit the area that may receive plaintext for the next record.
 *
 * '*start' is moved forward to leave room for the explicit IV block
 * (explicit-IV mode) or for the extra record of the 1/n-1 split
 * (otherwise; same size expression as used in cbc_encrypt()). The
 * available length is then truncated to a multiple of the block size,
 * reduced by the MAC length and one byte of padding, and capped at
 * 16384 bytes; '*end' is set accordingly.
 */
static void
cbc_max_plaintext(const br_sslrec_out_cbc_context *cc,
	size_t *start, size_t *end)
{
	size_t blen, len;

	blen = cc->bc.vtable->block_size;
	if (cc->explicit_IV) {
		*start += blen;
	} else {
		*start += 4 + ((cc->mac_len + blen + 1) & ~(blen - 1));
	}
	len = (*end - *start) & ~(blen - 1);
	len -= 1 + cc->mac_len;
	if (len > 16384) {
		len = 16384;
	}
	*end = *start + len;
}

/*
 * Encrypt an outgoing record, in place.
 *
 * On input, 'data' points to the plaintext and '*data_len' is its
 * length. This function writes both before the plaintext (record
 * header, explicit IV or extra split record) and after it (MAC and
 * padding); the caller's buffer must have room for these, which is
 * what cbc_max_plaintext() accounts for.
 *
 * Returned value is a pointer to the first byte of the produced
 * record(s), header included; '*data_len' is set to the total length
 * of the produced bytes. The record sequence number in the context is
 * incremented once per produced record.
 */
static unsigned char *
cbc_encrypt(br_sslrec_out_cbc_context *cc,
	int record_type, unsigned version, void *data, size_t *data_len)
{
	unsigned char *buf, *rbuf;
	size_t len, blen, plen;
	unsigned char tmp[13];
	br_hmac_context hc;

	buf = data;
	len = *data_len;
	blen = cc->bc.vtable->block_size;

	/*
	 * If using TLS 1.0, with more than one byte of plaintext, and
	 * the record is application data, then we need to compute
	 * a "split". We do not perform the split on other record types
	 * because it turned out that some existing, deployed
	 * implementations of SSL/TLS do not tolerate the splitting of
	 * some message types (in particular the Finished message).
	 *
	 * If using TLS 1.1+, then there is an explicit IV. We produce
	 * that IV by adding an extra initial plaintext block, whose
	 * value is computed with HMAC over the record sequence number.
	 */
	if (cc->explicit_IV) {
		/*
		 * We use here the fact that all the HMAC variants we
		 * support can produce at least 16 bytes, while all the
		 * block ciphers we support have blocks of no more than
		 * 16 bytes. Thus, we can always truncate the HMAC output
		 * down to the block size.
		 */
		br_enc64be(tmp, cc->seq);
		br_hmac_init(&hc, &cc->mac, blen);
		br_hmac_update(&hc, tmp, 8);
		br_hmac_out(&hc, buf - blen);
		rbuf = buf - blen - 5;
	} else {
		if (len > 1 && record_type == BR_SSL_APPLICATION_DATA) {
			/*
			 * To do the split, we use a recursive invocation;
			 * since we only give one byte to the inner call,
			 * the recursion stops there.
			 *
			 * We need to compute the exact size of the extra
			 * record, so that the two resulting records end up
			 * being sequential in RAM.
			 *
			 * We use here the fact that cbc_max_plaintext()
			 * adjusted the start offset to leave room for the
			 * initial fragment.
			 */
			size_t xlen;

			rbuf = buf - 4 - ((cc->mac_len + blen + 1) & ~(blen - 1));
			rbuf[0] = buf[0];
			xlen = 1;
			rbuf = cbc_encrypt(cc, record_type, version, rbuf, &xlen);
			buf ++;
			len --;
		} else {
			rbuf = buf - 5;
		}
	}

	/*
	 * Compute MAC.
	 */
	br_enc64be(tmp, cc->seq ++);
	tmp[8] = record_type;
	br_enc16be(tmp + 9, version);
	br_enc16be(tmp + 11, len);
	br_hmac_init(&hc, &cc->mac, cc->mac_len);
	br_hmac_update(&hc, tmp, 13);
	br_hmac_update(&hc, buf, len);
	br_hmac_out(&hc, buf + len);
	len += cc->mac_len;

	/*
	 * Add padding. Between 1 and blen bytes are appended, each of
	 * value plen - 1, so that the total length becomes a multiple
	 * of the block size.
	 */
	plen = blen - (len & (blen - 1));
	memset(buf + len, (unsigned)plen - 1, plen);
	len += plen;

	/*
	 * If an explicit IV is used, the corresponding extra block was
	 * already put in place earlier; we just have to account for it
	 * here.
	 */
	if (cc->explicit_IV) {
		buf -= blen;
		len += blen;
	}

	/*
	 * Encrypt the whole thing. If there is an explicit IV, we also
	 * encrypt it, which is fine (encryption of a uniformly random
	 * block is still a uniformly random block).
	 */
	cc->bc.vtable->run(&cc->bc.vtable, cc->iv, buf, len);

	/*
	 * Add the header and return.
	 */
	buf[-5] = record_type;
	br_enc16be(buf - 4, version);
	br_enc16be(buf - 2, len);
	*data_len = (size_t)((buf + len) - rbuf);
	return rbuf;
}

/* see bearssl_ssl.h */
/*
 * Method table for CBC record encryption. The function pointers are
 * cast from the context-specific signatures used in this file to the
 * generic signatures of the class structures.
 */
const br_sslrec_out_cbc_class br_sslrec_out_cbc_vtable = {
	{
		sizeof(br_sslrec_out_cbc_context),
		(void (*)(const br_sslrec_out_class *const *,
			size_t *, size_t *))
			&cbc_max_plaintext,
		(unsigned char *(*)(const br_sslrec_out_class **,
			int, unsigned, void *, size_t *))
			&cbc_encrypt
	},
	(void (*)(const br_sslrec_out_cbc_class **,
		const br_block_cbcenc_class *, const void *, size_t,
		const br_hash_class *, const void *, size_t, size_t,
		const void *))
		&out_cbc_init
};