sys/opencrypto/gfmult.c

   1 /*-
   2  * Copyright (c) 2014 The FreeBSD Foundation
   3  * All rights reserved.
   4  *
   5  * This software was developed by John-Mark Gurney under
   6  * the sponsorship of the FreeBSD Foundation and
   7  * Rubicon Communications, LLC (Netgate).
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  * 1.  Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  * 2.  Redistributions in binary form must reproduce the above copyright
  14  *     notice, this list of conditions and the following disclaimer in the
  15  *     documentation and/or other materials provided with the distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  *
  29  *      $FreeBSD$
  30  *
  31  */
  32
  33 #include "gfmult.h"
  34
  35 #define REV_POLY_REDUCT 0xe1    /* 0x87 bit reversed */
  36
  37 /* reverse the bits of a nibble */
  38 static const uint8_t nib_rev[] = {
  39         0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
  40         0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf,
  41 };
  42
  43 /* calculate v * 2 */
  44 static inline struct gf128
  45 gf128_mulalpha(struct gf128 v)
  46 {
  47         uint64_t mask;
  48
  49         mask = !!(v.v[1] & 1);
  50         mask = ~(mask - 1);
  51         v.v[1] = (v.v[1] >> 1) | ((v.v[0] & 1) << 63);
  52         v.v[0] = (v.v[0] >> 1) ^ ((mask & REV_POLY_REDUCT) << 56);
  53
  54         return v;
  55 }
  56
  57 /*
  58  * Generate a table for 0-16 * h.  Store the results in the table w/ indexes
  59  * bit reversed, and the words striped across the values.
  60  */
  61 void
  62 gf128_genmultable(struct gf128 h, struct gf128table *t)
  63 {
  64         struct gf128 tbl[16];
  65         int i;
  66
  67         tbl[0] = MAKE_GF128(0, 0);
  68         tbl[1] = h;
  69
  70         for (i = 2; i < 16; i += 2) {
  71                 tbl[i] = gf128_mulalpha(tbl[i / 2]);
  72                 tbl[i + 1] = gf128_add(tbl[i], h);
  73         }
  74
  75         for (i = 0; i < 16; i++) {
  76                 t->a[nib_rev[i]] = tbl[i].v[0] >> 32;
  77                 t->b[nib_rev[i]] = tbl[i].v[0];
  78                 t->c[nib_rev[i]] = tbl[i].v[1] >> 32;
  79                 t->d[nib_rev[i]] = tbl[i].v[1];
  80         }
  81 }
  82
  83 /*
  84  * Generate tables containing h, h^2, h^3 and h^4, starting at 0.
  85  */
  86 void
  87 gf128_genmultable4(struct gf128 h, struct gf128table4 *t)
  88 {
  89         struct gf128 h2, h3, h4;
  90
  91         gf128_genmultable(h, &t->tbls[0]);
  92
  93         h2 = gf128_mul(h, &t->tbls[0]);
  94
  95         gf128_genmultable(h2, &t->tbls[1]);
  96
  97         h3 = gf128_mul(h, &t->tbls[1]);
  98         gf128_genmultable(h3, &t->tbls[2]);
  99
 100         h4 = gf128_mul(h2, &t->tbls[1]);
 101         gf128_genmultable(h4, &t->tbls[3]);
 102 }
 103
 104 /*
 105  * Read a row from the table.
 106  */
 107 static inline struct gf128
 108 readrow(struct gf128table *tbl, unsigned bits)
 109 {
 110         struct gf128 r;
 111
 112         bits = bits % 16;
 113
 114         r.v[0] = ((uint64_t)tbl->a[bits] << 32) | tbl->b[bits];
 115         r.v[1] = ((uint64_t)tbl->c[bits] << 32) | tbl->d[bits];
 116
 117         return r;
 118 }
 119
 120 /*
 121  * These are the reduction values.  Since we are dealing with bit reversed
 122  * version, the values need to be bit reversed, AND the indexes are also
 123  * bit reversed to make lookups quicker.
 124  */
 125 static uint16_t reduction[] = {
 126         0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0,
 127         0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0,
 128 };
 129
 130 /*
 131  * Calculate:
 132  * (x*2^4 + word[3,0]*h) *
 133  * 2^4 + word[7,4]*h) *
 134  * ...
 135  * 2^4 + word[63,60]*h
 136  */
 137 static struct gf128
 138 gfmultword(uint64_t word, struct gf128 x, struct gf128table *tbl)
 139 {
 140         struct gf128 row;
 141         unsigned bits;
 142         unsigned redbits;
 143         int i;
 144
 145         for (i = 0; i < 64; i += 4) {
 146                 bits = word % 16;
 147
 148                 /* fetch row */
 149                 row = readrow(tbl, bits);
 150
 151                 /* x * 2^4 */
 152                 redbits = x.v[1] % 16;
 153                 x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;
 154                 x.v[0] >>= 4;
 155                 x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);
 156
 157                 word >>= 4;
 158
 159                 x = gf128_add(x, row);
 160         }
 161
 162         return x;
 163 }
 164
 165 /*
 166  * Calculate
 167  * (x*2^4 + worda[3,0]*h^4+wordb[3,0]*h^3+...+wordd[3,0]*h) *
 168  * ...
 169  * 2^4 + worda[63,60]*h^4+ ... + wordd[63,60]*h
 170  *
 171  * Passing/returning struct is .5% faster than passing in via pointer on
 172  * amd64.
 173  */
 174 static struct gf128
 175 gfmultword4(uint64_t worda, uint64_t wordb, uint64_t wordc, uint64_t wordd,
 176     struct gf128 x, struct gf128table4 *tbl)
 177 {
 178         struct gf128 rowa, rowb, rowc, rowd;
 179         unsigned bitsa, bitsb, bitsc, bitsd;
 180         unsigned redbits;
 181         int i;
 182
 183         /*
 184          * XXX - nibble reverse words to save a shift? probably not as
 185          * nibble reverse would take 20 ops (5 * 4) verse 16
 186          */
 187
 188         for (i = 0; i < 64; i += 4) {
 189                 bitsa = worda % 16;
 190                 bitsb = wordb % 16;
 191                 bitsc = wordc % 16;
 192                 bitsd = wordd % 16;
 193
 194                 /* fetch row */
 195                 rowa = readrow(&tbl->tbls[3], bitsa);
 196                 rowb = readrow(&tbl->tbls[2], bitsb);
 197                 rowc = readrow(&tbl->tbls[1], bitsc);
 198                 rowd = readrow(&tbl->tbls[0], bitsd);
 199
 200                 /* x * 2^4 */
 201                 redbits = x.v[1] % 16;
 202                 x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;
 203                 x.v[0] >>= 4;
 204                 x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);
 205
 206                 worda >>= 4;
 207                 wordb >>= 4;
 208                 wordc >>= 4;
 209                 wordd >>= 4;
 210
 211                 x = gf128_add(x, gf128_add(rowa, gf128_add(rowb,
 212                     gf128_add(rowc, rowd))));
 213         }
 214
 215         return x;
 216 }
 217
 218 struct gf128
 219 gf128_mul(struct gf128 v, struct gf128table *tbl)
 220 {
 221         struct gf128 ret;
 222
 223         ret = MAKE_GF128(0, 0);
 224
 225         ret = gfmultword(v.v[1], ret, tbl);
 226         ret = gfmultword(v.v[0], ret, tbl);
 227
 228         return ret;
 229 }
 230
 231 /*
 232  * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or:
 233  * (((a*h+b)*h+c)*h+d)*h
 234  */
 235 struct gf128
 236 gf128_mul4(struct gf128 a, struct gf128 b, struct gf128 c, struct gf128 d,
 237     struct gf128table4 *tbl)
 238 {
 239         struct gf128 tmp;
 240
 241         tmp = MAKE_GF128(0, 0);
 242
 243         tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);
 244         tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);
 245
 246         return tmp;
 247 }
 248
 249 /*
 250  * a = data[0..15] + r
 251  * b = data[16..31]
 252  * c = data[32..47]
 253  * d = data[48..63]
 254  *
 255  * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or:
 256  * (((a*h+b)*h+c)*h+d)*h
 257  */
 258 struct gf128
 259 gf128_mul4b(struct gf128 r, const uint8_t *v, struct gf128table4 *tbl)
 260 {
 261         struct gf128 a, b, c, d;
 262         struct gf128 tmp;
 263
 264         tmp = MAKE_GF128(0, 0);
 265
 266         a = gf128_add(r, gf128_read(&v[0*16]));
 267         b = gf128_read(&v[1*16]);
 268         c = gf128_read(&v[2*16]);
 269         d = gf128_read(&v[3*16]);
 270
 271         tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);
 272         tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);
 273
 274         return tmp;
 275 }