contrib/compiler-rt/lib/builtins/udivmodti4.c

   1 /* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===
   2  *
   3  *                    The LLVM Compiler Infrastructure
   4  *
   5  * This file is dual licensed under the MIT and the University of Illinois Open
   6  * Source Licenses. See LICENSE.TXT for details.
   7  *
   8  * ===----------------------------------------------------------------------===
   9  *
  10  * This file implements __udivmodti4 for the compiler_rt library.
  11  *
  12  * ===----------------------------------------------------------------------===
  13  */
  14
  15 #include "int_lib.h"
  16
  17 #ifdef CRT_HAS_128BIT
  18
  19 /* Effects: if rem != 0, *rem = a % b
  20  * Returns: a / b
  21  */
  22
  23 /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
  24
  25 COMPILER_RT_ABI tu_int
  26 __udivmodti4(tu_int a, tu_int b, tu_int* rem)
  27 {
  28     const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
  29     const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
  30     utwords n;
  31     n.all = a;
  32     utwords d;
  33     d.all = b;
  34     utwords q;
  35     utwords r;
  36     unsigned sr;
  37     /* special cases, X is unknown, K != 0 */
  38     if (n.s.high == 0)
  39     {
  40         if (d.s.high == 0)
  41         {
  42             /* 0 X
  43              * ---
  44              * 0 X
  45              */
  46             if (rem)
  47                 *rem = n.s.low % d.s.low;
  48             return n.s.low / d.s.low;
  49         }
  50         /* 0 X
  51          * ---
  52          * K X
  53          */
  54         if (rem)
  55             *rem = n.s.low;
  56         return 0;
  57     }
  58     /* n.s.high != 0 */
  59     if (d.s.low == 0)
  60     {
  61         if (d.s.high == 0)
  62         {
  63             /* K X
  64              * ---
  65              * 0 0
  66              */
  67             if (rem)
  68                 *rem = n.s.high % d.s.low;
  69             return n.s.high / d.s.low;
  70         }
  71         /* d.s.high != 0 */
  72         if (n.s.low == 0)
  73         {
  74             /* K 0
  75              * ---
  76              * K 0
  77              */
  78             if (rem)
  79             {
  80                 r.s.high = n.s.high % d.s.high;
  81                 r.s.low = 0;
  82                 *rem = r.all;
  83             }
  84             return n.s.high / d.s.high;
  85         }
  86         /* K K
  87          * ---
  88          * K 0
  89          */
  90         if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
  91         {
  92             if (rem)
  93             {
  94                 r.s.low = n.s.low;
  95                 r.s.high = n.s.high & (d.s.high - 1);
  96                 *rem = r.all;
  97             }
  98             return n.s.high >> __builtin_ctzll(d.s.high);
  99         }
 100         /* K K
 101          * ---
 102          * K 0
 103          */
 104         sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
 105         /* 0 <= sr <= n_udword_bits - 2 or sr large */
 106         if (sr > n_udword_bits - 2)
 107         {
 108            if (rem)
 109                 *rem = n.all;
 110             return 0;
 111         }
 112         ++sr;
 113         /* 1 <= sr <= n_udword_bits - 1 */
 114         /* q.all = n.all << (n_utword_bits - sr); */
 115         q.s.low = 0;
 116         q.s.high = n.s.low << (n_udword_bits - sr);
 117         /* r.all = n.all >> sr; */
 118         r.s.high = n.s.high >> sr;
 119         r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
 120     }
 121     else  /* d.s.low != 0 */
 122     {
 123         if (d.s.high == 0)
 124         {
 125             /* K X
 126              * ---
 127              * 0 K
 128              */
 129             if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
 130             {
 131                 if (rem)
 132                     *rem = n.s.low & (d.s.low - 1);
 133                 if (d.s.low == 1)
 134                     return n.all;
 135                 sr = __builtin_ctzll(d.s.low);
 136                 q.s.high = n.s.high >> sr;
 137                 q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
 138                 return q.all;
 139             }
 140             /* K X
 141              * ---
 142              * 0 K
 143              */
 144             sr = 1 + n_udword_bits + __builtin_clzll(d.s.low)
 145                                    - __builtin_clzll(n.s.high);
 146             /* 2 <= sr <= n_utword_bits - 1
 147              * q.all = n.all << (n_utword_bits - sr);
 148              * r.all = n.all >> sr;
 149              */
 150             if (sr == n_udword_bits)
 151             {
 152                 q.s.low = 0;
 153                 q.s.high = n.s.low;
 154                 r.s.high = 0;
 155                 r.s.low = n.s.high;
 156             }
 157             else if (sr < n_udword_bits)  // 2 <= sr <= n_udword_bits - 1
 158             {
 159                 q.s.low = 0;
 160                 q.s.high = n.s.low << (n_udword_bits - sr);
 161                 r.s.high = n.s.high >> sr;
 162                 r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
 163             }
 164             else              // n_udword_bits + 1 <= sr <= n_utword_bits - 1
 165             {
 166                 q.s.low = n.s.low << (n_utword_bits - sr);
 167                 q.s.high = (n.s.high << (n_utword_bits - sr)) |
 168                            (n.s.low >> (sr - n_udword_bits));
 169                 r.s.high = 0;
 170                 r.s.low = n.s.high >> (sr - n_udword_bits);
 171             }
 172         }
 173         else
 174         {
 175             /* K X
 176              * ---
 177              * K K
 178              */
 179             sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
 180             /*0 <= sr <= n_udword_bits - 1 or sr large */
 181             if (sr > n_udword_bits - 1)
 182             {
 183                if (rem)
 184                     *rem = n.all;
 185                 return 0;
 186             }
 187             ++sr;
 188             /* 1 <= sr <= n_udword_bits
 189              * q.all = n.all << (n_utword_bits - sr);
 190              * r.all = n.all >> sr;
 191              */
 192             q.s.low = 0;
 193             if (sr == n_udword_bits)
 194             {
 195                 q.s.high = n.s.low;
 196                 r.s.high = 0;
 197                 r.s.low = n.s.high;
 198             }
 199             else
 200             {
 201                 r.s.high = n.s.high >> sr;
 202                 r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
 203                 q.s.high = n.s.low << (n_udword_bits - sr);
 204             }
 205         }
 206     }
 207     /* Not a special case
 208      * q and r are initialized with:
 209      * q.all = n.all << (n_utword_bits - sr);
 210      * r.all = n.all >> sr;
 211      * 1 <= sr <= n_utword_bits - 1
 212      */
 213     su_int carry = 0;
 214     for (; sr > 0; --sr)
 215     {
 216         /* r:q = ((r:q)  << 1) | carry */
 217         r.s.high = (r.s.high << 1) | (r.s.low  >> (n_udword_bits - 1));
 218         r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_udword_bits - 1));
 219         q.s.high = (q.s.high << 1) | (q.s.low  >> (n_udword_bits - 1));
 220         q.s.low  = (q.s.low  << 1) | carry;
 221         /* carry = 0;
 222          * if (r.all >= d.all)
 223          * {
 224          *     r.all -= d.all;
 225          *      carry = 1;
 226          * }
 227          */
 228         const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1);
 229         carry = s & 1;
 230         r.all -= d.all & s;
 231     }
 232     q.all = (q.all << 1) | carry;
 233     if (rem)
 234         *rem = r.all;
 235     return q.all;
 236 }
 237
 238 #endif /* CRT_HAS_128BIT */