contrib/xz/src/common/tuklib_integer.h

   1 ///////////////////////////////////////////////////////////////////////////////
   2 //
   3 /// \file       tuklib_integer.h
   4 /// \brief      Various integer and bit operations
   5 ///
   6 /// This file provides macros or functions to do some basic integer and bit
   7 /// operations.
   8 ///
   9 /// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l):
  10 ///   - Byte swapping: bswapXX(num)
  11 ///   - Byte order conversions to/from native: convXXYe(num)
  12 ///   - Aligned reads: readXXYe(ptr)
  13 ///   - Aligned writes: writeXXYe(ptr, num)
  14 ///   - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr)
  15 ///   - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num)
  16 ///
  17 /// Since they can macros, the arguments should have no side effects since
  18 /// they may be evaluated more than once.
  19 ///
  20 /// \todo       PowerPC and possibly some other architectures support
  21 ///             byte swapping load and store instructions. This file
  22 ///             doesn't take advantage of those instructions.
  23 ///
  24 /// Bit scan operations for non-zero 32-bit integers:
  25 ///   - Bit scan reverse (find highest non-zero bit): bsr32(num)
  26 ///   - Count leading zeros: clz32(num)
  27 ///   - Count trailing zeros: ctz32(num)
  28 ///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
  29 ///
  30 /// The above bit scan operations return 0-31. If num is zero,
  31 /// the result is undefined.
  32 //
  33 //  Authors:    Lasse Collin
  34 //              Joachim Henke
  35 //
  36 //  This file has been put into the public domain.
  37 //  You can do whatever you want with this file.
  38 //
  39 ///////////////////////////////////////////////////////////////////////////////
  40
  41 #ifndef TUKLIB_INTEGER_H
  42 #define TUKLIB_INTEGER_H
  43
  44 #include "tuklib_common.h"
  45
  46
  47 ////////////////////////////////////////
  48 // Operating system specific features //
  49 ////////////////////////////////////////
  50
  51 #if defined(HAVE_BYTESWAP_H)
  52         // glibc, uClibc, dietlibc
  53 #       include <byteswap.h>
  54 #       ifdef HAVE_BSWAP_16
  55 #               define bswap16(num) bswap_16(num)
  56 #       endif
  57 #       ifdef HAVE_BSWAP_32
  58 #               define bswap32(num) bswap_32(num)
  59 #       endif
  60 #       ifdef HAVE_BSWAP_64
  61 #               define bswap64(num) bswap_64(num)
  62 #       endif
  63
  64 #elif defined(HAVE_SYS_ENDIAN_H)
  65         // *BSDs and Darwin
  66 #       include <sys/endian.h>
  67
  68 #elif defined(HAVE_SYS_BYTEORDER_H)
  69         // Solaris
  70 #       include <sys/byteorder.h>
  71 #       ifdef BSWAP_16
  72 #               define bswap16(num) BSWAP_16(num)
  73 #       endif
  74 #       ifdef BSWAP_32
  75 #               define bswap32(num) BSWAP_32(num)
  76 #       endif
  77 #       ifdef BSWAP_64
  78 #               define bswap64(num) BSWAP_64(num)
  79 #       endif
  80 #       ifdef BE_16
  81 #               define conv16be(num) BE_16(num)
  82 #       endif
  83 #       ifdef BE_32
  84 #               define conv32be(num) BE_32(num)
  85 #       endif
  86 #       ifdef BE_64
  87 #               define conv64be(num) BE_64(num)
  88 #       endif
  89 #       ifdef LE_16
  90 #               define conv16le(num) LE_16(num)
  91 #       endif
  92 #       ifdef LE_32
  93 #               define conv32le(num) LE_32(num)
  94 #       endif
  95 #       ifdef LE_64
  96 #               define conv64le(num) LE_64(num)
  97 #       endif
  98 #endif
  99
 100
 101 ///////////////////
 102 // Byte swapping //
 103 ///////////////////
 104
 105 #ifndef bswap16
 106 #       define bswap16(num) \
 107                 (((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8))
 108 #endif
 109
 110 #ifndef bswap32
 111 #       define bswap32(num) \
 112                 ( (((uint32_t)(num) << 24)                       ) \
 113                 | (((uint32_t)(num) <<  8) & UINT32_C(0x00FF0000)) \
 114                 | (((uint32_t)(num) >>  8) & UINT32_C(0x0000FF00)) \
 115                 | (((uint32_t)(num) >> 24)                       ) )
 116 #endif
 117
 118 #ifndef bswap64
 119 #       define bswap64(num) \
 120                 ( (((uint64_t)(num) << 56)                               ) \
 121                 | (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \
 122                 | (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \
 123                 | (((uint64_t)(num) <<  8) & UINT64_C(0x000000FF00000000)) \
 124                 | (((uint64_t)(num) >>  8) & UINT64_C(0x00000000FF000000)) \
 125                 | (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \
 126                 | (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \
 127                 | (((uint64_t)(num) >> 56)                               ) )
 128 #endif
 129
 130 // Define conversion macros using the basic byte swapping macros.
 131 #ifdef WORDS_BIGENDIAN
 132 #       ifndef conv16be
 133 #               define conv16be(num) ((uint16_t)(num))
 134 #       endif
 135 #       ifndef conv32be
 136 #               define conv32be(num) ((uint32_t)(num))
 137 #       endif
 138 #       ifndef conv64be
 139 #               define conv64be(num) ((uint64_t)(num))
 140 #       endif
 141 #       ifndef conv16le
 142 #               define conv16le(num) bswap16(num)
 143 #       endif
 144 #       ifndef conv32le
 145 #               define conv32le(num) bswap32(num)
 146 #       endif
 147 #       ifndef conv64le
 148 #               define conv64le(num) bswap64(num)
 149 #       endif
 150 #else
 151 #       ifndef conv16be
 152 #               define conv16be(num) bswap16(num)
 153 #       endif
 154 #       ifndef conv32be
 155 #               define conv32be(num) bswap32(num)
 156 #       endif
 157 #       ifndef conv64be
 158 #               define conv64be(num) bswap64(num)
 159 #       endif
 160 #       ifndef conv16le
 161 #               define conv16le(num) ((uint16_t)(num))
 162 #       endif
 163 #       ifndef conv32le
 164 #               define conv32le(num) ((uint32_t)(num))
 165 #       endif
 166 #       ifndef conv64le
 167 #               define conv64le(num) ((uint64_t)(num))
 168 #       endif
 169 #endif
 170
 171
 172 //////////////////////////////
 173 // Aligned reads and writes //
 174 //////////////////////////////
 175
 176 static inline uint16_t
 177 read16be(const uint8_t *buf)
 178 {
 179         uint16_t num = *(const uint16_t *)buf;
 180         return conv16be(num);
 181 }
 182
 183
 184 static inline uint16_t
 185 read16le(const uint8_t *buf)
 186 {
 187         uint16_t num = *(const uint16_t *)buf;
 188         return conv16le(num);
 189 }
 190
 191
 192 static inline uint32_t
 193 read32be(const uint8_t *buf)
 194 {
 195         uint32_t num = *(const uint32_t *)buf;
 196         return conv32be(num);
 197 }
 198
 199
 200 static inline uint32_t
 201 read32le(const uint8_t *buf)
 202 {
 203         uint32_t num = *(const uint32_t *)buf;
 204         return conv32le(num);
 205 }
 206
 207
 208 static inline uint64_t
 209 read64be(const uint8_t *buf)
 210 {
 211         uint64_t num = *(const uint64_t *)buf;
 212         return conv64be(num);
 213 }
 214
 215
 216 static inline uint64_t
 217 read64le(const uint8_t *buf)
 218 {
 219         uint64_t num = *(const uint64_t *)buf;
 220         return conv64le(num);
 221 }
 222
 223
 224 // NOTE: Possible byte swapping must be done in a macro to allow GCC
 225 // to optimize byte swapping of constants when using glibc's or *BSD's
 226 // byte swapping macros. The actual write is done in an inline function
 227 // to make type checking of the buf pointer possible similarly to readXXYe()
 228 // functions.
 229
 230 #define write16be(buf, num) write16ne((buf), conv16be(num))
 231 #define write16le(buf, num) write16ne((buf), conv16le(num))
 232 #define write32be(buf, num) write32ne((buf), conv32be(num))
 233 #define write32le(buf, num) write32ne((buf), conv32le(num))
 234 #define write64be(buf, num) write64ne((buf), conv64be(num))
 235 #define write64le(buf, num) write64ne((buf), conv64le(num))
 236
 237
 238 static inline void
 239 write16ne(uint8_t *buf, uint16_t num)
 240 {
 241         *(uint16_t *)buf = num;
 242         return;
 243 }
 244
 245
 246 static inline void
 247 write32ne(uint8_t *buf, uint32_t num)
 248 {
 249         *(uint32_t *)buf = num;
 250         return;
 251 }
 252
 253
 254 static inline void
 255 write64ne(uint8_t *buf, uint64_t num)
 256 {
 257         *(uint64_t *)buf = num;
 258         return;
 259 }
 260
 261
 262 ////////////////////////////////
 263 // Unaligned reads and writes //
 264 ////////////////////////////////
 265
 266 // NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and
 267 // 32-bit unaligned integer loads and stores. It's possible that 64-bit
 268 // unaligned access doesn't work or is slower than byte-by-byte access.
 269 // Since unaligned 64-bit is probably not needed as often as 16-bit or
 270 // 32-bit, we simply don't support 64-bit unaligned access for now.
 271 #ifdef TUKLIB_FAST_UNALIGNED_ACCESS
 272 #       define unaligned_read16be read16be
 273 #       define unaligned_read16le read16le
 274 #       define unaligned_read32be read32be
 275 #       define unaligned_read32le read32le
 276 #       define unaligned_write16be write16be
 277 #       define unaligned_write16le write16le
 278 #       define unaligned_write32be write32be
 279 #       define unaligned_write32le write32le
 280
 281 #else
 282
 283 static inline uint16_t
 284 unaligned_read16be(const uint8_t *buf)
 285 {
 286         uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
 287         return num;
 288 }
 289
 290
 291 static inline uint16_t
 292 unaligned_read16le(const uint8_t *buf)
 293 {
 294         uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
 295         return num;
 296 }
 297
 298
 299 static inline uint32_t
 300 unaligned_read32be(const uint8_t *buf)
 301 {
 302         uint32_t num = (uint32_t)buf[0] << 24;
 303         num |= (uint32_t)buf[1] << 16;
 304         num |= (uint32_t)buf[2] << 8;
 305         num |= (uint32_t)buf[3];
 306         return num;
 307 }
 308
 309
 310 static inline uint32_t
 311 unaligned_read32le(const uint8_t *buf)
 312 {
 313         uint32_t num = (uint32_t)buf[0];
 314         num |= (uint32_t)buf[1] << 8;
 315         num |= (uint32_t)buf[2] << 16;
 316         num |= (uint32_t)buf[3] << 24;
 317         return num;
 318 }
 319
 320
 321 static inline void
 322 unaligned_write16be(uint8_t *buf, uint16_t num)
 323 {
 324         buf[0] = num >> 8;
 325         buf[1] = num;
 326         return;
 327 }
 328
 329
 330 static inline void
 331 unaligned_write16le(uint8_t *buf, uint16_t num)
 332 {
 333         buf[0] = num;
 334         buf[1] = num >> 8;
 335         return;
 336 }
 337
 338
 339 static inline void
 340 unaligned_write32be(uint8_t *buf, uint32_t num)
 341 {
 342         buf[0] = num >> 24;
 343         buf[1] = num >> 16;
 344         buf[2] = num >> 8;
 345         buf[3] = num;
 346         return;
 347 }
 348
 349
 350 static inline void
 351 unaligned_write32le(uint8_t *buf, uint32_t num)
 352 {
 353         buf[0] = num;
 354         buf[1] = num >> 8;
 355         buf[2] = num >> 16;
 356         buf[3] = num >> 24;
 357         return;
 358 }
 359
 360 #endif
 361
 362
 363 static inline uint32_t
 364 bsr32(uint32_t n)
 365 {
 366         // Check for ICC first, since it tends to define __GNUC__ too.
 367 #if defined(__INTEL_COMPILER)
 368         return _bit_scan_reverse(n);
 369
 370 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
 371         // GCC >= 3.4 has __builtin_clz(), which gives good results on
 372         // multiple architectures. On x86, __builtin_clz() ^ 31U becomes
 373         // either plain BSR (so the XOR gets optimized away) or LZCNT and
 374         // XOR (if -march indicates that SSE4a instructions are supported).
 375         return __builtin_clz(n) ^ 31U;
 376
 377 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 378         uint32_t i;
 379         __asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
 380         return i;
 381
 382 #elif defined(_MSC_VER) && _MSC_VER >= 1400
 383         // MSVC isn't supported by tuklib, but since this code exists,
 384         // it doesn't hurt to have it here anyway.
 385         uint32_t i;
 386         _BitScanReverse((DWORD *)&i, n);
 387         return i;
 388
 389 #else
 390         uint32_t i = 31;
 391
 392         if ((n & UINT32_C(0xFFFF0000)) == 0) {
 393                 n <<= 16;
 394                 i = 15;
 395         }
 396
 397         if ((n & UINT32_C(0xFF000000)) == 0) {
 398                 n <<= 8;
 399                 i -= 8;
 400         }
 401
 402         if ((n & UINT32_C(0xF0000000)) == 0) {
 403                 n <<= 4;
 404                 i -= 4;
 405         }
 406
 407         if ((n & UINT32_C(0xC0000000)) == 0) {
 408                 n <<= 2;
 409                 i -= 2;
 410         }
 411
 412         if ((n & UINT32_C(0x80000000)) == 0)
 413                 --i;
 414
 415         return i;
 416 #endif
 417 }
 418
 419
 420 static inline uint32_t
 421 clz32(uint32_t n)
 422 {
 423 #if defined(__INTEL_COMPILER)
 424         return _bit_scan_reverse(n) ^ 31U;
 425
 426 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
 427         return __builtin_clz(n);
 428
 429 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 430         uint32_t i;
 431         __asm__("bsrl %1, %0\n\t"
 432                 "xorl $31, %0"
 433                 : "=r" (i) : "rm" (n));
 434         return i;
 435
 436 #elif defined(_MSC_VER) && _MSC_VER >= 1400
 437         uint32_t i;
 438         _BitScanReverse((DWORD *)&i, n);
 439         return i ^ 31U;
 440
 441 #else
 442         uint32_t i = 0;
 443
 444         if ((n & UINT32_C(0xFFFF0000)) == 0) {
 445                 n <<= 16;
 446                 i = 16;
 447         }
 448
 449         if ((n & UINT32_C(0xFF000000)) == 0) {
 450                 n <<= 8;
 451                 i += 8;
 452         }
 453
 454         if ((n & UINT32_C(0xF0000000)) == 0) {
 455                 n <<= 4;
 456                 i += 4;
 457         }
 458
 459         if ((n & UINT32_C(0xC0000000)) == 0) {
 460                 n <<= 2;
 461                 i += 2;
 462         }
 463
 464         if ((n & UINT32_C(0x80000000)) == 0)
 465                 ++i;
 466
 467         return i;
 468 #endif
 469 }
 470
 471
 472 static inline uint32_t
 473 ctz32(uint32_t n)
 474 {
 475 #if defined(__INTEL_COMPILER)
 476         return _bit_scan_forward(n);
 477
 478 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
 479         return __builtin_ctz(n);
 480
 481 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 482         uint32_t i;
 483         __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
 484         return i;
 485
 486 #elif defined(_MSC_VER) && _MSC_VER >= 1400
 487         uint32_t i;
 488         _BitScanForward((DWORD *)&i, n);
 489         return i;
 490
 491 #else
 492         uint32_t i = 0;
 493
 494         if ((n & UINT32_C(0x0000FFFF)) == 0) {
 495                 n >>= 16;
 496                 i = 16;
 497         }
 498
 499         if ((n & UINT32_C(0x000000FF)) == 0) {
 500                 n >>= 8;
 501                 i += 8;
 502         }
 503
 504         if ((n & UINT32_C(0x0000000F)) == 0) {
 505                 n >>= 4;
 506                 i += 4;
 507         }
 508
 509         if ((n & UINT32_C(0x00000003)) == 0) {
 510                 n >>= 2;
 511                 i += 2;
 512         }
 513
 514         if ((n & UINT32_C(0x00000001)) == 0)
 515                 ++i;
 516
 517         return i;
 518 #endif
 519 }
 520
 521 #define bsf32 ctz32
 522
 523 #endif