contrib/gnu-sort/lib/human.c

   1 /* human.c -- print human readable file size
   2
   3    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
   4    Free Software Foundation, Inc.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2, or (at your option)
   9    any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software Foundation,
  18    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  19
  20 /* Written by Paul Eggert and Larry McVoy.  */
  21
  22 #if HAVE_CONFIG_H
  23 # include <config.h>
  24 #endif
  25
  26 #include "human.h"
  27
  28 #ifndef SIZE_MAX
  29 # define SIZE_MAX ((size_t) -1)
  30 #endif
  31 #ifndef UINTMAX_MAX
  32 # define UINTMAX_MAX ((uintmax_t) -1)
  33 #endif
  34
  35 #if HAVE_LOCALE_H && HAVE_LOCALECONV
  36 # include <locale.h>
  37 #endif
  38
  39 #include <stdio.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42
  43 #include "gettext.h"
  44 #define _(msgid) gettext (msgid)
  45
  46 #include <argmatch.h>
  47 #include <error.h>
  48 #include <xstrtol.h>
  49
  50 /* The maximum length of a suffix like "KiB".  */
  51 #define HUMAN_READABLE_SUFFIX_LENGTH_MAX 3
  52
  53 static const char power_letter[] =
  54 {
  55   0,    /* not used */
  56   'K',  /* kibi ('k' for kilo is a special case) */
  57   'M',  /* mega or mebi */
  58   'G',  /* giga or gibi */
  59   'T',  /* tera or tebi */
  60   'P',  /* peta or pebi */
  61   'E',  /* exa or exbi */
  62   'Z',  /* zetta or 2**70 */
  63   'Y'   /* yotta or 2**80 */
  64 };
  65
  66
  67 /* If INEXACT_STYLE is not human_round_to_nearest, and if easily
  68    possible, adjust VALUE according to the style.  */
  69
  70 static long double
  71 adjust_value (int inexact_style, long double value)
  72 {
  73   /* Do not use the floorl or ceill functions, as that would mean
  74      checking for their presence and possibly linking with the
  75      standard math library, which is a porting pain.  So leave the
  76      value alone if it is too large to easily round.  */
  77   if (inexact_style != human_round_to_nearest && value < UINTMAX_MAX)
  78     {
  79       uintmax_t u = value;
  80       value = u + (inexact_style == human_ceiling && u != value);
  81     }
  82
  83   return value;
  84 }
  85
  86 /* Group the digits of NUMBER according to the grouping rules of the
  87    current locale.  NUMBER contains NUMBERLEN digits.  Modify the
  88    bytes pointed to by NUMBER in place, subtracting 1 from NUMBER for
  89    each byte inserted.  Return the starting address of the modified
  90    number.
  91
  92    To group the digits, use GROUPING and THOUSANDS_SEP as in `struct
  93    lconv' from <locale.h>.  */
  94
  95 static char *
  96 group_number (char *number, size_t numberlen,
  97               char const *grouping, char const *thousands_sep)
  98 {
  99   register char *d;
 100   size_t grouplen = SIZE_MAX;
 101   size_t thousands_seplen = strlen (thousands_sep);
 102   size_t i = numberlen;
 103
 104   /* The maximum possible value for NUMBERLEN is the number of digits
 105      in the square of the largest uintmax_t, so double the size of
 106      uintmax_t before converting to a bound.  302 / 1000 is ceil
 107      (log10 (2.0)).  Add 1 for integer division truncation.  */
 108   char buf[2 * sizeof (uintmax_t) * CHAR_BIT * 302 / 1000 + 1];
 109
 110   memcpy (buf, number, numberlen);
 111   d = number + numberlen;
 112
 113   for (;;)
 114     {
 115       unsigned char g = *grouping;
 116
 117       if (g)
 118         {
 119           grouplen = g < CHAR_MAX ? g : i;
 120           grouping++;
 121         }
 122
 123       if (i < grouplen)
 124         grouplen = i;
 125
 126       d -= grouplen;
 127       i -= grouplen;
 128       memcpy (d, buf + i, grouplen);
 129       if (i == 0)
 130         return d;
 131
 132       d -= thousands_seplen;
 133       memcpy (d, thousands_sep, thousands_seplen);
 134     }
 135 }
 136
 137 /* Convert N to a human readable format in BUF, using the options OPTS.
 138
 139    N is expressed in units of FROM_BLOCK_SIZE.  FROM_BLOCK_SIZE must
 140    be nonnegative.
 141
 142    Use units of TO_BLOCK_SIZE in the output number.  TO_BLOCK_SIZE
 143    must be positive.
 144
 145    Use (OPTS & (human_round_to_nearest | human_floor | human_ceiling))
 146    to determine whether to take the ceiling or floor of any result
 147    that cannot be expressed exactly.
 148
 149    If (OPTS & human_group_digits), group the thousands digits
 150    according to the locale, e.g., `1,000,000' in an American English
 151    locale.
 152
 153    If (OPTS & human_autoscale), deduce the output block size
 154    automatically; TO_BLOCK_SIZE must be 1 but it has no effect on the
 155    output.  Use powers of 1024 if (OPTS & human_base_1024), and powers
 156    of 1000 otherwise.  For example, assuming powers of 1024, 8500
 157    would be converted to 8.3, 133456345 to 127, 56990456345 to 53, and
 158    so on.  Numbers smaller than the power aren't modified.
 159    human_autoscale is normally used together with human_SI.
 160
 161    If (OPTS & human_SI), append an SI prefix indicating which power is
 162    being used.  If in addition (OPTS & human_B), append "B" (if base
 163    1000) or "iB" (if base 1024) to the SI prefix.  When ((OPTS &
 164    human_SI) && ! (OPTS & human_autoscale)), TO_BLOCK_SIZE must be a
 165    power of 1024 or of 1000, depending on (OPTS &
 166    human_base_1024).

        BUF must have room for at least LONGEST_HUMAN_READABLE + 1 bytes:
        the suffix slot ends at index LONGEST_HUMAN_READABLE, where the
        terminating NUL may be stored.  The number is built right to left
        so that it ends at the suffix slot; the value returned is the
        start of the NUL-terminated text inside BUF, which is generally
        not BUF itself.  */
 167
 168 char *
 169 human_readable (uintmax_t n, char *buf, int opts,
 170                 uintmax_t from_block_size, uintmax_t to_block_size)
 171 {
 172   int inexact_style =
 173     opts & (human_round_to_nearest | human_floor | human_ceiling);
 174   unsigned int base = opts & human_base_1024 ? 1024 : 1000;
       /* Integer part of the scaled value, and its first decimal digit
          (integer-arithmetic path only).  */
 175   uintmax_t amt;
 176   int tenths;
       /* Power of BASE the output is expressed in.  -1 means "not chosen
          yet"; it is then derived from TO_BLOCK_SIZE when a suffix is
          wanted.  */
 177   int exponent = -1;
       /* Largest valid index into power_letter.  */
 178   int exponent_max = sizeof power_letter - 1;
       /* Start of the text produced so far; moves toward BUF as
          characters are prepended.  */
 179   char *p;
       /* Where the next suffix byte, and finally the NUL, is stored.  */
 180   char *psuffix;
       /* One past the last integer digit, i.e. the end of the span that
          is passed to group_number.  */
 181   char const *integerlim;
 182
 183   /* 0 means adjusted N == AMT.TENTHS;
 184      1 means AMT.TENTHS < adjusted N < AMT.TENTHS + 0.05;
 185      2 means adjusted N == AMT.TENTHS + 0.05;
 186      3 means AMT.TENTHS + 0.05 < adjusted N < AMT.TENTHS + 0.1.  */
 187   int rounding;
 188
       /* Defaults used when locale information is unavailable; the
          block below replaces them from localeconv when it can.  */
 189   char const *decimal_point = ".";
 190   size_t decimal_pointlen = 1;
 191   char const *grouping = "";
 192   char const *thousands_sep = "";
 193 #if HAVE_LOCALE_H && HAVE_LOCALECONV
 194   struct lconv const *l = localeconv ();
 195   size_t pointlen = strlen (l->decimal_point);
       /* Keep the default "." if the locale's decimal point is empty or
          longer than MB_LEN_MAX bytes.  NOTE(review): the length cap
          presumably matches how LONGEST_HUMAN_READABLE is sized in
          human.h -- confirm there.  */
 196   if (0 < pointlen && pointlen <= MB_LEN_MAX)
 197     {
 198       decimal_point = l->decimal_point;
 199       decimal_pointlen = pointlen;
 200     }
 201   grouping = l->grouping;
       /* Likewise, an overlong thousands separator is replaced by "".  */
 202   if (strlen (l->thousands_sep) <= MB_LEN_MAX)
 203     thousands_sep = l->thousands_sep;
 204 #endif
 205
       /* Set aside the last HUMAN_READABLE_SUFFIX_LENGTH_MAX bytes before
          index LONGEST_HUMAN_READABLE for the suffix; the number is
          written leftward from that point.  */
 206   psuffix = buf + LONGEST_HUMAN_READABLE - HUMAN_READABLE_SUFFIX_LENGTH_MAX;
 207   p = psuffix;
 208
 209   /* Adjust AMT out of FROM_BLOCK_SIZE units and into TO_BLOCK_SIZE
 210      units.  If this can be done exactly with integer arithmetic, do
 211      not use floating point operations.  */
 212   if (to_block_size <= from_block_size)
 213     {
 214       if (from_block_size % to_block_size == 0)
 215         {
               /* Scaling up by a whole factor.  Dividing the product
                  back detects wraparound; on overflow control falls
                  through to the floating point code below.  */
 216           uintmax_t multiplier = from_block_size / to_block_size;
 217           amt = n * multiplier;
 218           if (amt / multiplier == n)
 219             {
 220               tenths = 0;
 221               rounding = 0;
 222               goto use_integer_arithmetic;
 223             }
 224         }
 225     }
 226   else if (from_block_size != 0 && to_block_size % from_block_size == 0)
 227     {
           /* Scaling down by a whole factor: AMT is the quotient, TENTHS
              the first decimal digit, and ROUNDING classifies what is
              left beyond that digit (see the table above).
              NOTE(review): (n % divisor) * 10 is unsigned arithmetic and
              can wrap when DIVISOR exceeds UINTMAX_MAX / 10.  */
 228       uintmax_t divisor = to_block_size / from_block_size;
 229       uintmax_t r10 = (n % divisor) * 10;
 230       uintmax_t r2 = (r10 % divisor) * 2;
 231       amt = n / divisor;
 232       tenths = r10 / divisor;
 233       rounding = r2 < divisor ? 0 < r2 : 2 + (divisor < r2);
 234       goto use_integer_arithmetic;
 235     }
 236
 237   {
 238     /* Either the result cannot be computed easily using uintmax_t,
 239        or from_block_size is zero.  Fall back on floating point.
 240        FIXME: This can yield answers that are slightly off.  */
 241
 242     long double dto_block_size = to_block_size;
 243     long double damt = n * (from_block_size / dto_block_size);
 244     size_t buflen;
         /* Bytes at the end of the formatted text that are not integer
            digits (decimal point plus one fractional digit), or 0.  */
 245     size_t nonintegerlen;
 246
 247     if (! (opts & human_autoscale))
 248       {
             /* Fixed units: print a whole number.  The text is formatted
                at the start of BUF and moved into place afterwards.  */
 249         sprintf (buf, "%.0Lf", adjust_value (inexact_style, damt));
 250         buflen = strlen (buf);
 251         nonintegerlen = 0;
 252       }
 253     else
 254       {
 255         long double e = 1;
 256         exponent = 0;
 257
             /* Pick the power of BASE to divide by, stopping at
                EXPONENT_MAX.  The body runs at least once, so EXPONENT
                is at least 1 here even when DAMT is below BASE.  */
 258         do
 259           {
 260             e *= base;
 261             exponent++;
 262           }
 263         while (e * base <= damt && exponent < exponent_max);
 264
 265         damt /= e;
 266
             /* First try with one fractional digit.  */
 267         sprintf (buf, "%.1Lf", adjust_value (inexact_style, damt));
 268         buflen = strlen (buf);
 269         nonintegerlen = decimal_pointlen + 1;
 270
             /* Reformat without a fraction if the integer part has more
                than one digit (more than two when the base is 1000), or
                if the fractional digit is '0' and the caller asked for
                ".0" to be suppressed.  */
 271         if (1 + nonintegerlen + ! (opts & human_base_1024) < buflen
 272             || ((opts & human_suppress_point_zero)
 273                 && buf[buflen - 1] == '0'))
 274           {
 275             sprintf (buf, "%.0Lf",
 276                      adjust_value (inexact_style, damt * 10) / 10);
 277             buflen = strlen (buf);
 278             nonintegerlen = 0;
 279           }
 280       }
 281
         /* Slide the text from the start of BUF so that it ends at the
            suffix slot; memmove because the two regions may overlap.  */
 282     p = psuffix - buflen;
 283     memmove (p, buf, buflen);
 284     integerlim = p + buflen - nonintegerlen;
 285   }
 286   goto do_grouping;
 287
 288  use_integer_arithmetic:
 289   {
 290     /* The computation can be done exactly, with integer arithmetic.
 291
 292        Use power of BASE notation if requested and if adjusted AMT is
 293        large enough.  */
 294
 295     if (opts & human_autoscale)
 296       {
 297         exponent = 0;
 298
 299         if (base <= amt)
 300           {
                 /* Divide by BASE repeatedly, carrying the first decimal
                    digit in TENTHS and the classification of everything
                    below it in ROUNDING, so no precision that matters
                    for rounding is lost.  */
 301             do
 302               {
 303                 unsigned int r10 = (amt % base) * 10 + tenths;
 304                 unsigned int r2 = (r10 % base) * 2 + (rounding >> 1);
 305                 amt /= base;
 306                 tenths = r10 / base;
 307                 rounding = (r2 < base
 308                             ? (r2 + rounding) != 0
 309                             : 2 + (base < r2 + rounding));
 310                 exponent++;
 311               }
 312             while (base <= amt && exponent < exponent_max);
 313
                 /* A single-digit integer part gets one decimal place.  */
 314             if (amt < 10)
 315               {
                     /* Round TENTHS according to the style.  For
                        round-to-nearest: up when more than half a tenth
                        remains, or on an exact half when TENTHS is odd,
                        so ties go to the even digit.  For ceiling: up
                        whenever anything remains.  */
 316                 if (inexact_style == human_round_to_nearest
 317                     ? 2 < rounding + (tenths & 1)
 318                     : inexact_style == human_ceiling && 0 < rounding)
 319                   {
 320                     tenths++;
 321                     rounding = 0;
 322
 323                     if (tenths == 10)
 324                       {
 325                         amt++;
 326                         tenths = 0;
 327                       }
 328                   }
 329
                     /* The carry above may have made AMT 10, in which
                        case no decimal is printed.  Otherwise emit the
                        digit and the decimal point (right to left), and
                        clear TENTHS and ROUNDING so the integer rounding
                        step below does nothing.  */
 330                 if (amt < 10
 331                     && (tenths || ! (opts & human_suppress_point_zero)))
 332                   {
 333                     *--p = '0' + tenths;
 334                     p -= decimal_pointlen;
 335                     memcpy (p, decimal_point, decimal_pointlen);
 336                     tenths = rounding = 0;
 337                   }
 338               }
 339           }
 340       }
 341
         /* Round AMT to an integer.  For round-to-nearest: up when
            TENTHS is 6 or more, or when TENTHS is 5 and either something
            remains below it or AMT is odd (ties go to even).  For
            ceiling: up whenever any fraction remains.  */
 342     if (inexact_style == human_round_to_nearest
 343         ? 5 < tenths + (0 < rounding + (amt & 1))
 344         : inexact_style == human_ceiling && 0 < tenths + rounding)
 345       {
 346         amt++;
 347
             /* Rounding up reached a full BASE: move to the next unit and
                print 1 (with ".0" unless that is suppressed).  */
 348         if ((opts & human_autoscale)
 349             && amt == base && exponent < exponent_max)
 350           {
 351             exponent++;
 352             if (! (opts & human_suppress_point_zero))
 353               {
 354                 *--p = '0';
 355                 p -= decimal_pointlen;
 356                 memcpy (p, decimal_point, decimal_pointlen);
 357               }
 358             amt = 1;
 359           }
 360       }
 361
 362     integerlim = p;
 363
         /* Emit the integer digits, least significant first, moving P
            backward; at least one digit is always written.  */
 364     do
 365       {
 366         int digit = amt % 10;
 367         *--p = digit + '0';
 368       }
 369     while ((amt /= 10) != 0);
 370   }
 371
 372  do_grouping:
       /* Only the integer digits [P, INTEGERLIM) are grouped; the new
          start of the text is returned by group_number.  */
 373   if (opts & human_group_digits)
 374     p = group_number (p, integerlim - p, grouping, thousands_sep);
 375
 376   if (opts & human_SI)
 377     {
           /* EXPONENT is still negative only when autoscaling was not
              requested.  Derive it by counting how many times BASE must
              be multiplied in to reach TO_BLOCK_SIZE, capped at
              EXPONENT_MAX.  */
 378       if (exponent < 0)
 379         {
 380           uintmax_t power;
 381           exponent = 0;
 382           for (power = 1; power < to_block_size; power *= base)
 383             if (++exponent == exponent_max)
 384               break;
 385         }
 386
           /* No prefix letter for exponent 0.  Decimal kilo is written
              with a lower-case 'k'.  */
 387       if (exponent)
 388         *psuffix++ = (! (opts & human_base_1024) && exponent == 1
 389                       ? 'k'
 390                       : power_letter[exponent]);
 391
 392       if (opts & human_B)
 393         {
               /* "iB" after a binary prefix, plain "B" otherwise.  */
 394           if ((opts & human_base_1024) && exponent)
 395             *psuffix++ = 'i';
 396           *psuffix++ = 'B';
 397         }
 398     }
 399
 400   *psuffix = '\0';
 401
 402   return p;
 403 }
 404
 405
 406 /* The default block size used for output.  This number may change in
 407    the future as disks get larger.  */
 408 #ifndef DEFAULT_BLOCK_SIZE
 409 # define DEFAULT_BLOCK_SIZE 1024
 410 #endif
 411
 412 static char const *const block_size_args[] = { "human-readable", "si", 0 };
 413 static int const block_size_opts[] =
 414   {
 415     human_autoscale + human_SI + human_base_1024,
 416     human_autoscale + human_SI
 417   };
 418
 419 static uintmax_t
 420 default_block_size (void)
 421 {
 422   return getenv ("POSIXLY_CORRECT") ? 512 : DEFAULT_BLOCK_SIZE;
 423 }
 424
 425 static strtol_error
 426 humblock (char const *spec, uintmax_t *block_size, int *options)
 427 {
 428   int i;
 429   int opts = 0;
 430
 431   if (! spec
 432       && ! (spec = getenv ("BLOCK_SIZE"))
 433       && ! (spec = getenv ("BLOCKSIZE")))
 434     *block_size = default_block_size ();
 435   else
 436     {
 437       if (*spec == '\'')
 438         {
 439           opts |= human_group_digits;
 440           spec++;
 441         }
 442
 443       if (0 <= (i = ARGMATCH (spec, block_size_args, block_size_opts)))
 444         {
 445           opts |= block_size_opts[i];
 446           *block_size = 1;
 447         }
 448       else
 449         {
 450           char *ptr;
 451           strtol_error e = xstrtoumax (spec, &ptr, 0, block_size,
 452                                        "eEgGkKmMpPtTyYzZ0");
 453           if (e != LONGINT_OK)
 454             return e;
 455           for (; ! ('0' <= *spec && *spec <= '9'); spec++)
 456             if (spec == ptr)
 457               {
 458                 opts |= human_SI;
 459                 if (ptr[-1] == 'B')
 460                   opts |= human_B;
 461                 if (ptr[-1] != 'B' || ptr[-2] == 'i')
 462                   opts |= human_base_1024;
 463                 break;
 464               }
 465         }
 466     }
 467
 468   *options = opts;
 469   return LONGINT_OK;
 470 }
 471
 472 int
 473 human_options (char const *spec, bool report_errors, uintmax_t *block_size)
 474 {
 475   int opts;
 476   strtol_error e = humblock (spec, block_size, &opts);
 477   if (*block_size == 0)
 478     {
 479       *block_size = default_block_size ();
 480       e = LONGINT_INVALID;
 481     }
 482   if (e != LONGINT_OK && report_errors)
 483     STRTOL_FATAL_ERROR (spec, _("block size"), e);
 484   return opts;
 485 }