usr.bin/split/split.c

   1 /*
   2  * Copyright (c) 1987, 1993, 1994
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *      This product includes software developed by the University of
  16  *      California, Berkeley and its contributors.
  17  * 4. Neither the name of the University nor the names of its contributors
  18  *    may be used to endorse or promote products derived from this software
  19  *    without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  */
  33
  34 #include <sys/cdefs.h>
  35 __FBSDID("$FreeBSD$");
  36
  37 #ifndef lint
  38 static const char copyright[] =
  39 "@(#) Copyright (c) 1987, 1993, 1994\n\
  40         The Regents of the University of California.  All rights reserved.\n";
  41 #endif
  42
  43 #ifndef lint
  44 static const char sccsid[] = "@(#)split.c       8.2 (Berkeley) 4/16/94";
  45 #endif
  46
  47 #include <sys/param.h>
  48 #include <sys/types.h>
  49 #include <sys/stat.h>
  50
  51 #include <ctype.h>
  52 #include <err.h>
  53 #include <errno.h>
  54 #include <fcntl.h>
  55 #include <inttypes.h>
  56 #include <limits.h>
  57 #include <locale.h>
  58 #include <stdbool.h>
  59 #include <stdint.h>
  60 #include <stdio.h>
  61 #include <stdlib.h>
  62 #include <string.h>
  63 #include <unistd.h>
  64 #include <regex.h>
  65 #include <sysexits.h>
  66
  67 #define DEFLINE 1000                    /* Default number of lines per output file. */
  68
  69 static off_t     bytecnt;               /* Byte count to split on (-b, or computed by split3). */
  70 static off_t     chunks = 0;            /* Number of chunks to split into (-n); 0 if unset. */
  71 static long      numlines;              /* Line count to split on; DEFLINE if not given. */
  72 static int       file_open;             /* Nonzero while an output file is open. */
  73 static int       ifd = -1, ofd = -1;    /* Input/output file descriptors; -1 until set. */
  74 static char      bfr[MAXBSIZE];         /* I/O buffer. */
  75 static char      fname[MAXPATHLEN];     /* Output name: prefix, then suffix written by newfile(). */
  76 static regex_t   rgx;                   /* Compiled -p pattern; only valid when pflag is set. */
  77 static int       pflag;                 /* Nonzero if -p was given. */
  78 static bool      dflag;                 /* -d: suffixes use '0'-'9' instead of 'a'-'z'. */
  79 static long      sufflen = 2;           /* File name suffix length (-a). */
  80
  81 static void newfile(void);              /* Open the next output file. */
  82 static void split1(void);               /* Split by byte count. */
  83 static void split2(void);               /* Split by line count or pattern. */
  84 static void split3(void);               /* Split into a fixed number of chunks. */
  85 static void usage(void);                /* Print the synopsis and exit. */
  86
  87 int
  88 main(int argc, char **argv)
  89 {
  90         intmax_t bytecnti;
  91         long scale;
  92         int ch;
  93         char *ep, *p;
  94
  95         setlocale(LC_ALL, "");
  96
  97         dflag = false;
  98         while ((ch = getopt(argc, argv, "0123456789a:b:dl:n:p:")) != -1)
  99                 switch (ch) {
 100                 case '0': case '1': case '2': case '3': case '4':
 101                 case '5': case '6': case '7': case '8': case '9':
 102                         /*
 103                          * Undocumented kludge: split was originally designed
 104                          * to take a number after a dash.
 105                          */
 106                         if (numlines == 0) {
 107                                 p = argv[optind - 1];
 108                                 if (p[0] == '-' && p[1] == ch && !p[2])
 109                                         numlines = strtol(++p, &ep, 10);
 110                                 else
 111                                         numlines =
 112                                             strtol(argv[optind] + 1, &ep, 10);
 113                                 if (numlines <= 0 || *ep)
 114                                         errx(EX_USAGE,
 115                                             "%s: illegal line count", optarg);
 116                         }
 117                         break;
 118                 case 'a':               /* Suffix length */
 119                         if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep)
 120                                 errx(EX_USAGE,
 121                                     "%s: illegal suffix length", optarg);
 122                         break;
 123                 case 'b':               /* Byte count. */
 124                         errno = 0;
 125                         if ((bytecnti = strtoimax(optarg, &ep, 10)) <= 0 ||
 126                             strchr("kKmMgG", *ep) == NULL || errno != 0)
 127                                 errx(EX_USAGE,
 128                                     "%s: illegal byte count", optarg);
 129                         if (*ep == 'k' || *ep == 'K')
 130                                 scale = 1024;
 131                         else if (*ep == 'm' || *ep == 'M')
 132                                 scale = 1024 * 1024;
 133                         else if (*ep == 'g' || *ep == 'G')
 134                                 scale = 1024 * 1024 * 1024;
 135                         else
 136                                 scale = 1;
 137                         if (bytecnti > OFF_MAX / scale)
 138                                 errx(EX_USAGE, "%s: offset too large", optarg);
 139                         bytecnt = (off_t)(bytecnti * scale);
 140                         break;
 141                 case 'd':               /* Decimal suffix */
 142                         dflag = true;
 143                         break;
 144                 case 'l':               /* Line count. */
 145                         if (numlines != 0)
 146                                 usage();
 147                         if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
 148                                 errx(EX_USAGE,
 149                                     "%s: illegal line count", optarg);
 150                         break;
 151                 case 'n':               /* Chunks. */
 152                         if (!isdigit((unsigned char)optarg[0]) ||
 153                             (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
 154                             *ep != '\0') {
 155                                 errx(EX_USAGE, "%s: illegal number of chunks",
 156                                      optarg);
 157                         }
 158                         break;
 159
 160                 case 'p':               /* pattern matching. */
 161                         if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
 162                                 errx(EX_USAGE, "%s: illegal regexp", optarg);
 163                         pflag = 1;
 164                         break;
 165                 default:
 166                         usage();
 167                 }
 168         argv += optind;
 169         argc -= optind;
 170
 171         if (*argv != NULL) {                    /* Input file. */
 172                 if (strcmp(*argv, "-") == 0)
 173                         ifd = STDIN_FILENO;
 174                 else if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
 175                         err(EX_NOINPUT, "%s", *argv);
 176                 ++argv;
 177         }
 178         if (*argv != NULL)                      /* File name prefix. */
 179                 if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname))
 180                         errx(EX_USAGE, "file name prefix is too long");
 181         if (*argv != NULL)
 182                 usage();
 183
 184         if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
 185                 errx(EX_USAGE, "suffix is too long");
 186         if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0))
 187                 usage();
 188
 189         if (numlines == 0)
 190                 numlines = DEFLINE;
 191         else if (bytecnt != 0 || chunks != 0)
 192                 usage();
 193
 194         if (bytecnt && chunks)
 195                 usage();
 196
 197         if (ifd == -1)                          /* Stdin by default. */
 198                 ifd = 0;
 199
 200         if (bytecnt) {
 201                 split1();
 202                 exit (0);
 203         } else if (chunks) {
 204                 split3();
 205                 exit (0);
 206         }
 207         split2();
 208         if (pflag)
 209                 regfree(&rgx);
 210         exit(0);
 211 }
 212
 213 /*
 214  * split1 --
 215  *      Split the input by bytes.
 216  */
 217 static void
 218 split1(void)
 219 {
 220         off_t bcnt;
 221         char *C;
 222         ssize_t dist, len;
 223         int nfiles;
 224
 225         nfiles = 0;
 226
 227         for (bcnt = 0;;)
 228                 switch ((len = read(ifd, bfr, MAXBSIZE))) {
 229                 case 0:
 230                         exit(0);
 231                 case -1:
 232                         err(EX_IOERR, "read");
 233                         /* NOTREACHED */
 234                 default:
 235                         if (!file_open) {
 236                                 if (!chunks || (nfiles < chunks)) {
 237                                         newfile();
 238                                         nfiles++;
 239                                 }
 240                         }
 241                         if (bcnt + len >= bytecnt) {
 242                                 dist = bytecnt - bcnt;
 243                                 if (write(ofd, bfr, dist) != dist)
 244                                         err(EX_IOERR, "write");
 245                                 len -= dist;
 246                                 for (C = bfr + dist; len >= bytecnt;
 247                                     len -= bytecnt, C += bytecnt) {
 248                                         if (!chunks || (nfiles < chunks)) {
 249                                         newfile();
 250                                                 nfiles++;
 251                                         }
 252                                         if (write(ofd,
 253                                             C, bytecnt) != bytecnt)
 254                                                 err(EX_IOERR, "write");
 255                                 }
 256                                 if (len != 0) {
 257                                         if (!chunks || (nfiles < chunks)) {
 258                                         newfile();
 259                                                 nfiles++;
 260                                         }
 261                                         if (write(ofd, C, len) != len)
 262                                                 err(EX_IOERR, "write");
 263                                 } else
 264                                         file_open = 0;
 265                                 bcnt = len;
 266                         } else {
 267                                 bcnt += len;
 268                                 if (write(ofd, bfr, len) != len)
 269                                         err(EX_IOERR, "write");
 270                         }
 271                 }
 272 }
 273
 274 /*
 275  * split2 --
 276  *      Split the input by lines.
 277  */
 278 static void
 279 split2(void)
 280 {
 281         long lcnt = 0;
 282         FILE *infp;
 283
 284         /* Stick a stream on top of input file descriptor */
 285         if ((infp = fdopen(ifd, "r")) == NULL)
 286                 err(EX_NOINPUT, "fdopen");
 287
 288         /* Process input one line at a time */
 289         while (fgets(bfr, sizeof(bfr), infp) != NULL) {
 290                 const int len = strlen(bfr);
 291
 292                 /* If line is too long to deal with, just write it out */
 293                 if (bfr[len - 1] != '\n')
 294                         goto writeit;
 295
 296                 /* Check if we need to start a new file */
 297                 if (pflag) {
 298                         regmatch_t pmatch;
 299
 300                         pmatch.rm_so = 0;
 301                         pmatch.rm_eo = len - 1;
 302                         if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
 303                                 newfile();
 304                 } else if (lcnt++ == numlines) {
 305                         newfile();
 306                         lcnt = 1;
 307                 }
 308
 309 writeit:
 310                 /* Open output file if needed */
 311                 if (!file_open)
 312                         newfile();
 313
 314                 /* Write out line */
 315                 if (write(ofd, bfr, len) != len)
 316                         err(EX_IOERR, "write");
 317         }
 318
 319         /* EOF or error? */
 320         if (ferror(infp))
 321                 err(EX_IOERR, "read");
 322         else
 323                 exit(0);
 324 }
 325
 326 /*
 327  * split3 --
 328  *      Split the input into specified number of chunks
 329  */
 330 static void
 331 split3(void)
 332 {
 333         struct stat sb;
 334
 335         if (fstat(ifd, &sb) == -1) {
 336                 err(1, "stat");
 337                 /* NOTREACHED */
 338         }
 339
 340         if (chunks > sb.st_size) {
 341                 errx(1, "can't split into more than %d files",
 342                     (int)sb.st_size);
 343                 /* NOTREACHED */
 344         }
 345
 346         bytecnt = sb.st_size / chunks;
 347         split1();
 348 }
 349
 350
 351 /*
 352  * newfile --
 353  *      Open a new output file.
 354  */
 355 static void
 356 newfile(void)
 357 {
 358         long i, maxfiles, tfnum;
 359         static long fnum;
 360         static char *fpnt;
 361         char beg, end;
 362         int pattlen;
 363
 364         if (ofd == -1) {
 365                 if (fname[0] == '\0') {
 366                         fname[0] = 'x';
 367                         fpnt = fname + 1;
 368                 } else {
 369                         fpnt = fname + strlen(fname);
 370                 }
 371                 ofd = fileno(stdout);
 372         }
 373
 374         if (dflag) {
 375                 beg = '0';
 376                 end = '9';
 377         }
 378         else {
 379                 beg = 'a';
 380                 end = 'z';
 381         }
 382         pattlen = end - beg + 1;
 383
 384         /* maxfiles = pattlen^sufflen, but don't use libm. */
 385         for (maxfiles = 1, i = 0; i < sufflen; i++)
 386                 if (LONG_MAX / pattlen < maxfiles)
 387                         errx(EX_USAGE, "suffix is too long (max %ld)", i);
 388                 else
 389                         maxfiles *= pattlen;
 390
 391         if (fnum == maxfiles)
 392                 errx(EX_DATAERR, "too many files");
 393
 394         /* Generate suffix of sufflen letters */
 395         tfnum = fnum;
 396         i = sufflen - 1;
 397         do {
 398                 fpnt[i] = tfnum % pattlen + beg;
 399                 tfnum /= pattlen;
 400         } while (i-- > 0);
 401         fpnt[sufflen] = '\0';
 402
 403         ++fnum;
 404         if (!freopen(fname, "w", stdout))
 405                 err(EX_IOERR, "%s", fname);
 406         file_open = 1;
 407 }
 408
 409 static void
 410 usage(void)
 411 {
 412         (void)fprintf(stderr,
 413 "usage: split [-l line_count] [-a suffix_length] [file [prefix]]\n"
 414 "       split -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n"
 415 "       split -n chunk_count [-a suffix_length] [file [prefix]]\n"
 416 "       split -p pattern [-a suffix_length] [file [prefix]]\n");
 417         exit(EX_USAGE);
 418 }