contrib/mandoc/term.c

   1 /*      $Id: term.c,v 1.281 2019/06/03 20:23:41 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2010-2019 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #include "config.h"
  19
  20 #include <sys/types.h>
  21
  22 #include <assert.h>
  23 #include <ctype.h>
  24 #include <stdint.h>
  25 #include <stdio.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28
  29 #include "mandoc.h"
  30 #include "mandoc_aux.h"
  31 #include "out.h"
  32 #include "term.h"
  33 #include "main.h"
  34
  35 static  size_t           cond_width(const struct termp *, int, int *);
  36 static  void             adjbuf(struct termp_col *, size_t);
  37 static  void             bufferc(struct termp *, char);
  38 static  void             encode(struct termp *, const char *, size_t);
  39 static  void             encode1(struct termp *, int);
  40 static  void             endline(struct termp *);
  41 static  void             term_field(struct termp *, size_t, size_t,
  42                                 size_t, size_t);
  43 static  void             term_fill(struct termp *, size_t *, size_t *,
  44                                 size_t);
  45
  46
  47 void
  48 term_setcol(struct termp *p, size_t maxtcol)
  49 {
  50         if (maxtcol > p->maxtcol) {
  51                 p->tcols = mandoc_recallocarray(p->tcols,
  52                     p->maxtcol, maxtcol, sizeof(*p->tcols));
  53                 p->maxtcol = maxtcol;
  54         }
  55         p->lasttcol = maxtcol - 1;
  56         p->tcol = p->tcols;
  57 }
  58
  59 void
  60 term_free(struct termp *p)
  61 {
  62         for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++)
  63                 free(p->tcol->buf);
  64         free(p->tcols);
  65         free(p->fontq);
  66         free(p);
  67 }
  68
  69 void
  70 term_begin(struct termp *p, term_margin head,
  71                 term_margin foot, const struct roff_meta *arg)
  72 {
  73
  74         p->headf = head;
  75         p->footf = foot;
  76         p->argf = arg;
  77         (*p->begin)(p);
  78 }
  79
  80 void
  81 term_end(struct termp *p)
  82 {
  83
  84         (*p->end)(p);
  85 }
  86
  87 /*
  88  * Flush a chunk of text.  By default, break the output line each time
  89  * the right margin is reached, and continue output on the next line
  90  * at the same offset as the chunk itself.  By default, also break the
  91  * output line at the end of the chunk.  There are many flags modifying
  92  * this behaviour, see the comments in the body of the function.
      *
      * The text is taken from the buffer of the current column, p->tcol,
      * up to the position p->tcol->lastcol.  Each output line is measured
      * with term_fill() and then printed with term_field().
      *
      * With the MULTICOL flag, at most one output line is printed per
      * call, reading resumes at p->tcol->col, and this function never
      * calls endline() itself.
  93  */
  94 void
  95 term_flushln(struct termp *p)
  96 {
  97         size_t   vbl;      /* Number of blanks to prepend to the output. */
  98         size_t   vbr;      /* Actual visual position of the end of field. */
  99         size_t   vfield;   /* Desired visual field width. */
 100         size_t   vtarget;  /* Desired visual position of the right margin. */
 101         size_t   ic;       /* Character position in the input buffer. */
 102         size_t   nbr;      /* Number of characters to print in this field. */
 103
 104         /*
 105          * Normally, start writing at the left margin, but with the
 106          * NOPAD flag, start writing at the current position instead.
              * No blanks are needed either if the current position
              * is already to the right of the left margin.
 107          */
 108
 109         vbl = (p->flags & TERMP_NOPAD) || p->tcol->offset < p->viscol ?
 110             0 : p->tcol->offset - p->viscol;
             /* A non-zero p->minbl is a lower bound for the blanks. */
 111         if (p->minbl && vbl < p->minbl)
 112                 vbl = p->minbl;
 113
             /* Outside multi-column mode, read from the start of the buffer. */
 114         if ((p->flags & TERMP_MULTICOL) == 0)
 115                 p->tcol->col = 0;
 116
 117         /* Loop over output lines. */
 118
 119         for (;;) {
                     /*
                      * Width remaining between the indented start
                      * position and the right margin of the field,
                      * or zero if that margin was already passed.
                      */
 120                 vfield = p->tcol->rmargin > p->viscol + vbl ?
 121                     p->tcol->rmargin - p->viscol - vbl : 0;
 122
 123                 /*
 124                  * Normally, break the line at the right margin
 125                  * of the field, but with the NOBREAK flag, only
 126                  * break it at the max right margin of the screen,
 127                  * and with the BRNEVER flag, never break it at all.
 128                  */
 129
 130                 vtarget = p->flags & TERMP_BRNEVER ? SIZE_MAX :
 131                     (p->flags & TERMP_NOBREAK) == 0 ? vfield :
 132                     p->maxrmargin > p->viscol + vbl ?
 133                     p->maxrmargin - p->viscol - vbl : 0;
 134
 135                 /*
 136                  * Figure out how much text will fit in the field.
 137                  * If there is whitespace only, print nothing.
 138                  */
 139
 140                 term_fill(p, &nbr, &vbr, vtarget);
 141                 if (nbr == 0)
 142                         break;
 143
 144                 /*
 145                  * With the CENTER or RIGHT flag, increase the indentation
 146                  * to center the text between the left and right margins
 147                  * or to adjust it to the right margin, respectively.
 148                  */
 149
 150                 if (vbr < vtarget) {
 151                         if (p->flags & TERMP_CENTER)
 152                                 vbl += (vtarget - vbr) / 2;
 153                         else if (p->flags & TERMP_RIGHT)
 154                                 vbl += vtarget - vbr;
 155                 }
 156
 157                 /* Finally, print the field content. */
 158
 159                 term_field(p, vbl, nbr, vbr, vtarget);
 160
 161                 /*
 162                  * If there is no text left in the field, exit the loop.
 163                  * If the BRTRSP flag is set, consider trailing
 164                  * whitespace significant when deciding whether
 165                  * the field fits or not.
                      * The scan stops at the first character that is
                      * neither whitespace nor a break marker; if it runs
                      * to lastcol instead, nothing printable remains.
 166                  */
 167
 168                 for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
 169                         switch (p->tcol->buf[ic]) {
 170                         case '\t':
 171                                 if (p->flags & TERMP_BRTRSP)
 172                                         vbr = term_tab_next(vbr);
 173                                 continue;
 174                         case ' ':
 175                                 if (p->flags & TERMP_BRTRSP)
 176                                         vbr += (*p->width)(p, ' ');
 177                                 continue;
 178                         case '\n':
 179                         case ASCII_BREAK:
 180                                 continue;
 181                         default:
 182                                 break;
 183                         }
 184                         break;
 185                 }
 186                 if (ic == p->tcol->lastcol)
 187                         break;
 188
 189                 /*
 190                  * At the location of an automatic line break, input
 191                  * space characters are consumed by the line break.
 192                  */
 193
 194                 while (p->tcol->col < p->tcol->lastcol &&
 195                     p->tcol->buf[p->tcol->col] == ' ')
 196                         p->tcol->col++;
 197
 198                 /*
 199                  * In multi-column mode, leave the rest of the text
 200                  * in the buffer to be handled by a subsequent
 201                  * invocation, such that the other columns of the
 202                  * table can be handled first.
 203                  * In single-column mode, simply break the line.
 204                  */
 205
 206                 if (p->flags & TERMP_MULTICOL)
 207                         return;
 208
 209                 endline(p);
 210                 p->viscol = 0;
 211
 212                 /*
 213                  * Normally, start the next line at the same indentation
 214                  * as this one, but with the BRIND flag, start it at the
 215                  * right margin instead.  This is used together with
 216                  * NOBREAK for the tags in various kinds of tagged lists.
 217                  */
 218
 219                 vbl = p->flags & TERMP_BRIND ?
 220                     p->tcol->rmargin : p->tcol->offset;
 221         }
 222
 223         /* Reset output state in preparation for the next field. */
 224
 225         p->col = p->tcol->col = p->tcol->lastcol = 0;
 226         p->minbl = p->trailspace;
 227         p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD);
 228
             /* In multi-column mode, do not end the line here. */
 229         if (p->flags & TERMP_MULTICOL)
 230                 return;
 231
 232         /*
 233          * The HANG flag means that the next field
 234          * always follows on the same line.
 235          * The NOBREAK flag means that the next field
 236          * follows on the same line unless the field was overrun.
 237          * Normally, break the line at the end of each field.
              * The field counts as overrun when the text printed last
              * plus the trailing space is wider than vfield.
 238          */
 239
 240         if ((p->flags & TERMP_HANG) == 0 &&
 241             ((p->flags & TERMP_NOBREAK) == 0 ||
 242              vbr + term_len(p, p->trailspace) > vfield))
 243                 endline(p);
 244 }
 245
 246 /*
 247  * Store the number of input characters to print in this field in *nbr
 248  * and their total visual width to print in *vbr.
 249  * If there is only whitespace in the field, both remain zero.
 250  * The desired visual width of the field is provided by vtarget.
 251  * If the first word is longer, the field will be overrun.
      *
      * Scanning starts at p->tcol->col and ends at p->tcol->lastcol.
      * When non-zero, the value stored in *nbr is a position in the
      * buffer, namely the index just after the last character to print,
      * not a count relative to the starting position.
      *
      * As a side effect, breakable hyphens (ASCII_HYPH) and non-breakable
      * spaces (ASCII_NBRSP) that are inspected are replaced in the buffer
      * by '-' and ' ', respectively.
 252  */
 253 static void
 254 term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget)
 255 {
 256         size_t   ic;        /* Character position in the input buffer. */
 257         size_t   vis;       /* Visual position of the current character. */
 258         size_t   vn;        /* Visual position of the next character. */
 259         int      breakline; /* Break at the end of this word. */
 260         int      graph;     /* Last character was non-blank. */
 261
 262         *nbr = *vbr = vis = 0;
 263         breakline = graph = 0;
             /*
              * Inside the switch, "continue" moves on to the next input
              * character, whereas "break" falls through to the "break"
              * at the bottom of the loop body and ends the scan.
              */
 264         for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
 265                 switch (p->tcol->buf[ic]) {
 266                 case '\b':  /* Escape \o (overstrike) or backspace markup. */
                             /* Step back by the width of the preceding character. */
 267                         assert(ic > 0);
 268                         vis -= (*p->width)(p, p->tcol->buf[ic - 1]);
 269                         continue;
 270
 271                 case '\t':  /* Normal ASCII whitespace. */
 272                 case ' ':
 273                 case ASCII_BREAK:  /* Escape \: (breakpoint). */
 274                         switch (p->tcol->buf[ic]) {
 275                         case '\t':
 276                                 vn = term_tab_next(vis);
 277                                 break;
 278                         case ' ':
 279                                 vn = vis + (*p->width)(p, ' ');
 280                                 break;
 281                         case ASCII_BREAK:
 282                                 vn = vis;
 283                                 break;
 284                         default:
 285                                 abort();
 286                         }
 287                         /* Can break at the end of a word. */
 288                         if (breakline || vn > vtarget)
 289                                 break;
                             /*
                              * The word ending here fits, so remember
                              * its end as the latest break opportunity.
                              */
 290                         if (graph) {
 291                                 *nbr = ic;
 292                                 *vbr = vis;
 293                                 graph = 0;
 294                         }
 295                         vis = vn;
 296                         continue;
 297
 298                 case '\n':  /* Escape \p (break at the end of the word). */
 299                         breakline = 1;
 300                         continue;
 301
 302                 case ASCII_HYPH:  /* Breakable hyphen. */
 303                         graph = 1;
 304                         /*
 305                          * We are about to decide whether to break the
 306                          * line or not, so we no longer need this hyphen
 307                          * to be marked as breakable.  Put back a real
 308                          * hyphen such that we get the correct width.
 309                          */
 310                         p->tcol->buf[ic] = '-';
 311                         vis += (*p->width)(p, '-');
                             /*
                              * If the hyphen does not fit, end the scan with
                              * ic pointing just after it, such that the final
                              * check below includes the hyphen if it needs
                              * to overrun the field.
                              */
 312                         if (vis > vtarget) {
 313                                 ic++;
 314                                 break;
 315                         }
                             /* The line can be broken right after the hyphen. */
 316                         *nbr = ic + 1;
 317                         *vbr = vis;
 318                         continue;
 319
 320                 case ASCII_NBRSP:  /* Non-breakable space. */
 321                         p->tcol->buf[ic] = ' ';
 322                         /* FALLTHROUGH */
 323                 default:  /* Printable character. */
 324                         graph = 1;
 325                         vis += (*p->width)(p, p->tcol->buf[ic]);
                             /*
                              * The current word does not fit, but an earlier
                              * break opportunity exists, so use that one.
                              */
 326                         if (vis > vtarget && *nbr > 0)
 327                                 return;
 328                         continue;
 329                 }
 330                 break;
 331         }
 332
 333         /*
 334          * If the last word extends to the end of the field without any
 335          * trailing whitespace, the loop could not check yet whether it
 336          * can remain on this line.  So do the check now.
              * If no break opportunity was found at all (*nbr == 0),
              * accept the word even though it overruns the field.
 337          */
 338
 339         if (graph && (vis <= vtarget || *nbr == 0)) {
 340                 *nbr = ic;
 341                 *vbr = vis;
 342         }
 343 }
 344
 345 /*
 346  * Print the contents of one field
 347  * with an indentation of        vbl      visual columns,
 348  * an input string length of     nbr      characters,
 349  * an output width of            vbr      visual columns,
 350  * and a desired field width of  vtarget  visual columns.
 351  */
 352 static void
 353 term_field(struct termp *p, size_t vbl, size_t nbr, size_t vbr, size_t vtarget)
 354 {
 355         size_t   ic;    /* Character position in the input buffer. */
 356         size_t   vis;   /* Visual position of the current character. */
 357         size_t   dv;    /* Visual width of the current character. */
 358         size_t   vn;    /* Visual position of the next character. */
 359
 360         vis = 0;
 361         for (ic = p->tcol->col; ic < nbr; ic++) {
 362
 363                 /*
 364                  * To avoid the printing of trailing whitespace,
 365                  * do not print whitespace right away, only count it.
 366                  */
 367
 368                 switch (p->tcol->buf[ic]) {
 369                 case '\n':
 370                 case ASCII_BREAK:
 371                         continue;
 372                 case '\t':
 373                         vn = term_tab_next(vis);
 374                         vbl += vn - vis;
 375                         vis = vn;
 376                         continue;
 377                 case ' ':
 378                 case ASCII_NBRSP:
 379                         dv = (*p->width)(p, ' ');
 380                         vbl += dv;
 381                         vis += dv;
 382                         continue;
 383                 default:
 384                         break;
 385                 }
 386
 387                 /*
 388                  * We found a non-blank character to print,
 389                  * so write preceding white space now.
 390                  */
 391
 392                 if (vbl > 0) {
 393                         (*p->advance)(p, vbl);
 394                         p->viscol += vbl;
 395                         vbl = 0;
 396                 }
 397
 398                 /* Print the character and adjust the visual position. */
 399
 400                 (*p->letter)(p, p->tcol->buf[ic]);
 401                 if (p->tcol->buf[ic] == '\b') {
 402                         dv = (*p->width)(p, p->tcol->buf[ic - 1]);
 403                         p->viscol -= dv;
 404                         vis -= dv;
 405                 } else {
 406                         dv = (*p->width)(p, p->tcol->buf[ic]);
 407                         p->viscol += dv;
 408                         vis += dv;
 409                 }
 410         }
 411         p->tcol->col = nbr;
 412 }
 413
 414 static void
 415 endline(struct termp *p)
 416 {
 417         if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) {
 418                 p->mc = NULL;
 419                 p->flags &= ~TERMP_ENDMC;
 420         }
 421         if (p->mc != NULL) {
 422                 if (p->viscol && p->maxrmargin >= p->viscol)
 423                         (*p->advance)(p, p->maxrmargin - p->viscol + 1);
 424                 p->flags |= TERMP_NOBUF | TERMP_NOSPACE;
 425                 term_word(p, p->mc);
 426                 p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC);
 427         }
 428         p->viscol = 0;
 429         p->minbl = 0;
 430         (*p->endline)(p);
 431 }
 432
 433 /*
 434  * A newline only breaks an existing line; it won't assert vertical
 435  * space.  All data in the output buffer is flushed prior to the newline
 436  * assertion.
 437  */
 438 void
 439 term_newln(struct termp *p)
 440 {
 441
 442         p->flags |= TERMP_NOSPACE;
 443         if (p->tcol->lastcol || p->viscol)
 444                 term_flushln(p);
 445 }
 446
 447 /*
 448  * Asserts a vertical space (a full, empty line-break between lines).
 449  * Note that if used twice, this will cause two blank spaces and so on.
 450  * All data in the output buffer is flushed prior to the newline
 451  * assertion.
 452  */
 453 void
 454 term_vspace(struct termp *p)
 455 {
 456
 457         term_newln(p);
 458         p->viscol = 0;
 459         p->minbl = 0;
 460         if (0 < p->skipvsp)
 461                 p->skipvsp--;
 462         else
 463                 (*p->endline)(p);
 464 }
 465
 466 /* Swap current and previous font; for \fP and .ft P */
 467 void
 468 term_fontlast(struct termp *p)
 469 {
 470         enum termfont    f;
 471
 472         f = p->fontl;
 473         p->fontl = p->fontq[p->fonti];
 474         p->fontq[p->fonti] = f;
 475 }
 476
 477 /* Set font, save current, discard previous; for \f, .ft, .B etc. */
 478 void
 479 term_fontrepl(struct termp *p, enum termfont f)
 480 {
 481
 482         p->fontl = p->fontq[p->fonti];
 483         p->fontq[p->fonti] = f;
 484 }
 485
 486 /* Set font, save previous. */
 487 void
 488 term_fontpush(struct termp *p, enum termfont f)
 489 {
 490
 491         p->fontl = p->fontq[p->fonti];
 492         if (++p->fonti == p->fontsz) {
 493                 p->fontsz += 8;
 494                 p->fontq = mandoc_reallocarray(p->fontq,
 495                     p->fontsz, sizeof(*p->fontq));
 496         }
 497         p->fontq[p->fonti] = f;
 498 }
 499
 500 /* Flush to make the saved pointer current again. */
 501 void
 502 term_fontpopq(struct termp *p, int i)
 503 {
 504
 505         assert(i >= 0);
 506         if (p->fonti > i)
 507                 p->fonti = i;
 508 }
 509
 510 /* Pop one font off the stack. */
 511 void
 512 term_fontpop(struct termp *p)
 513 {
 514
 515         assert(p->fonti);
 516         p->fonti--;
 517 }
 518
 519 /*
 520  * Handle pwords, partial words, which may be either a single word or a
 521  * phrase that cannot be broken down (such as a literal string).  This
 522  * handles word styling.
 523  */
 524 void
 525 term_word(struct termp *p, const char *word)
 526 {
 527         struct roffsu    su;
 528         const char       nbrsp[2] = { ASCII_NBRSP, 0 };
 529         const char      *seq, *cp;
 530         int              sz, uc;
 531         size_t           csz, lsz, ssz;
 532         enum mandoc_esc  esc;
 533
 534         if ((p->flags & TERMP_NOBUF) == 0) {
 535                 if ((p->flags & TERMP_NOSPACE) == 0) {
 536                         if ((p->flags & TERMP_KEEP) == 0) {
 537                                 bufferc(p, ' ');
 538                                 if (p->flags & TERMP_SENTENCE)
 539                                         bufferc(p, ' ');
 540                         } else
 541                                 bufferc(p, ASCII_NBRSP);
 542                 }
 543                 if (p->flags & TERMP_PREKEEP)
 544                         p->flags |= TERMP_KEEP;
 545                 if (p->flags & TERMP_NONOSPACE)
 546                         p->flags |= TERMP_NOSPACE;
 547                 else
 548                         p->flags &= ~TERMP_NOSPACE;
 549                 p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE);
 550                 p->skipvsp = 0;
 551         }
 552
 553         while ('\0' != *word) {
 554                 if ('\\' != *word) {
 555                         if (TERMP_NBRWORD & p->flags) {
 556                                 if (' ' == *word) {
 557                                         encode(p, nbrsp, 1);
 558                                         word++;
 559                                         continue;
 560                                 }
 561                                 ssz = strcspn(word, "\\ ");
 562                         } else
 563                                 ssz = strcspn(word, "\\");
 564                         encode(p, word, ssz);
 565                         word += (int)ssz;
 566                         continue;
 567                 }
 568
 569                 word++;
 570                 esc = mandoc_escape(&word, &seq, &sz);
 571                 switch (esc) {
 572                 case ESCAPE_UNICODE:
 573                         uc = mchars_num2uc(seq + 1, sz - 1);
 574                         break;
 575                 case ESCAPE_NUMBERED:
 576                         uc = mchars_num2char(seq, sz);
 577                         if (uc < 0)
 578                                 continue;
 579                         break;
 580                 case ESCAPE_SPECIAL:
 581                         if (p->enc == TERMENC_ASCII) {
 582                                 cp = mchars_spec2str(seq, sz, &ssz);
 583                                 if (cp != NULL)
 584                                         encode(p, cp, ssz);
 585                         } else {
 586                                 uc = mchars_spec2cp(seq, sz);
 587                                 if (uc > 0)
 588                                         encode1(p, uc);
 589                         }
 590                         continue;
 591                 case ESCAPE_UNDEF:
 592                         uc = *seq;
 593                         break;
 594                 case ESCAPE_FONTBOLD:
 595                         term_fontrepl(p, TERMFONT_BOLD);
 596                         continue;
 597                 case ESCAPE_FONTITALIC:
 598                         term_fontrepl(p, TERMFONT_UNDER);
 599                         continue;
 600                 case ESCAPE_FONTBI:
 601                         term_fontrepl(p, TERMFONT_BI);
 602                         continue;
 603                 case ESCAPE_FONT:
 604                 case ESCAPE_FONTCW:
 605                 case ESCAPE_FONTROMAN:
 606                         term_fontrepl(p, TERMFONT_NONE);
 607                         continue;
 608                 case ESCAPE_FONTPREV:
 609                         term_fontlast(p);
 610                         continue;
 611                 case ESCAPE_BREAK:
 612                         bufferc(p, '\n');
 613                         continue;
 614                 case ESCAPE_NOSPACE:
 615                         if (p->flags & TERMP_BACKAFTER)
 616                                 p->flags &= ~TERMP_BACKAFTER;
 617                         else if (*word == '\0')
 618                                 p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE);
 619                         continue;
 620                 case ESCAPE_DEVICE:
 621                         if (p->type == TERMTYPE_PDF)
 622                                 encode(p, "pdf", 3);
 623                         else if (p->type == TERMTYPE_PS)
 624                                 encode(p, "ps", 2);
 625                         else if (p->enc == TERMENC_ASCII)
 626                                 encode(p, "ascii", 5);
 627                         else
 628                                 encode(p, "utf8", 4);
 629                         continue;
 630                 case ESCAPE_HORIZ:
 631                         if (*seq == '|') {
 632                                 seq++;
 633                                 uc = -p->col;
 634                         } else
 635                                 uc = 0;
 636                         if (a2roffsu(seq, &su, SCALE_EM) == NULL)
 637                                 continue;
 638                         uc += term_hen(p, &su);
 639                         if (uc > 0)
 640                                 while (uc-- > 0)
 641                                         bufferc(p, ASCII_NBRSP);
 642                         else if (p->col > (size_t)(-uc))
 643                                 p->col += uc;
 644                         else {
 645                                 uc += p->col;
 646                                 p->col = 0;
 647                                 if (p->tcol->offset > (size_t)(-uc)) {
 648                                         p->ti += uc;
 649                                         p->tcol->offset += uc;
 650                                 } else {
 651                                         p->ti -= p->tcol->offset;
 652                                         p->tcol->offset = 0;
 653                                 }
 654                         }
 655                         continue;
 656                 case ESCAPE_HLINE:
 657                         if ((cp = a2roffsu(seq, &su, SCALE_EM)) == NULL)
 658                                 continue;
 659                         uc = term_hen(p, &su);
 660                         if (uc <= 0) {
 661                                 if (p->tcol->rmargin <= p->tcol->offset)
 662                                         continue;
 663                                 lsz = p->tcol->rmargin - p->tcol->offset;
 664                         } else
 665                                 lsz = uc;
 666                         if (*cp == seq[-1])
 667                                 uc = -1;
 668                         else if (*cp == '\\') {
 669                                 seq = cp + 1;
 670                                 esc = mandoc_escape(&seq, &cp, &sz);
 671                                 switch (esc) {
 672                                 case ESCAPE_UNICODE:
 673                                         uc = mchars_num2uc(cp + 1, sz - 1);
 674                                         break;
 675                                 case ESCAPE_NUMBERED:
 676                                         uc = mchars_num2char(cp, sz);
 677                                         break;
 678                                 case ESCAPE_SPECIAL:
 679                                         uc = mchars_spec2cp(cp, sz);
 680                                         break;
 681                                 case ESCAPE_UNDEF:
 682                                         uc = *seq;
 683                                         break;
 684                                 default:
 685                                         uc = -1;
 686                                         break;
 687                                 }
 688                         } else
 689                                 uc = *cp;
 690                         if (uc < 0x20 || (uc > 0x7E && uc < 0xA0))
 691                                 uc = '_';
 692                         if (p->enc == TERMENC_ASCII) {
 693                                 cp = ascii_uc2str(uc);
 694                                 csz = term_strlen(p, cp);
 695                                 ssz = strlen(cp);
 696                         } else
 697                                 csz = (*p->width)(p, uc);
 698                         while (lsz >= csz) {
 699                                 if (p->enc == TERMENC_ASCII)
 700                                         encode(p, cp, ssz);
 701                                 else
 702                                         encode1(p, uc);
 703                                 lsz -= csz;
 704                         }
 705                         continue;
 706                 case ESCAPE_SKIPCHAR:
 707                         p->flags |= TERMP_BACKAFTER;
 708                         continue;
 709                 case ESCAPE_OVERSTRIKE:
 710                         cp = seq + sz;
 711                         while (seq < cp) {
 712                                 if (*seq == '\\') {
 713                                         mandoc_escape(&seq, NULL, NULL);
 714                                         continue;
 715                                 }
 716                                 encode1(p, *seq++);
 717                                 if (seq < cp) {
 718                                         if (p->flags & TERMP_BACKBEFORE)
 719                                                 p->flags |= TERMP_BACKAFTER;
 720                                         else
 721                                                 p->flags |= TERMP_BACKBEFORE;
 722                                 }
 723                         }
 724                         /* Trim trailing backspace/blank pair. */
 725                         if (p->tcol->lastcol > 2 &&
 726                             (p->tcol->buf[p->tcol->lastcol - 1] == ' ' ||
 727                              p->tcol->buf[p->tcol->lastcol - 1] == '\t'))
 728                                 p->tcol->lastcol -= 2;
 729                         if (p->col > p->tcol->lastcol)
 730                                 p->col = p->tcol->lastcol;
 731                         continue;
 732                 default:
 733                         continue;
 734                 }
 735
 736                 /*
 737                  * Common handling for Unicode and numbered
 738                  * character escape sequences.
 739                  */
 740
 741                 if (p->enc == TERMENC_ASCII) {
 742                         cp = ascii_uc2str(uc);
 743                         encode(p, cp, strlen(cp));
 744                 } else {
 745                         if ((uc < 0x20 && uc != 0x09) ||
 746                             (uc > 0x7E && uc < 0xA0))
 747                                 uc = 0xFFFD;
 748                         encode1(p, uc);
 749                 }
 750         }
 751         p->flags &= ~TERMP_NBRWORD;
 752 }
 753
 754 static void
 755 adjbuf(struct termp_col *c, size_t sz)
 756 {
 757         if (c->maxcols == 0)
 758                 c->maxcols = 1024;
 759         while (c->maxcols <= sz)
 760                 c->maxcols <<= 2;
 761         c->buf = mandoc_reallocarray(c->buf, c->maxcols, sizeof(*c->buf));
 762 }
 763
 764 static void
 765 bufferc(struct termp *p, char c)
 766 {
 767         if (p->flags & TERMP_NOBUF) {
 768                 (*p->letter)(p, c);
 769                 return;
 770         }
 771         if (p->col + 1 >= p->tcol->maxcols)
 772                 adjbuf(p->tcol, p->col + 1);
 773         if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
 774                 p->tcol->buf[p->col] = c;
 775         if (p->tcol->lastcol < ++p->col)
 776                 p->tcol->lastcol = p->col;
 777 }
 778
 779 /*
 780  * See encode().
 781  * Do this for a single (probably unicode) value.
 782  * Does not check for non-decorated glyphs.
 783  */
 784 static void
 785 encode1(struct termp *p, int c)
 786 {
 787         enum termfont     f;
 788
 789         if (p->flags & TERMP_NOBUF) {
 790                 (*p->letter)(p, c);
 791                 return;
 792         }
 793
 794         if (p->col + 7 >= p->tcol->maxcols)
 795                 adjbuf(p->tcol, p->col + 7);
 796
 797         f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ?
 798             p->fontq[p->fonti] : TERMFONT_NONE;
 799
 800         if (p->flags & TERMP_BACKBEFORE) {
 801                 if (p->tcol->buf[p->col - 1] == ' ' ||
 802                     p->tcol->buf[p->col - 1] == '\t')
 803                         p->col--;
 804                 else
 805                         p->tcol->buf[p->col++] = '\b';
 806                 p->flags &= ~TERMP_BACKBEFORE;
 807         }
 808         if (f == TERMFONT_UNDER || f == TERMFONT_BI) {
 809                 p->tcol->buf[p->col++] = '_';
 810                 p->tcol->buf[p->col++] = '\b';
 811         }
 812         if (f == TERMFONT_BOLD || f == TERMFONT_BI) {
 813                 if (c == ASCII_HYPH)
 814                         p->tcol->buf[p->col++] = '-';
 815                 else
 816                         p->tcol->buf[p->col++] = c;
 817                 p->tcol->buf[p->col++] = '\b';
 818         }
 819         if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
 820                 p->tcol->buf[p->col] = c;
 821         if (p->tcol->lastcol < ++p->col)
 822                 p->tcol->lastcol = p->col;
 823         if (p->flags & TERMP_BACKAFTER) {
 824                 p->flags |= TERMP_BACKBEFORE;
 825                 p->flags &= ~TERMP_BACKAFTER;
 826         }
 827 }
 828
 829 static void
 830 encode(struct termp *p, const char *word, size_t sz)
 831 {
 832         size_t            i;
 833
 834         if (p->flags & TERMP_NOBUF) {
 835                 for (i = 0; i < sz; i++)
 836                         (*p->letter)(p, word[i]);
 837                 return;
 838         }
 839
 840         if (p->col + 2 + (sz * 5) >= p->tcol->maxcols)
 841                 adjbuf(p->tcol, p->col + 2 + (sz * 5));
 842
 843         for (i = 0; i < sz; i++) {
 844                 if (ASCII_HYPH == word[i] ||
 845                     isgraph((unsigned char)word[i]))
 846                         encode1(p, word[i]);
 847                 else {
 848                         if (p->tcol->lastcol <= p->col ||
 849                             (word[i] != ' ' && word[i] != ASCII_NBRSP))
 850                                 p->tcol->buf[p->col] = word[i];
 851                         p->col++;
 852
 853                         /*
 854                          * Postpone the effect of \z while handling
 855                          * an overstrike sequence from ascii_uc2str().
 856                          */
 857
 858                         if (word[i] == '\b' &&
 859                             (p->flags & TERMP_BACKBEFORE)) {
 860                                 p->flags &= ~TERMP_BACKBEFORE;
 861                                 p->flags |= TERMP_BACKAFTER;
 862                         }
 863                 }
 864         }
 865         if (p->tcol->lastcol < p->col)
 866                 p->tcol->lastcol = p->col;
 867 }
 868
 869 void
 870 term_setwidth(struct termp *p, const char *wstr)
 871 {
 872         struct roffsu    su;
 873         int              iop, width;
 874
 875         iop = 0;
 876         width = 0;
 877         if (NULL != wstr) {
 878                 switch (*wstr) {
 879                 case '+':
 880                         iop = 1;
 881                         wstr++;
 882                         break;
 883                 case '-':
 884                         iop = -1;
 885                         wstr++;
 886                         break;
 887                 default:
 888                         break;
 889                 }
 890                 if (a2roffsu(wstr, &su, SCALE_MAX) != NULL)
 891                         width = term_hspan(p, &su);
 892                 else
 893                         iop = 0;
 894         }
 895         (*p->setwidth)(p, iop, width);
 896 }
 897
 898 size_t
 899 term_len(const struct termp *p, size_t sz)
 900 {
 901
 902         return (*p->width)(p, ' ') * sz;
 903 }
 904
 905 static size_t
 906 cond_width(const struct termp *p, int c, int *skip)
 907 {
 908
 909         if (*skip) {
 910                 (*skip) = 0;
 911                 return 0;
 912         } else
 913                 return (*p->width)(p, c);
 914 }
 915
 916 size_t
 917 term_strlen(const struct termp *p, const char *cp)
 918 {
 919         size_t           sz, rsz, i;
 920         int              ssz, skip, uc;
 921         const char      *seq, *rhs;
 922         enum mandoc_esc  esc;
 923         static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH,
 924                         ASCII_BREAK, '\0' };
 925
 926         /*
 927          * Account for escaped sequences within string length
 928          * calculations.  This follows the logic in term_word() as we
 929          * must calculate the width of produced strings.
 930          */
 931
 932         sz = 0;
 933         skip = 0;
 934         while ('\0' != *cp) {
 935                 rsz = strcspn(cp, rej);
 936                 for (i = 0; i < rsz; i++)
 937                         sz += cond_width(p, *cp++, &skip);
 938
 939                 switch (*cp) {
 940                 case '\\':
 941                         cp++;
 942                         rhs = NULL;
 943                         esc = mandoc_escape(&cp, &seq, &ssz);
 944                         switch (esc) {
 945                         case ESCAPE_UNICODE:
 946                                 uc = mchars_num2uc(seq + 1, ssz - 1);
 947                                 break;
 948                         case ESCAPE_NUMBERED:
 949                                 uc = mchars_num2char(seq, ssz);
 950                                 if (uc < 0)
 951                                         continue;
 952                                 break;
 953                         case ESCAPE_SPECIAL:
 954                                 if (p->enc == TERMENC_ASCII) {
 955                                         rhs = mchars_spec2str(seq, ssz, &rsz);
 956                                         if (rhs != NULL)
 957                                                 break;
 958                                 } else {
 959                                         uc = mchars_spec2cp(seq, ssz);
 960                                         if (uc > 0)
 961                                                 sz += cond_width(p, uc, &skip);
 962                                 }
 963                                 continue;
 964                         case ESCAPE_UNDEF:
 965                                 uc = *seq;
 966                                 break;
 967                         case ESCAPE_DEVICE:
 968                                 if (p->type == TERMTYPE_PDF) {
 969                                         rhs = "pdf";
 970                                         rsz = 3;
 971                                 } else if (p->type == TERMTYPE_PS) {
 972                                         rhs = "ps";
 973                                         rsz = 2;
 974                                 } else if (p->enc == TERMENC_ASCII) {
 975                                         rhs = "ascii";
 976                                         rsz = 5;
 977                                 } else {
 978                                         rhs = "utf8";
 979                                         rsz = 4;
 980                                 }
 981                                 break;
 982                         case ESCAPE_SKIPCHAR:
 983                                 skip = 1;
 984                                 continue;
 985                         case ESCAPE_OVERSTRIKE:
 986                                 rsz = 0;
 987                                 rhs = seq + ssz;
 988                                 while (seq < rhs) {
 989                                         if (*seq == '\\') {
 990                                                 mandoc_escape(&seq, NULL, NULL);
 991                                                 continue;
 992                                         }
 993                                         i = (*p->width)(p, *seq++);
 994                                         if (rsz < i)
 995                                                 rsz = i;
 996                                 }
 997                                 sz += rsz;
 998                                 continue;
 999                         default:
1000                                 continue;
1001                         }
1002
1003                         /*
1004                          * Common handling for Unicode and numbered
1005                          * character escape sequences.
1006                          */
1007
1008                         if (rhs == NULL) {
1009                                 if (p->enc == TERMENC_ASCII) {
1010                                         rhs = ascii_uc2str(uc);
1011                                         rsz = strlen(rhs);
1012                                 } else {
1013                                         if ((uc < 0x20 && uc != 0x09) ||
1014                                             (uc > 0x7E && uc < 0xA0))
1015                                                 uc = 0xFFFD;
1016                                         sz += cond_width(p, uc, &skip);
1017                                         continue;
1018                                 }
1019                         }
1020
1021                         if (skip) {
1022                                 skip = 0;
1023                                 break;
1024                         }
1025
1026                         /*
1027                          * Common handling for all escape sequences
1028                          * printing more than one character.
1029                          */
1030
1031                         for (i = 0; i < rsz; i++)
1032                                 sz += (*p->width)(p, *rhs++);
1033                         break;
1034                 case ASCII_NBRSP:
1035                         sz += cond_width(p, ' ', &skip);
1036                         cp++;
1037                         break;
1038                 case ASCII_HYPH:
1039                         sz += cond_width(p, '-', &skip);
1040                         cp++;
1041                         break;
1042                 default:
1043                         break;
1044                 }
1045         }
1046
1047         return sz;
1048 }
1049
1050 int
1051 term_vspan(const struct termp *p, const struct roffsu *su)
1052 {
1053         double           r;
1054         int              ri;
1055
1056         switch (su->unit) {
1057         case SCALE_BU:
1058                 r = su->scale / 40.0;
1059                 break;
1060         case SCALE_CM:
1061                 r = su->scale * 6.0 / 2.54;
1062                 break;
1063         case SCALE_FS:
1064                 r = su->scale * 65536.0 / 40.0;
1065                 break;
1066         case SCALE_IN:
1067                 r = su->scale * 6.0;
1068                 break;
1069         case SCALE_MM:
1070                 r = su->scale * 0.006;
1071                 break;
1072         case SCALE_PC:
1073                 r = su->scale;
1074                 break;
1075         case SCALE_PT:
1076                 r = su->scale / 12.0;
1077                 break;
1078         case SCALE_EN:
1079         case SCALE_EM:
1080                 r = su->scale * 0.6;
1081                 break;
1082         case SCALE_VS:
1083                 r = su->scale;
1084                 break;
1085         default:
1086                 abort();
1087         }
1088         ri = r > 0.0 ? r + 0.4995 : r - 0.4995;
1089         return ri < 66 ? ri : 1;
1090 }
1091
1092 /*
1093  * Convert a scaling width to basic units, rounding towards 0.
1094  */
1095 int
1096 term_hspan(const struct termp *p, const struct roffsu *su)
1097 {
1098
1099         return (*p->hspan)(p, su);
1100 }
1101
1102 /*
1103  * Convert a scaling width to basic units, rounding to closest.
1104  */
1105 int
1106 term_hen(const struct termp *p, const struct roffsu *su)
1107 {
1108         int bu;
1109
1110         if ((bu = (*p->hspan)(p, su)) >= 0)
1111                 return (bu + 11) / 24;
1112         else
1113                 return -((-bu + 11) / 24);
1114 }