contrib/mdocml/term.c

   1 /*      $Id: term.c,v 1.201 2011/09/21 09:57:13 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #ifdef HAVE_CONFIG_H
  19 #include "config.h"
  20 #endif
  21
  22 #include <sys/types.h>
  23
  24 #include <assert.h>
  25 #include <ctype.h>
  26 #include <stdint.h>
  27 #include <stdio.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30
  31 #include "mandoc.h"
  32 #include "out.h"
  33 #include "term.h"
  34 #include "main.h"
  35
  36 static  void             adjbuf(struct termp *p, int);
  37 static  void             bufferc(struct termp *, char);
  38 static  void             encode(struct termp *, const char *, size_t);
  39 static  void             encode1(struct termp *, int);
  40
  41 void
  42 term_free(struct termp *p)
  43 {
  44
  45         if (p->buf)
  46                 free(p->buf);
  47         if (p->symtab)
  48                 mchars_free(p->symtab);
  49
  50         free(p);
  51 }
  52
  53
  54 void
  55 term_begin(struct termp *p, term_margin head,
  56                 term_margin foot, const void *arg)
  57 {
  58
  59         p->headf = head;
  60         p->footf = foot;
  61         p->argf = arg;
  62         (*p->begin)(p);
  63 }
  64
  65
  66 void
  67 term_end(struct termp *p)
  68 {
  69
  70         (*p->end)(p);
  71 }
  72
  73 /*
  74  * Flush a line of text.  A "line" is loosely defined as being something
  75  * that should be followed by a newline, regardless of whether it's
  76  * broken apart by newlines getting there.  A line can also be a
  77  * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does
  78  * not have a trailing newline.
  79  *
  80  * The following flags may be specified:
  81  *
  82  *  - TERMP_NOBREAK: this is the most important and is used when making
  83  *    columns.  In short: don't print a newline and instead expect the
  84  *    next call to do the padding up to the start of the next column.
  85  *
  86  *  - TERMP_TWOSPACE: make sure there is room for at least two space
  87  *    characters of padding.  Otherwise, rather break the line.
  88  *
  89  *  - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
  90  *    the line is overrun, and don't pad-right if it's underrun.
  91  *
  92  *  - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
  93  *    overrunning, instead save the position and continue at that point
  94  *    when the next invocation.
  95  *
  96  *  In-line line breaking:
  97  *
  98  *  If TERMP_NOBREAK is specified and the line overruns the right
  99  *  margin, it will break and pad-right to the right margin after
 100  *  writing.  If maxrmargin is violated, it will break and continue
 101  *  writing from the right-margin, which will lead to the above scenario
 102  *  upon exit.  Otherwise, the line will break at the right margin.
 103  */
 104 void
 105 term_flushln(struct termp *p)
 106 {
 107         int              i;     /* current input position in p->buf */
 108         size_t           vis;   /* current visual position on output */
 109         size_t           vbl;   /* number of blanks to prepend to output */
 110         size_t           vend;  /* end of word visual position on output */
 111         size_t           bp;    /* visual right border position */
 112         size_t           dv;    /* temporary for visual pos calculations */
 113         int              j;     /* temporary loop index for p->buf */
 114         int              jhy;   /* last hyph before overflow w/r/t j */
 115         size_t           maxvis; /* output position of visible boundary */
 116         size_t           mmax; /* used in calculating bp */
 117
 118         /*
 119          * First, establish the maximum columns of "visible" content.
 120          * This is usually the difference between the right-margin and
 121          * an indentation, but can be, for tagged lists or columns, a
 122          * small set of values.
 123          */
 124         assert  (p->rmargin >= p->offset);
 125         dv     = p->rmargin - p->offset;
 126         maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0;
 127         dv     = p->maxrmargin - p->offset;
 128         mmax   = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0;
 129
 130         bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
 131
 132         /*
 133          * Calculate the required amount of padding.
 134          */
 135         vbl = p->offset + p->overstep > p->viscol ?
 136               p->offset + p->overstep - p->viscol : 0;
 137
 138         vis = vend = 0;
 139         i = 0;
 140
 141         while (i < p->col) {
 142                 /*
 143                  * Handle literal tab characters: collapse all
 144                  * subsequent tabs into a single huge set of spaces.
 145                  */
 146                 while (i < p->col && '\t' == p->buf[i]) {
 147                         vend = (vis / p->tabwidth + 1) * p->tabwidth;
 148                         vbl += vend - vis;
 149                         vis = vend;
 150                         i++;
 151                 }
 152
 153                 /*
 154                  * Count up visible word characters.  Control sequences
 155                  * (starting with the CSI) aren't counted.  A space
 156                  * generates a non-printing word, which is valid (the
 157                  * space is printed according to regular spacing rules).
 158                  */
 159
 160                 for (j = i, jhy = 0; j < p->col; j++) {
 161                         if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
 162                                 break;
 163
 164                         /* Back over the the last printed character. */
 165                         if (8 == p->buf[j]) {
 166                                 assert(j);
 167                                 vend -= (*p->width)(p, p->buf[j - 1]);
 168                                 continue;
 169                         }
 170
 171                         /* Regular word. */
 172                         /* Break at the hyphen point if we overrun. */
 173                         if (vend > vis && vend < bp &&
 174                                         ASCII_HYPH == p->buf[j])
 175                                 jhy = j;
 176
 177                         vend += (*p->width)(p, p->buf[j]);
 178                 }
 179
 180                 /*
 181                  * Find out whether we would exceed the right margin.
 182                  * If so, break to the next line.
 183                  */
 184                 if (vend > bp && 0 == jhy && vis > 0) {
 185                         vend -= vis;
 186                         (*p->endline)(p);
 187                         p->viscol = 0;
 188                         if (TERMP_NOBREAK & p->flags) {
 189                                 vbl = p->rmargin;
 190                                 vend += p->rmargin - p->offset;
 191                         } else
 192                                 vbl = p->offset;
 193
 194                         /* Remove the p->overstep width. */
 195
 196                         bp += (size_t)p->overstep;
 197                         p->overstep = 0;
 198                 }
 199
 200                 /* Write out the [remaining] word. */
 201                 for ( ; i < p->col; i++) {
 202                         if (vend > bp && jhy > 0 && i > jhy)
 203                                 break;
 204                         if ('\t' == p->buf[i])
 205                                 break;
 206                         if (' ' == p->buf[i]) {
 207                                 j = i;
 208                                 while (' ' == p->buf[i])
 209                                         i++;
 210                                 dv = (size_t)(i - j) * (*p->width)(p, ' ');
 211                                 vbl += dv;
 212                                 vend += dv;
 213                                 break;
 214                         }
 215                         if (ASCII_NBRSP == p->buf[i]) {
 216                                 vbl += (*p->width)(p, ' ');
 217                                 continue;
 218                         }
 219
 220                         /*
 221                          * Now we definitely know there will be
 222                          * printable characters to output,
 223                          * so write preceding white space now.
 224                          */
 225                         if (vbl) {
 226                                 (*p->advance)(p, vbl);
 227                                 p->viscol += vbl;
 228                                 vbl = 0;
 229                         }
 230
 231                         if (ASCII_HYPH == p->buf[i]) {
 232                                 (*p->letter)(p, '-');
 233                                 p->viscol += (*p->width)(p, '-');
 234                                 continue;
 235                         }
 236
 237                         (*p->letter)(p, p->buf[i]);
 238                         if (8 == p->buf[i])
 239                                 p->viscol -= (*p->width)(p, p->buf[i-1]);
 240                         else
 241                                 p->viscol += (*p->width)(p, p->buf[i]);
 242                 }
 243                 vis = vend;
 244         }
 245
 246         /*
 247          * If there was trailing white space, it was not printed;
 248          * so reset the cursor position accordingly.
 249          */
 250         if (vis)
 251                 vis -= vbl;
 252
 253         p->col = 0;
 254         p->overstep = 0;
 255
 256         if ( ! (TERMP_NOBREAK & p->flags)) {
 257                 p->viscol = 0;
 258                 (*p->endline)(p);
 259                 return;
 260         }
 261
 262         if (TERMP_HANG & p->flags) {
 263                 /* We need one blank after the tag. */
 264                 p->overstep = (int)(vis - maxvis + (*p->width)(p, ' '));
 265
 266                 /*
 267                  * Behave exactly the same way as groff:
 268                  * If we have overstepped the margin, temporarily move
 269                  * it to the right and flag the rest of the line to be
 270                  * shorter.
 271                  * If we landed right at the margin, be happy.
 272                  * If we are one step before the margin, temporarily
 273                  * move it one step LEFT and flag the rest of the line
 274                  * to be longer.
 275                  */
 276                 if (p->overstep < -1)
 277                         p->overstep = 0;
 278                 return;
 279
 280         } else if (TERMP_DANGLE & p->flags)
 281                 return;
 282
 283         /* If the column was overrun, break the line. */
 284         if (maxvis <= vis +
 285             ((TERMP_TWOSPACE & p->flags) ? (*p->width)(p, ' ') : 0)) {
 286                 (*p->endline)(p);
 287                 p->viscol = 0;
 288         }
 289 }
 290
 291
 292 /*
 293  * A newline only breaks an existing line; it won't assert vertical
 294  * space.  All data in the output buffer is flushed prior to the newline
 295  * assertion.
 296  */
 297 void
 298 term_newln(struct termp *p)
 299 {
 300
 301         p->flags |= TERMP_NOSPACE;
 302         if (p->col || p->viscol)
 303                 term_flushln(p);
 304 }
 305
 306
 307 /*
 308  * Asserts a vertical space (a full, empty line-break between lines).
 309  * Note that if used twice, this will cause two blank spaces and so on.
 310  * All data in the output buffer is flushed prior to the newline
 311  * assertion.
 312  */
 313 void
 314 term_vspace(struct termp *p)
 315 {
 316
 317         term_newln(p);
 318         p->viscol = 0;
 319         (*p->endline)(p);
 320 }
 321
 322 void
 323 term_fontlast(struct termp *p)
 324 {
 325         enum termfont    f;
 326
 327         f = p->fontl;
 328         p->fontl = p->fontq[p->fonti];
 329         p->fontq[p->fonti] = f;
 330 }
 331
 332
 333 void
 334 term_fontrepl(struct termp *p, enum termfont f)
 335 {
 336
 337         p->fontl = p->fontq[p->fonti];
 338         p->fontq[p->fonti] = f;
 339 }
 340
 341
 342 void
 343 term_fontpush(struct termp *p, enum termfont f)
 344 {
 345
 346         assert(p->fonti + 1 < 10);
 347         p->fontl = p->fontq[p->fonti];
 348         p->fontq[++p->fonti] = f;
 349 }
 350
 351
 352 const void *
 353 term_fontq(struct termp *p)
 354 {
 355
 356         return(&p->fontq[p->fonti]);
 357 }
 358
 359
 360 enum termfont
 361 term_fonttop(struct termp *p)
 362 {
 363
 364         return(p->fontq[p->fonti]);
 365 }
 366
 367
 368 void
 369 term_fontpopq(struct termp *p, const void *key)
 370 {
 371
 372         while (p->fonti >= 0 && key != &p->fontq[p->fonti])
 373                 p->fonti--;
 374         assert(p->fonti >= 0);
 375 }
 376
 377
 378 void
 379 term_fontpop(struct termp *p)
 380 {
 381
 382         assert(p->fonti);
 383         p->fonti--;
 384 }
 385
 386 /*
 387  * Handle pwords, partial words, which may be either a single word or a
 388  * phrase that cannot be broken down (such as a literal string).  This
 389  * handles word styling.
 390  */
 391 void
 392 term_word(struct termp *p, const char *word)
 393 {
 394         const char      *seq, *cp;
 395         char             c;
 396         int              sz, uc;
 397         size_t           ssz;
 398         enum mandoc_esc  esc;
 399
 400         if ( ! (TERMP_NOSPACE & p->flags)) {
 401                 if ( ! (TERMP_KEEP & p->flags)) {
 402                         if (TERMP_PREKEEP & p->flags)
 403                                 p->flags |= TERMP_KEEP;
 404                         bufferc(p, ' ');
 405                         if (TERMP_SENTENCE & p->flags)
 406                                 bufferc(p, ' ');
 407                 } else
 408                         bufferc(p, ASCII_NBRSP);
 409         }
 410
 411         if ( ! (p->flags & TERMP_NONOSPACE))
 412                 p->flags &= ~TERMP_NOSPACE;
 413         else
 414                 p->flags |= TERMP_NOSPACE;
 415
 416         p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);
 417
 418         while ('\0' != *word) {
 419                 if ((ssz = strcspn(word, "\\")) > 0)
 420                         encode(p, word, ssz);
 421
 422                 word += (int)ssz;
 423                 if ('\\' != *word)
 424                         continue;
 425
 426                 word++;
 427                 esc = mandoc_escape(&word, &seq, &sz);
 428                 if (ESCAPE_ERROR == esc)
 429                         break;
 430
 431                 if (TERMENC_ASCII != p->enc)
 432                         switch (esc) {
 433                         case (ESCAPE_UNICODE):
 434                                 uc = mchars_num2uc(seq + 1, sz - 1);
 435                                 if ('\0' == uc)
 436                                         break;
 437                                 encode1(p, uc);
 438                                 continue;
 439                         case (ESCAPE_SPECIAL):
 440                                 uc = mchars_spec2cp(p->symtab, seq, sz);
 441                                 if (uc <= 0)
 442                                         break;
 443                                 encode1(p, uc);
 444                                 continue;
 445                         default:
 446                                 break;
 447                         }
 448
 449                 switch (esc) {
 450                 case (ESCAPE_UNICODE):
 451                         encode1(p, '?');
 452                         break;
 453                 case (ESCAPE_NUMBERED):
 454                         c = mchars_num2char(seq, sz);
 455                         if ('\0' != c)
 456                                 encode(p, &c, 1);
 457                         break;
 458                 case (ESCAPE_SPECIAL):
 459                         cp = mchars_spec2str(p->symtab, seq, sz, &ssz);
 460                         if (NULL != cp)
 461                                 encode(p, cp, ssz);
 462                         else if (1 == ssz)
 463                                 encode(p, seq, sz);
 464                         break;
 465                 case (ESCAPE_FONTBOLD):
 466                         term_fontrepl(p, TERMFONT_BOLD);
 467                         break;
 468                 case (ESCAPE_FONTITALIC):
 469                         term_fontrepl(p, TERMFONT_UNDER);
 470                         break;
 471                 case (ESCAPE_FONT):
 472                         /* FALLTHROUGH */
 473                 case (ESCAPE_FONTROMAN):
 474                         term_fontrepl(p, TERMFONT_NONE);
 475                         break;
 476                 case (ESCAPE_FONTPREV):
 477                         term_fontlast(p);
 478                         break;
 479                 case (ESCAPE_NOSPACE):
 480                         if ('\0' == *word)
 481                                 p->flags |= TERMP_NOSPACE;
 482                         break;
 483                 default:
 484                         break;
 485                 }
 486         }
 487 }
 488
 489 static void
 490 adjbuf(struct termp *p, int sz)
 491 {
 492
 493         if (0 == p->maxcols)
 494                 p->maxcols = 1024;
 495         while (sz >= p->maxcols)
 496                 p->maxcols <<= 2;
 497
 498         p->buf = mandoc_realloc
 499                 (p->buf, sizeof(int) * (size_t)p->maxcols);
 500 }
 501
 502 static void
 503 bufferc(struct termp *p, char c)
 504 {
 505
 506         if (p->col + 1 >= p->maxcols)
 507                 adjbuf(p, p->col + 1);
 508
 509         p->buf[p->col++] = c;
 510 }
 511
 512 /*
 513  * See encode().
 514  * Do this for a single (probably unicode) value.
 515  * Does not check for non-decorated glyphs.
 516  */
 517 static void
 518 encode1(struct termp *p, int c)
 519 {
 520         enum termfont     f;
 521
 522         if (p->col + 4 >= p->maxcols)
 523                 adjbuf(p, p->col + 4);
 524
 525         f = term_fonttop(p);
 526
 527         if (TERMFONT_NONE == f) {
 528                 p->buf[p->col++] = c;
 529                 return;
 530         } else if (TERMFONT_UNDER == f) {
 531                 p->buf[p->col++] = '_';
 532         } else
 533                 p->buf[p->col++] = c;
 534
 535         p->buf[p->col++] = 8;
 536         p->buf[p->col++] = c;
 537 }
 538
 539 static void
 540 encode(struct termp *p, const char *word, size_t sz)
 541 {
 542         enum termfont     f;
 543         int               i, len;
 544
 545         /* LINTED */
 546         len = sz;
 547
 548         /*
 549          * Encode and buffer a string of characters.  If the current
 550          * font mode is unset, buffer directly, else encode then buffer
 551          * character by character.
 552          */
 553
 554         if (TERMFONT_NONE == (f = term_fonttop(p))) {
 555                 if (p->col + len >= p->maxcols)
 556                         adjbuf(p, p->col + len);
 557                 for (i = 0; i < len; i++)
 558                         p->buf[p->col++] = word[i];
 559                 return;
 560         }
 561
 562         /* Pre-buffer, assuming worst-case. */
 563
 564         if (p->col + 1 + (len * 3) >= p->maxcols)
 565                 adjbuf(p, p->col + 1 + (len * 3));
 566
 567         for (i = 0; i < len; i++) {
 568                 if (ASCII_HYPH != word[i] &&
 569                     ! isgraph((unsigned char)word[i])) {
 570                         p->buf[p->col++] = word[i];
 571                         continue;
 572                 }
 573
 574                 if (TERMFONT_UNDER == f)
 575                         p->buf[p->col++] = '_';
 576                 else if (ASCII_HYPH == word[i])
 577                         p->buf[p->col++] = '-';
 578                 else
 579                         p->buf[p->col++] = word[i];
 580
 581                 p->buf[p->col++] = 8;
 582                 p->buf[p->col++] = word[i];
 583         }
 584 }
 585
 586 size_t
 587 term_len(const struct termp *p, size_t sz)
 588 {
 589
 590         return((*p->width)(p, ' ') * sz);
 591 }
 592
 593
 594 size_t
 595 term_strlen(const struct termp *p, const char *cp)
 596 {
 597         size_t           sz, rsz, i;
 598         int              ssz, c;
 599         const char      *seq, *rhs;
 600         enum mandoc_esc  esc;
 601         static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' };
 602
 603         /*
 604          * Account for escaped sequences within string length
 605          * calculations.  This follows the logic in term_word() as we
 606          * must calculate the width of produced strings.
 607          */
 608
 609         sz = 0;
 610         while ('\0' != *cp) {
 611                 rsz = strcspn(cp, rej);
 612                 for (i = 0; i < rsz; i++)
 613                         sz += (*p->width)(p, *cp++);
 614
 615                 c = 0;
 616                 switch (*cp) {
 617                 case ('\\'):
 618                         cp++;
 619                         esc = mandoc_escape(&cp, &seq, &ssz);
 620                         if (ESCAPE_ERROR == esc)
 621                                 return(sz);
 622
 623                         if (TERMENC_ASCII != p->enc)
 624                                 switch (esc) {
 625                                 case (ESCAPE_UNICODE):
 626                                         c = mchars_num2uc
 627                                                 (seq + 1, ssz - 1);
 628                                         if ('\0' == c)
 629                                                 break;
 630                                         sz += (*p->width)(p, c);
 631                                         continue;
 632                                 case (ESCAPE_SPECIAL):
 633                                         c = mchars_spec2cp
 634                                                 (p->symtab, seq, ssz);
 635                                         if (c <= 0)
 636                                                 break;
 637                                         sz += (*p->width)(p, c);
 638                                         continue;
 639                                 default:
 640                                         break;
 641                                 }
 642
 643                         rhs = NULL;
 644
 645                         switch (esc) {
 646                         case (ESCAPE_UNICODE):
 647                                 sz += (*p->width)(p, '?');
 648                                 break;
 649                         case (ESCAPE_NUMBERED):
 650                                 c = mchars_num2char(seq, ssz);
 651                                 if ('\0' != c)
 652                                         sz += (*p->width)(p, c);
 653                                 break;
 654                         case (ESCAPE_SPECIAL):
 655                                 rhs = mchars_spec2str
 656                                         (p->symtab, seq, ssz, &rsz);
 657
 658                                 if (ssz != 1 || rhs)
 659                                         break;
 660
 661                                 rhs = seq;
 662                                 rsz = ssz;
 663                                 break;
 664                         default:
 665                                 break;
 666                         }
 667
 668                         if (NULL == rhs)
 669                                 break;
 670
 671                         for (i = 0; i < rsz; i++)
 672                                 sz += (*p->width)(p, *rhs++);
 673                         break;
 674                 case (ASCII_NBRSP):
 675                         sz += (*p->width)(p, ' ');
 676                         cp++;
 677                         break;
 678                 case (ASCII_HYPH):
 679                         sz += (*p->width)(p, '-');
 680                         cp++;
 681                         break;
 682                 default:
 683                         break;
 684                 }
 685         }
 686
 687         return(sz);
 688 }
 689
 690 /* ARGSUSED */
 691 size_t
 692 term_vspan(const struct termp *p, const struct roffsu *su)
 693 {
 694         double           r;
 695
 696         switch (su->unit) {
 697         case (SCALE_CM):
 698                 r = su->scale * 2;
 699                 break;
 700         case (SCALE_IN):
 701                 r = su->scale * 6;
 702                 break;
 703         case (SCALE_PC):
 704                 r = su->scale;
 705                 break;
 706         case (SCALE_PT):
 707                 r = su->scale / 8;
 708                 break;
 709         case (SCALE_MM):
 710                 r = su->scale / 1000;
 711                 break;
 712         case (SCALE_VS):
 713                 r = su->scale;
 714                 break;
 715         default:
 716                 r = su->scale - 1;
 717                 break;
 718         }
 719
 720         if (r < 0.0)
 721                 r = 0.0;
 722         return(/* LINTED */(size_t)
 723                         r);
 724 }
 725
 726 size_t
 727 term_hspan(const struct termp *p, const struct roffsu *su)
 728 {
 729         double           v;
 730
 731         v = ((*p->hspan)(p, su));
 732         if (v < 0.0)
 733                 v = 0.0;
 734         return((size_t) /* LINTED */
 735                         v);
 736 }