contrib/mdocml/roff.c

   1 /*      $Id: roff.c,v 1.172 2011/10/24 21:41:45 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #ifdef HAVE_CONFIG_H
  19 #include "config.h"
  20 #endif
  21
  22 #include <assert.h>
  23 #include <ctype.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26
  27 #include "mandoc.h"
  28 #include "libroff.h"
  29 #include "libmandoc.h"
  30
  31 /* Maximum number of nested if-else conditionals. */
  32 #define RSTACK_MAX      128
  33
  34 /* Maximum number of string expansions per line, to break infinite loops. */
  35 #define EXPAND_LIMIT    1000
  36
   37 enum    rofft { /* request tokens; the values index roffs[], so keep both in the same order */
   38         ROFF_ad,
   39         ROFF_am,
   40         ROFF_ami,
   41         ROFF_am1,
   42         ROFF_de,
   43         ROFF_dei,
   44         ROFF_de1,
   45         ROFF_ds,
   46         ROFF_el,
   47         ROFF_hy,
   48         ROFF_ie,
   49         ROFF_if,
   50         ROFF_ig,
   51         ROFF_it,
   52         ROFF_ne,
   53         ROFF_nh,
   54         ROFF_nr,
   55         ROFF_ns,
   56         ROFF_ps,
   57         ROFF_rm,
   58         ROFF_so,
   59         ROFF_ta,
   60         ROFF_tr,
   61         ROFF_TS,
   62         ROFF_TE,
   63         ROFF_T_,
   64         ROFF_EQ,
   65         ROFF_EN,
   66         ROFF_cblock, /* block closer: the "." entry of roffs[] */
   67         ROFF_ccond, /* conditional closer: the `\}' entry of roffs[] */
   68         ROFF_USERDEF, /* user-defined macro: the nameless entry of roffs[] */
   69         ROFF_MAX /* size of roffs[]; also returned when a lookup finds nothing */
   70 };
  71
   72 enum    roffrule { /* result type of roff_evalcond(); stored in roffnode.rule and roff.rstack[] */
   73         ROFFRULE_ALLOW,
   74         ROFFRULE_DENY /* also the rule roffnode_push() gives a node without a parent */
   75 };
  76
   77 /*
   78  * A single register.  While "set" is zero, the register has not been
   79  * assigned and its value should be the per-register default.
   80  * Registers are assumed to be unsigned ints for now.
   81  */
   82 struct  reg {
   83         int              set; /* whether set or not */
   84         unsigned int     u; /* unsigned integer */
   85 };
  86
   87 /*
   88  * A minimal string buffer: a pointer together with its cached length.
   89  */
   90 struct  roffstr {
   91         char            *p; /* NUL-terminated buffer */
   92         size_t           sz; /* saved strlen(p) */
   93 };
  94
   95 /*
   96  * A key-value roffstr pair as part of a singly-linked list.
       * The strtab and xmbtab members of struct roff are lists of these.
   97  */
   98 struct  roffkv {
   99         struct roffstr   key;
  100         struct roffstr   val;
  101         struct roffkv   *next; /* next in list */
  102 };
 103
  104 struct  roff { /* state of one roff parser; created by roff_alloc() */
  105         struct mparse   *parse; /* parse point */
  106         struct roffnode *last; /* leaf of stack */
  107         enum roffrule    rstack[RSTACK_MAX]; /* stack of !`ie' rules */
  108         int              rstackpos; /* position in rstack; roff_alloc() starts it at -1 */
  109         struct reg       regs[REG__MAX]; /* registers; zeroed by roff_reset() */
  110         struct roffkv   *strtab; /* user-defined strings & macros */
  111         struct roffkv   *xmbtab; /* multi-byte trans table (`tr') */
  112         struct roffstr  *xtab; /* single-byte trans table (`tr'); roff_free1() frees 128 entries */
  113         const char      *current_string; /* value of last called user macro */
  114         struct tbl_node *first_tbl; /* first table parsed */
  115         struct tbl_node *last_tbl; /* last table parsed */
  116         struct tbl_node *tbl; /* current table being parsed */
  117         struct eqn_node *last_eqn; /* last equation parsed */
  118         struct eqn_node *first_eqn; /* first equation parsed */
  119         struct eqn_node *eqn; /* current equation being parsed */
  120 };
 121
  122 struct  roffnode { /* one entry of the stack whose leaf is roff.last */
  123         enum rofft       tok; /* type of node */
  124         struct roffnode *parent; /* up one in stack */
  125         int              line; /* parse line */
  126         int              col; /* parse col */
  127         char            *name; /* node name, e.g. macro name; owned, freed by roffnode_pop() */
  128         char            *end; /* end-rules: custom token; owned, freed by roffnode_pop() */
  129         int              endspan; /* end-rules: next-line or infty */
  130         enum roffrule    rule; /* current evaluation rule; inherited from the parent on push */
  131 };
 132
  133 #define ROFF_ARGS        struct roff *r, /* parse ctx */ \
  134                          enum rofft tok, /* tok of macro */ \
  135                          char **bufp, /* input buffer */ \
  136                          size_t *szp, /* size of input buffer */ \
  137                          int ln, /* parse line */ \
  138                          int ppos, /* original pos in buffer */ \
  139                          int pos, /* current pos in buffer */ \
  140                          int *offs /* reset offset of buffer data */
  141
      /*
       * Common signature of the request handlers stored in struct roffmac;
       * ROFF_ARGS above is the shared parameter list.
       */
  142 typedef enum rofferr (*roffproc)(ROFF_ARGS);
 143
  144 struct  roffmac { /* one entry of the roffs[] request table */
  145         const char      *name; /* macro name */
  146         roffproc         proc; /* process new macro */
  147         roffproc         text; /* process as child text of macro */
  148         roffproc         sub; /* process as child of macro */
  149         int              flags;
  150 #define ROFFMAC_STRUCT  (1 << 0) /* always interpret */
  151         struct roffmac  *next; /* next entry in the same hash[] bucket */
  152 };
 153
  154 struct  predef { /* one predefined string, installed by roff_alloc() and roff_reset() */
  155         const char      *name; /* predefined input name */
  156         const char      *str; /* replacement symbol */
  157 };
  158
      /* Expands to one struct predef initializer; presumably used by predefs.in -- confirm. */
  159 #define PREDEF(__name, __str) \
  160         { (__name), (__str) },
 161
  162 static  enum rofft       roffhash_find(const char *, size_t); /* forward declarations of file-local functions start here */
  163 static  void             roffhash_init(void);
  164 static  void             roffnode_cleanscope(struct roff *);
  165 static  void             roffnode_pop(struct roff *);
  166 static  void             roffnode_push(struct roff *, enum rofft,
  167                                 const char *, int, int);
      /* Functions declared with ROFF_ARGS are request handlers referenced from roffs[]. */
  168 static  enum rofferr     roff_block(ROFF_ARGS);
  169 static  enum rofferr     roff_block_text(ROFF_ARGS);
  170 static  enum rofferr     roff_block_sub(ROFF_ARGS);
  171 static  enum rofferr     roff_cblock(ROFF_ARGS);
  172 static  enum rofferr     roff_ccond(ROFF_ARGS);
  173 static  enum rofferr     roff_cond(ROFF_ARGS);
  174 static  enum rofferr     roff_cond_text(ROFF_ARGS);
  175 static  enum rofferr     roff_cond_sub(ROFF_ARGS);
  176 static  enum rofferr     roff_ds(ROFF_ARGS);
  177 static  enum roffrule    roff_evalcond(const char *, int *);
  178 static  void             roff_free1(struct roff *);
  179 static  void             roff_freestr(struct roffkv *);
  180 static  char            *roff_getname(struct roff *, char **, int, int);
  181 static  const char      *roff_getstrn(const struct roff *,
  182                                 const char *, size_t);
  183 static  enum rofferr     roff_line_ignore(ROFF_ARGS);
  184 static  enum rofferr     roff_nr(ROFF_ARGS);
  185 static  void             roff_openeqn(struct roff *, const char *,
  186                                 int, int, const char *);
  187 static  enum rofft       roff_parse(struct roff *, const char *, int *);
  188 static  enum rofferr     roff_parsetext(char *);
  189 static  enum rofferr     roff_res(struct roff *,
  190                                 char **, size_t *, int, int);
  191 static  enum rofferr     roff_rm(ROFF_ARGS);
  192 static  void             roff_setstr(struct roff *,
  193                                 const char *, const char *, int);
  194 static  void             roff_setstrn(struct roffkv **, const char *,
  195                                 size_t, const char *, size_t, int);
  196 static  enum rofferr     roff_so(ROFF_ARGS);
  197 static  enum rofferr     roff_tr(ROFF_ARGS);
  198 static  enum rofferr     roff_TE(ROFF_ARGS);
  199 static  enum rofferr     roff_TS(ROFF_ARGS);
  200 static  enum rofferr     roff_EQ(ROFF_ARGS);
  201 static  enum rofferr     roff_EN(ROFF_ARGS);
  202 static  enum rofferr     roff_T_(ROFF_ARGS);
  203 static  enum rofferr     roff_userdef(ROFF_ARGS);
 204
  205 /* Range of first bytes accepted by the request hash table; see roffhash_find(). */
  206
  207 #define ASCII_HI         126
  208 #define ASCII_LO         33
  209 #define HASHWIDTH       (ASCII_HI - ASCII_LO + 1)
  210
      /* One chain of roffs[] entries per possible first byte; filled by roffhash_init(). */
  211 static  struct roffmac  *hash[HASHWIDTH];
 212
  213 static  struct roffmac   roffs[ROFF_MAX] = { /* indexed by enum rofft; fields: name, proc, text, sub, flags, next */
  214         { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
  215         { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
  216         { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
  217         { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
  218         { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
  219         { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
  220         { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
  221         { "ds", roff_ds, NULL, NULL, 0, NULL },
  222         { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
  223         { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
  224         { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
  225         { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
  226         { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
  227         { "it", roff_line_ignore, NULL, NULL, 0, NULL },
  228         { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
  229         { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
  230         { "nr", roff_nr, NULL, NULL, 0, NULL },
  231         { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
  232         { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
  233         { "rm", roff_rm, NULL, NULL, 0, NULL },
  234         { "so", roff_so, NULL, NULL, 0, NULL },
  235         { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
  236         { "tr", roff_tr, NULL, NULL, 0, NULL },
  237         { "TS", roff_TS, NULL, NULL, 0, NULL },
  238         { "TE", roff_TE, NULL, NULL, 0, NULL },
  239         { "T&", roff_T_, NULL, NULL, 0, NULL },
  240         { "EQ", roff_EQ, NULL, NULL, 0, NULL },
  241         { "EN", roff_EN, NULL, NULL, 0, NULL },
  242         { ".", roff_cblock, NULL, NULL, 0, NULL }, /* ROFF_cblock */
  243         { "\\}", roff_ccond, NULL, NULL, 0, NULL }, /* ROFF_ccond */
  244         { NULL, roff_userdef, NULL, NULL, 0, NULL }, /* ROFF_USERDEF: nameless, skipped by roffhash_init() */
  245 };
 246
  247 /* Predefined strings installed into every parser; the entries come from predefs.in. */
      /* NOTE(review): PREDEFS_MAX presumably has to equal the number of entries in predefs.in -- confirm. */
  248 #define PREDEFS_MAX      38
  249 static  const struct predef predefs[PREDEFS_MAX] = {
  250 #include "predefs.in"
  251 };
 252
 253 /* See roffhash_find() */
 254 #define ROFF_HASH(p)    (p[0] - ASCII_LO)
 255
 256 static void
 257 roffhash_init(void)
 258 {
 259         struct roffmac   *n;
 260         int               buc, i;
 261
 262         for (i = 0; i < (int)ROFF_USERDEF; i++) {
 263                 assert(roffs[i].name[0] >= ASCII_LO);
 264                 assert(roffs[i].name[0] <= ASCII_HI);
 265
 266                 buc = ROFF_HASH(roffs[i].name);
 267
 268                 if (NULL != (n = hash[buc])) {
 269                         for ( ; n->next; n = n->next)
 270                                 /* Do nothing. */ ;
 271                         n->next = &roffs[i];
 272                 } else
 273                         hash[buc] = &roffs[i];
 274         }
 275 }
 276
 277 /*
 278  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
 279  * the nil-terminated string name could be found.
 280  */
 281 static enum rofft
 282 roffhash_find(const char *p, size_t s)
 283 {
 284         int              buc;
 285         struct roffmac  *n;
 286
 287         /*
 288          * libroff has an extremely simple hashtable, for the time
 289          * being, which simply keys on the first character, which must
 290          * be printable, then walks a chain.  It works well enough until
 291          * optimised.
 292          */
 293
 294         if (p[0] < ASCII_LO || p[0] > ASCII_HI)
 295                 return(ROFF_MAX);
 296
 297         buc = ROFF_HASH(p);
 298
 299         if (NULL == (n = hash[buc]))
 300                 return(ROFF_MAX);
 301         for ( ; n; n = n->next)
 302                 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
 303                         return((enum rofft)(n - roffs));
 304
 305         return(ROFF_MAX);
 306 }
 307
 308
 309 /*
 310  * Pop the current node off of the stack of roff instructions currently
 311  * pending.
 312  */
 313 static void
 314 roffnode_pop(struct roff *r)
 315 {
 316         struct roffnode *p;
 317
 318         assert(r->last);
 319         p = r->last;
 320
 321         r->last = r->last->parent;
 322         free(p->name);
 323         free(p->end);
 324         free(p);
 325 }
 326
 327
 328 /*
 329  * Push a roff node onto the instruction stack.  This must later be
 330  * removed with roffnode_pop().
 331  */
 332 static void
 333 roffnode_push(struct roff *r, enum rofft tok, const char *name,
 334                 int line, int col)
 335 {
 336         struct roffnode *p;
 337
 338         p = mandoc_calloc(1, sizeof(struct roffnode));
 339         p->tok = tok;
 340         if (name)
 341                 p->name = mandoc_strdup(name);
 342         p->parent = r->last;
 343         p->line = line;
 344         p->col = col;
 345         p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
 346
 347         r->last = p;
 348 }
 349
 350
 351 static void
 352 roff_free1(struct roff *r)
 353 {
 354         struct tbl_node *t;
 355         struct eqn_node *e;
 356         int              i;
 357
 358         while (NULL != (t = r->first_tbl)) {
 359                 r->first_tbl = t->next;
 360                 tbl_free(t);
 361         }
 362
 363         r->first_tbl = r->last_tbl = r->tbl = NULL;
 364
 365         while (NULL != (e = r->first_eqn)) {
 366                 r->first_eqn = e->next;
 367                 eqn_free(e);
 368         }
 369
 370         r->first_eqn = r->last_eqn = r->eqn = NULL;
 371
 372         while (r->last)
 373                 roffnode_pop(r);
 374
 375         roff_freestr(r->strtab);
 376         roff_freestr(r->xmbtab);
 377
 378         r->strtab = r->xmbtab = NULL;
 379
 380         if (r->xtab)
 381                 for (i = 0; i < 128; i++)
 382                         free(r->xtab[i].p);
 383
 384         free(r->xtab);
 385         r->xtab = NULL;
 386 }
 387
 388 void
 389 roff_reset(struct roff *r)
 390 {
 391         int              i;
 392
 393         roff_free1(r);
 394
 395         memset(&r->regs, 0, sizeof(struct reg) * REG__MAX);
 396
 397         for (i = 0; i < PREDEFS_MAX; i++)
 398                 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
 399 }
 400
 401
 402 void
 403 roff_free(struct roff *r)
 404 {
 405
 406         roff_free1(r);
 407         free(r);
 408 }
 409
 410
 411 struct roff *
 412 roff_alloc(struct mparse *parse)
 413 {
 414         struct roff     *r;
 415         int              i;
 416
 417         r = mandoc_calloc(1, sizeof(struct roff));
 418         r->parse = parse;
 419         r->rstackpos = -1;
 420
 421         roffhash_init();
 422
 423         for (i = 0; i < PREDEFS_MAX; i++)
 424                 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
 425
 426         return(r);
 427 }
 428
  429 /*
  430  * Pre-filter each and every line for string references (escapes
  431  * beginning with `\*', e.g., `\*(ab') and replace each one by the
  432  * value of the named string.  This must be done before the actual
  433  * line is processed.  This also checks the syntax of regular escapes.
       *
       * Whenever a reference is replaced, a new buffer is allocated, the
       * old one is freed, and *bufp and *szp are updated.  Returns ROFF_IGN
       * if more than EXPAND_LIMIT replacements were needed on this line,
       * ROFF_CONT in all other cases, including after a reported bad escape.
  434  */
  435 static enum rofferr
  436 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
  437 {
  438         enum mandoc_esc  esc;
  439         const char      *stesc; /* start of an escape sequence ('\\') */
  440         const char      *stnam; /* start of the name, after "[(*" */
  441         const char      *cp;    /* end of the name, e.g. before ']' */
  442         const char      *res;   /* the string to be substituted */
  443         int              i, maxl, expand_count;
  444         size_t           nsz;
  445         char            *n;
  446
  447         expand_count = 0;
  448
  449 again:
              /*
               * Scanning (re)starts at pos.  After a replacement, pos is the
               * offset at which the inserted text begins, so the inserted
               * text is itself scanned for further references.
               */
  450         cp = *bufp + pos;
  451         while (NULL != (cp = strchr(cp, '\\'))) {
  452                 stesc = cp++;
  453
  454                 /*
  455                  * The second character must be an asterisk.
  456                  * If it isn't, skip it anyway:  It is escaped,
  457                  * so it can't start another escape sequence.
  458                  */
  459
  460                 if ('\0' == *cp)
  461                         return(ROFF_CONT);
  462
  463                 if ('*' != *cp) {
                              /* res only serves as a scratch copy of cp here. */
  464                         res = cp;
  465                         esc = mandoc_escape(&cp, NULL, NULL);
  466                         if (ESCAPE_ERROR != esc)
  467                                 continue;
  468                         cp = res;
  469                         mandoc_msg
  470                                 (MANDOCERR_BADESCAPE, r->parse,
  471                                  ln, (int)(stesc - *bufp), NULL);
  472                         return(ROFF_CONT);
  473                 }
  474
  475                 cp++;
  476
  477                 /*
  478                  * The third character decides the length
  479                  * of the name of the string.
  480                  * Save a pointer to the name.
  481                  */
  482
  483                 switch (*cp) {
  484                 case ('\0'):
  485                         return(ROFF_CONT);
  486                 case ('('):
  487                         cp++;
  488                         maxl = 2; /* `\*(xx': exactly two bytes */
  489                         break;
  490                 case ('['):
  491                         cp++;
  492                         maxl = 0; /* `\*[name]': no fixed length, ends at ']' */
  493                         break;
  494                 default:
  495                         maxl = 1; /* `\*x': a single byte */
  496                         break;
  497                 }
  498                 stnam = cp;
  499
  500                 /* Advance to the end of the name. */
  501
  502                 for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
  503                         if ('\0' == *cp) {
  504                                 mandoc_msg
  505                                         (MANDOCERR_BADESCAPE,
  506                                          r->parse, ln,
  507                                          (int)(stesc - *bufp), NULL);
  508                                 return(ROFF_CONT);
  509                         }
  510                         if (0 == maxl && ']' == *cp)
  511                                 break;
  512                 }
  513
  514                 /*
  515                  * Retrieve the replacement string; if it is
  516                  * undefined, report that and substitute the
  517                  * empty string.
  518                  */
  519                 res = roff_getstrn(r, stnam, (size_t)i);
  520
  521                 if (NULL == res) {
  522                         mandoc_msg
  523                                 (MANDOCERR_BADESCAPE, r->parse,
  524                                  ln, (int)(stesc - *bufp), NULL);
  525                         res = "";
  526                 }
  527
  528                 /* Replace the escape sequence by the string. */
  529
  530                 pos = stesc - *bufp;
  531
                      /* assumes *szp covers the current contents including the NUL -- TODO confirm */
  532                 nsz = *szp + strlen(res) + 1;
  533                 n = mandoc_malloc(nsz);
  534
                      /* Text before the escape, the value, then the text after the name. */
  535                 strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
  536                 strlcat(n, res, nsz);
                      /* In the bracket form cp stopped on the ']', which is skipped. */
  537                 strlcat(n, cp + (maxl ? 0 : 1), nsz);
  538
  539                 free(*bufp);
  540
  541                 *bufp = n;
  542                 *szp = nsz;
  543
  544                 if (EXPAND_LIMIT >= ++expand_count)
  545                         goto again;
  546
  547                 /* Just leave the string unexpanded. */
  548                 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
  549                 return(ROFF_IGN);
  550         }
  551         return(ROFF_CONT);
  552 }
 553
 554 /*
 555  * Process text streams: convert all breakable hyphens into ASCII_HYPH.
 556  */
 557 static enum rofferr
 558 roff_parsetext(char *p)
 559 {
 560         size_t           sz;
 561         const char      *start;
 562         enum mandoc_esc  esc;
 563
 564         start = p;
 565
 566         while ('\0' != *p) {
 567                 sz = strcspn(p, "-\\");
 568                 p += sz;
 569
 570                 if ('\0' == *p)
 571                         break;
 572
 573                 if ('\\' == *p) {
 574                         /* Skip over escapes. */
 575                         p++;
 576                         esc = mandoc_escape
 577                                 ((const char **)&p, NULL, NULL);
 578                         if (ESCAPE_ERROR == esc)
 579                                 break;
 580                         continue;
 581                 } else if (p == start) {
 582                         p++;
 583                         continue;
 584                 }
 585
 586                 if (isalpha((unsigned char)p[-1]) &&
 587                     isalpha((unsigned char)p[1]))
 588                         *p = ASCII_HYPH;
 589                 p++;
 590         }
 591
 592         return(ROFF_CONT);
 593 }
 594
 595 enum rofferr
 596 roff_parseln(struct roff *r, int ln, char **bufp,
 597                 size_t *szp, int pos, int *offs)
 598 {
 599         enum rofft       t;
 600         enum rofferr     e;
 601         int              ppos, ctl;
 602
 603         /*
 604          * Run the reserved-word filter only if we have some reserved
 605          * words to fill in.
 606          */
 607
 608         e = roff_res(r, bufp, szp, ln, pos);
 609         if (ROFF_IGN == e)
 610                 return(e);
 611         assert(ROFF_CONT == e);
 612
 613         ppos = pos;
 614         ctl = mandoc_getcontrol(*bufp, &pos);
 615
 616         /*
 617          * First, if a scope is open and we're not a macro, pass the
 618          * text through the macro's filter.  If a scope isn't open and
 619          * we're not a macro, just let it through.
 620          * Finally, if there's an equation scope open, divert it into it
 621          * no matter our state.
 622          */
 623
 624         if (r->last && ! ctl) {
 625                 t = r->last->tok;
 626                 assert(roffs[t].text);
 627                 e = (*roffs[t].text)
 628                         (r, t, bufp, szp, ln, pos, pos, offs);
 629                 assert(ROFF_IGN == e || ROFF_CONT == e);
 630                 if (ROFF_CONT != e)
 631                         return(e);
 632                 if (r->eqn)
 633                         return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
 634                 if (r->tbl)
 635                         return(tbl_read(r->tbl, ln, *bufp, pos));
 636                 return(roff_parsetext(*bufp + pos));
 637         } else if ( ! ctl) {
 638                 if (r->eqn)
 639                         return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
 640                 if (r->tbl)
 641                         return(tbl_read(r->tbl, ln, *bufp, pos));
 642                 return(roff_parsetext(*bufp + pos));
 643         } else if (r->eqn)
 644                 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
 645
 646         /*
 647          * If a scope is open, go to the child handler for that macro,
 648          * as it may want to preprocess before doing anything with it.
 649          * Don't do so if an equation is open.
 650          */
 651
 652         if (r->last) {
 653                 t = r->last->tok;
 654                 assert(roffs[t].sub);
 655                 return((*roffs[t].sub)
 656                                 (r, t, bufp, szp,
 657                                  ln, ppos, pos, offs));
 658         }
 659
 660         /*
 661          * Lastly, as we've no scope open, try to look up and execute
 662          * the new macro.  If no macro is found, simply return and let
 663          * the compilers handle it.
 664          */
 665
 666         if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
 667                 return(ROFF_CONT);
 668
 669         assert(roffs[t].proc);
 670         return((*roffs[t].proc)
 671                         (r, t, bufp, szp,
 672                          ln, ppos, pos, offs));
 673 }
 674
 675
 676 void
 677 roff_endparse(struct roff *r)
 678 {
 679
 680         if (r->last)
 681                 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
 682                                 r->last->line, r->last->col, NULL);
 683
 684         if (r->eqn) {
 685                 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
 686                                 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
 687                 eqn_end(&r->eqn);
 688         }
 689
 690         if (r->tbl) {
 691                 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
 692                                 r->tbl->line, r->tbl->pos, NULL);
 693                 tbl_end(&r->tbl);
 694         }
 695 }
 696
 697 /*
 698  * Parse a roff node's type from the input buffer.  This must be in the
 699  * form of ".foo xxx" in the usual way.
 700  */
 701 static enum rofft
 702 roff_parse(struct roff *r, const char *buf, int *pos)
 703 {
 704         const char      *mac;
 705         size_t           maclen;
 706         enum rofft       t;
 707
 708         if ('\0' == buf[*pos] || '"' == buf[*pos] ||
 709                         '\t' == buf[*pos] || ' ' == buf[*pos])
 710                 return(ROFF_MAX);
 711
 712         /*
 713          * We stop the macro parse at an escape, tab, space, or nil.
 714          * However, `\}' is also a valid macro, so make sure we don't
 715          * clobber it by seeing the `\' as the end of token.
 716          */
 717
 718         mac = buf + *pos;
 719         maclen = strcspn(mac + 1, " \\\t\0") + 1;
 720
 721         t = (r->current_string = roff_getstrn(r, mac, maclen))
 722             ? ROFF_USERDEF : roffhash_find(mac, maclen);
 723
 724         *pos += (int)maclen;
 725
 726         while (buf[*pos] && ' ' == buf[*pos])
 727                 (*pos)++;
 728
 729         return(t);
 730 }
 731
 732 /* ARGSUSED */
 733 static enum rofferr
 734 roff_cblock(ROFF_ARGS)
 735 {
 736
 737         /*
 738          * A block-close `..' should only be invoked as a child of an
 739          * ignore macro, otherwise raise a warning and just ignore it.
 740          */
 741
 742         if (NULL == r->last) {
 743                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 744                 return(ROFF_IGN);
 745         }
 746
 747         switch (r->last->tok) {
 748         case (ROFF_am):
 749                 /* FALLTHROUGH */
 750         case (ROFF_ami):
 751                 /* FALLTHROUGH */
 752         case (ROFF_am1):
 753                 /* FALLTHROUGH */
 754         case (ROFF_de):
 755                 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
 756                 /* FALLTHROUGH */
 757         case (ROFF_dei):
 758                 /* FALLTHROUGH */
 759         case (ROFF_ig):
 760                 break;
 761         default:
 762                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 763                 return(ROFF_IGN);
 764         }
 765
 766         if ((*bufp)[pos])
 767                 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
 768
 769         roffnode_pop(r);
 770         roffnode_cleanscope(r);
 771         return(ROFF_IGN);
 772
 773 }
 774
 775
 776 static void
 777 roffnode_cleanscope(struct roff *r)
 778 {
 779
 780         while (r->last) {
 781                 if (--r->last->endspan < 0)
 782                         break;
 783                 roffnode_pop(r);
 784         }
 785 }
 786
 787
 788 /* ARGSUSED */
 789 static enum rofferr
 790 roff_ccond(ROFF_ARGS)
 791 {
 792
 793         if (NULL == r->last) {
 794                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 795                 return(ROFF_IGN);
 796         }
 797
 798         switch (r->last->tok) {
 799         case (ROFF_el):
 800                 /* FALLTHROUGH */
 801         case (ROFF_ie):
 802                 /* FALLTHROUGH */
 803         case (ROFF_if):
 804                 break;
 805         default:
 806                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 807                 return(ROFF_IGN);
 808         }
 809
 810         if (r->last->endspan > -1) {
 811                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 812                 return(ROFF_IGN);
 813         }
 814
 815         if ((*bufp)[pos])
 816                 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
 817
 818         roffnode_pop(r);
 819         roffnode_cleanscope(r);
 820         return(ROFF_IGN);
 821 }
 822
 823
  824 /* ARGSUSED */
      /*
       * Open a block: handler for `ig' and for the `de' and `am' families
       * (see roffs[]).  Pushes a node for the block and, if the request
       * line carries one, records a custom end marker on it.  Always
       * returns ROFF_IGN.  The input buffer is modified in place.
       */
  825 static enum rofferr
  826 roff_block(ROFF_ARGS)
  827 {
  828         int             sv;
  829         size_t          sz;
  830         char            *name;
  831
  832         name = NULL;
  833
              /* Every request except `ig' requires a name argument. */
  834         if (ROFF_ig != tok) {
  835                 if ('\0' == (*bufp)[pos]) {
  836                         mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
  837                         return(ROFF_IGN);
  838                 }
  839
  840                 /*
  841                  * Re-write `de1', since we don't really care about
  842                  * groff's strange compatibility mode, into `de'.
  843                  */
  844
  845                 if (ROFF_de1 == tok)
  846                         tok = ROFF_de;
                      /*
                       * Only `de' keeps the name; it points into *bufp.  The
                       * other requests are reported via MANDOCERR_REQUEST and
                       * their node is pushed without a name.
                       */
  847                 if (ROFF_de == tok)
  848                         name = *bufp + pos;
  849                 else
  850                         mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
  851                             roffs[tok].name);
  852
                      /* Step over the name ... */
  853                 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
  854                         pos++;
  855
                      /* ... and overwrite the whitespace after it, terminating the name. */
  856                 while (isspace((unsigned char)(*bufp)[pos]))
  857                         (*bufp)[pos++] = '\0';
  858         }
  859
  860         roffnode_push(r, tok, name, ln, ppos);
  861
  862         /*
  863          * At the beginning of a `de' macro, clear the existing string
  864          * with the same name, if there is one.  New content will be
  865          * added from roff_block_text() in multiline mode.
  866          */
  867
  868         if (ROFF_de == tok)
  869                 roff_setstr(r, name, "", 0);
  870
  871         if ('\0' == (*bufp)[pos])
  872                 return(ROFF_IGN);
  873
  874         /* If present, process the custom end-of-block marker. */
  875
  876         sv = pos;
  877         while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
  878                 pos++;
  879
  880         /*
  881          * Note: groff does NOT like escape characters in the input.
  882          * Instead of detecting this, we're just going to let it fly and
  883          * to hell with it.
  884          */
  885
  886         assert(pos > sv);
  887         sz = (size_t)(pos - sv);
  888
              /* A marker of just `.' is not stored; the node keeps no custom end. */
  889         if (1 == sz && '.' == (*bufp)[sv])
  890                 return(ROFF_IGN);
  891
              /* The node owns this copy; roffnode_pop() frees it. */
  892         r->last->end = mandoc_malloc(sz + 1);
  893
  894         memcpy(r->last->end, *bufp + sv, sz);
  895         r->last->end[(int)sz] = '\0';
  896
              /* Anything after the marker is dropped with a warning. */
  897         if ((*bufp)[pos])
  898                 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
  899
  900         return(ROFF_IGN);
  901 }
 902
 903
 904 /* ARGSUSED */
 905 static enum rofferr
 906 roff_block_sub(ROFF_ARGS)
 907 {
 908         enum rofft      t;
 909         int             i, j;
 910
 911         /*
 912          * First check whether a custom macro exists at this level.  If
 913          * it does, then check against it.  This is some of groff's
 914          * stranger behaviours.  If we encountered a custom end-scope
 915          * tag and that tag also happens to be a "real" macro, then we
 916          * need to try interpreting it again as a real macro.  If it's
 917          * not, then return ignore.  Else continue.
 918          */
 919
 920         if (r->last->end) {
 921                 for (i = pos, j = 0; r->last->end[j]; j++, i++)
 922                         if ((*bufp)[i] != r->last->end[j])
 923                                 break;
 924
 925                 if ('\0' == r->last->end[j] &&
 926                                 ('\0' == (*bufp)[i] ||
 927                                  ' ' == (*bufp)[i] ||
 928                                  '\t' == (*bufp)[i])) {
 929                         roffnode_pop(r);
 930                         roffnode_cleanscope(r);
 931
 932                         while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
 933                                 i++;
 934
 935                         pos = i;
 936                         if (ROFF_MAX != roff_parse(r, *bufp, &pos))
 937                                 return(ROFF_RERUN);
 938                         return(ROFF_IGN);
 939                 }
 940         }
 941
 942         /*
 943          * If we have no custom end-query or lookup failed, then try
 944          * pulling it out of the hashtable.
 945          */
 946
 947         t = roff_parse(r, *bufp, &pos);
 948
 949         /*
 950          * Macros other than block-end are only significant
 951          * in `de' blocks; elsewhere, simply throw them away.
 952          */
 953         if (ROFF_cblock != t) {
 954                 if (ROFF_de == tok)
 955                         roff_setstr(r, r->last->name, *bufp + ppos, 1);
 956                 return(ROFF_IGN);
 957         }
 958
 959         assert(roffs[t].proc);
 960         return((*roffs[t].proc)(r, t, bufp, szp,
 961                                 ln, ppos, pos, offs));
 962 }
 963
 964
 965 /* ARGSUSED */
 966 static enum rofferr
 967 roff_block_text(ROFF_ARGS)
 968 {
 969
 970         if (ROFF_de == tok)
 971                 roff_setstr(r, r->last->name, *bufp + pos, 1);
 972
 973         return(ROFF_IGN);
 974 }
 975
 976
 977 /* ARGSUSED */
 978 static enum rofferr
 979 roff_cond_sub(ROFF_ARGS)
 980 {
 981         enum rofft       t;
 982         enum roffrule    rr;
 983         char            *ep;
 984
 985         rr = r->last->rule;
 986         roffnode_cleanscope(r);
 987
 988         /*
 989          * If the macro is unknown, first check if it contains a closing
 990          * delimiter `\}'.  If it does, close out our scope and return
 991          * the currently-scoped rule (ignore or continue).  Else, drop
 992          * into the currently-scoped rule.
 993          */
 994
 995         if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
 996                 ep = &(*bufp)[pos];
 997                 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
 998                         ep++;
 999                         if ('}' != *ep)
1000                                 continue;
1001
1002                         /*
1003                          * Make the \} go away.
1004                          * This is a little haphazard, as it's not quite
1005                          * clear how nroff does this.
1006                          * If we're at the end of line, then just chop
1007                          * off the \} and resize the buffer.
1008                          * If we aren't, then conver it to spaces.
1009                          */
1010
1011                         if ('\0' == *(ep + 1)) {
1012                                 *--ep = '\0';
1013                                 *szp -= 2;
1014                         } else
1015                                 *(ep - 1) = *ep = ' ';
1016
1017                         roff_ccond(r, ROFF_ccond, bufp, szp,
1018                                         ln, pos, pos + 2, offs);
1019                         break;
1020                 }
1021                 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1022         }
1023
1024         /*
1025          * A denied conditional must evaluate its children if and only
1026          * if they're either structurally required (such as loops and
1027          * conditionals) or a closing macro.
1028          */
1029
1030         if (ROFFRULE_DENY == rr)
1031                 if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
1032                         if (ROFF_ccond != t)
1033                                 return(ROFF_IGN);
1034
1035         assert(roffs[t].proc);
1036         return((*roffs[t].proc)(r, t, bufp, szp,
1037                                 ln, ppos, pos, offs));
1038 }
1039
1040 /* ARGSUSED */
1041 static enum rofferr
1042 roff_cond_text(ROFF_ARGS)
1043 {
1044         char            *ep;
1045         enum roffrule    rr;
1046
1047         rr = r->last->rule;
1048         roffnode_cleanscope(r);
1049
1050         ep = &(*bufp)[pos];
1051         for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1052                 ep++;
1053                 if ('}' != *ep)
1054                         continue;
1055                 *ep = '&';
1056                 roff_ccond(r, ROFF_ccond, bufp, szp,
1057                                 ln, pos, pos + 2, offs);
1058         }
1059         return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1060 }
1061
1062 static enum roffrule
1063 roff_evalcond(const char *v, int *pos)
1064 {
1065
1066         switch (v[*pos]) {
1067         case ('n'):
1068                 (*pos)++;
1069                 return(ROFFRULE_ALLOW);
1070         case ('e'):
1071                 /* FALLTHROUGH */
1072         case ('o'):
1073                 /* FALLTHROUGH */
1074         case ('t'):
1075                 (*pos)++;
1076                 return(ROFFRULE_DENY);
1077         default:
1078                 break;
1079         }
1080
1081         while (v[*pos] && ' ' != v[*pos])
1082                 (*pos)++;
1083         return(ROFFRULE_DENY);
1084 }
1085
1086 /* ARGSUSED */
1087 static enum rofferr
1088 roff_line_ignore(ROFF_ARGS)
1089 {
1090
1091         if (ROFF_it == tok)
1092                 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it");
1093
1094         return(ROFF_IGN);
1095 }
1096
1097 /* ARGSUSED */
1098 static enum rofferr
1099 roff_cond(ROFF_ARGS)
1100 {
1101         int              sv;
1102         enum roffrule    rule;
1103
1104         /*
1105          * An `.el' has no conditional body: it will consume the value
1106          * of the current rstack entry set in prior `ie' calls or
1107          * defaults to DENY.
1108          *
1109          * If we're not an `el', however, then evaluate the conditional.
1110          */
1111
1112         rule = ROFF_el == tok ?
1113                 (r->rstackpos < 0 ?
1114                  ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1115                 roff_evalcond(*bufp, &pos);
1116
1117         sv = pos;
1118         while (' ' == (*bufp)[pos])
1119                 pos++;
1120
1121         /*
1122          * Roff is weird.  If we have just white-space after the
1123          * conditional, it's considered the BODY and we exit without
1124          * really doing anything.  Warn about this.  It's probably
1125          * wrong.
1126          */
1127
1128         if ('\0' == (*bufp)[pos] && sv != pos) {
1129                 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1130                 return(ROFF_IGN);
1131         }
1132
1133         roffnode_push(r, tok, NULL, ln, ppos);
1134
1135         r->last->rule = rule;
1136
1137         /*
1138          * An if-else will put the NEGATION of the current evaluated
1139          * conditional into the stack of rules.
1140          */
1141
1142         if (ROFF_ie == tok) {
1143                 if (r->rstackpos == RSTACK_MAX - 1) {
1144                         mandoc_msg(MANDOCERR_MEM,
1145                                 r->parse, ln, ppos, NULL);
1146                         return(ROFF_ERR);
1147                 }
1148                 r->rstack[++r->rstackpos] =
1149                         ROFFRULE_DENY == r->last->rule ?
1150                         ROFFRULE_ALLOW : ROFFRULE_DENY;
1151         }
1152
1153         /* If the parent has false as its rule, then so do we. */
1154
1155         if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1156                 r->last->rule = ROFFRULE_DENY;
1157
1158         /*
1159          * Determine scope.  If we're invoked with "\{" trailing the
1160          * conditional, then we're in a multiline scope.  Else our scope
1161          * expires on the next line.
1162          */
1163
1164         r->last->endspan = 1;
1165
1166         if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1167                 r->last->endspan = -1;
1168                 pos += 2;
1169         }
1170
1171         /*
1172          * If there are no arguments on the line, the next-line scope is
1173          * assumed.
1174          */
1175
1176         if ('\0' == (*bufp)[pos])
1177                 return(ROFF_IGN);
1178
1179         /* Otherwise re-run the roff parser after recalculating. */
1180
1181         *offs = pos;
1182         return(ROFF_RERUN);
1183 }
1184
1185
1186 /* ARGSUSED */
1187 static enum rofferr
1188 roff_ds(ROFF_ARGS)
1189 {
1190         char            *name, *string;
1191
1192         /*
1193          * A symbol is named by the first word following the macro
1194          * invocation up to a space.  Its value is anything after the
1195          * name's trailing whitespace and optional double-quote.  Thus,
1196          *
1197          *  [.ds foo "bar  "     ]
1198          *
1199          * will have `bar  "     ' as its value.
1200          */
1201
1202         string = *bufp + pos;
1203         name = roff_getname(r, &string, ln, pos);
1204         if ('\0' == *name)
1205                 return(ROFF_IGN);
1206
1207         /* Read past initial double-quote. */
1208         if ('"' == *string)
1209                 string++;
1210
1211         /* The rest is the value. */
1212         roff_setstr(r, name, string, 0);
1213         return(ROFF_IGN);
1214 }
1215
1216 int
1217 roff_regisset(const struct roff *r, enum regs reg)
1218 {
1219
1220         return(r->regs[(int)reg].set);
1221 }
1222
1223 unsigned int
1224 roff_regget(const struct roff *r, enum regs reg)
1225 {
1226
1227         return(r->regs[(int)reg].u);
1228 }
1229
1230 void
1231 roff_regunset(struct roff *r, enum regs reg)
1232 {
1233
1234         r->regs[(int)reg].set = 0;
1235 }
1236
1237 /* ARGSUSED */
1238 static enum rofferr
1239 roff_nr(ROFF_ARGS)
1240 {
1241         const char      *key;
1242         char            *val;
1243         int              iv;
1244
1245         val = *bufp + pos;
1246         key = roff_getname(r, &val, ln, pos);
1247
1248         if (0 == strcmp(key, "nS")) {
1249                 r->regs[(int)REG_nS].set = 1;
1250                 if ((iv = mandoc_strntoi(val, strlen(val), 10)) >= 0)
1251                         r->regs[(int)REG_nS].u = (unsigned)iv;
1252                 else
1253                         r->regs[(int)REG_nS].u = 0u;
1254         }
1255
1256         return(ROFF_IGN);
1257 }
1258
1259 /* ARGSUSED */
1260 static enum rofferr
1261 roff_rm(ROFF_ARGS)
1262 {
1263         const char       *name;
1264         char             *cp;
1265
1266         cp = *bufp + pos;
1267         while ('\0' != *cp) {
1268                 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1269                 if ('\0' != *name)
1270                         roff_setstr(r, name, NULL, 0);
1271         }
1272         return(ROFF_IGN);
1273 }
1274
1275 /* ARGSUSED */
1276 static enum rofferr
1277 roff_TE(ROFF_ARGS)
1278 {
1279
1280         if (NULL == r->tbl)
1281                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1282         else
1283                 tbl_end(&r->tbl);
1284
1285         return(ROFF_IGN);
1286 }
1287
1288 /* ARGSUSED */
1289 static enum rofferr
1290 roff_T_(ROFF_ARGS)
1291 {
1292
1293         if (NULL == r->tbl)
1294                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1295         else
1296                 tbl_restart(ppos, ln, r->tbl);
1297
1298         return(ROFF_IGN);
1299 }
1300
#if 0
/*
 * Currently compiled out.
 * Returns 1 if an equation is open and eqn_end() reports ROFF_EQN,
 * 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

        if (NULL == r->eqn)
                return(0);

        return(ROFF_EQN == eqn_end(&r->eqn) ? 1 : 0);
}
#endif
1309
1310 static void
1311 roff_openeqn(struct roff *r, const char *name, int line,
1312                 int offs, const char *buf)
1313 {
1314         struct eqn_node *e;
1315         int              poff;
1316
1317         assert(NULL == r->eqn);
1318         e = eqn_alloc(name, offs, line, r->parse);
1319
1320         if (r->last_eqn)
1321                 r->last_eqn->next = e;
1322         else
1323                 r->first_eqn = r->last_eqn = e;
1324
1325         r->eqn = r->last_eqn = e;
1326
1327         if (buf) {
1328                 poff = 0;
1329                 eqn_read(&r->eqn, line, buf, offs, &poff);
1330         }
1331 }
1332
1333 /* ARGSUSED */
1334 static enum rofferr
1335 roff_EQ(ROFF_ARGS)
1336 {
1337
1338         roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1339         return(ROFF_IGN);
1340 }
1341
1342 /* ARGSUSED */
1343 static enum rofferr
1344 roff_EN(ROFF_ARGS)
1345 {
1346
1347         mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1348         return(ROFF_IGN);
1349 }
1350
1351 /* ARGSUSED */
1352 static enum rofferr
1353 roff_TS(ROFF_ARGS)
1354 {
1355         struct tbl_node *t;
1356
1357         if (r->tbl) {
1358                 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1359                 tbl_end(&r->tbl);
1360         }
1361
1362         t = tbl_alloc(ppos, ln, r->parse);
1363
1364         if (r->last_tbl)
1365                 r->last_tbl->next = t;
1366         else
1367                 r->first_tbl = r->last_tbl = t;
1368
1369         r->tbl = r->last_tbl = t;
1370         return(ROFF_IGN);
1371 }
1372
1373 /* ARGSUSED */
1374 static enum rofferr
1375 roff_tr(ROFF_ARGS)
1376 {
1377         const char      *p, *first, *second;
1378         size_t           fsz, ssz;
1379         enum mandoc_esc  esc;
1380
1381         p = *bufp + pos;
1382
1383         if ('\0' == *p) {
1384                 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1385                 return(ROFF_IGN);
1386         }
1387
1388         while ('\0' != *p) {
1389                 fsz = ssz = 1;
1390
1391                 first = p++;
1392                 if ('\\' == *first) {
1393                         esc = mandoc_escape(&p, NULL, NULL);
1394                         if (ESCAPE_ERROR == esc) {
1395                                 mandoc_msg
1396                                         (MANDOCERR_BADESCAPE, r->parse,
1397                                          ln, (int)(p - *bufp), NULL);
1398                                 return(ROFF_IGN);
1399                         }
1400                         fsz = (size_t)(p - first);
1401                 }
1402
1403                 second = p++;
1404                 if ('\\' == *second) {
1405                         esc = mandoc_escape(&p, NULL, NULL);
1406                         if (ESCAPE_ERROR == esc) {
1407                                 mandoc_msg
1408                                         (MANDOCERR_BADESCAPE, r->parse,
1409                                          ln, (int)(p - *bufp), NULL);
1410                                 return(ROFF_IGN);
1411                         }
1412                         ssz = (size_t)(p - second);
1413                 } else if ('\0' == *second) {
1414                         mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1415                                         ln, (int)(p - *bufp), NULL);
1416                         second = " ";
1417                         p--;
1418                 }
1419
1420                 if (fsz > 1) {
1421                         roff_setstrn(&r->xmbtab, first,
1422                                         fsz, second, ssz, 0);
1423                         continue;
1424                 }
1425
1426                 if (NULL == r->xtab)
1427                         r->xtab = mandoc_calloc
1428                                 (128, sizeof(struct roffstr));
1429
1430                 free(r->xtab[(int)*first].p);
1431                 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1432                 r->xtab[(int)*first].sz = ssz;
1433         }
1434
1435         return(ROFF_IGN);
1436 }
1437
1438 /* ARGSUSED */
1439 static enum rofferr
1440 roff_so(ROFF_ARGS)
1441 {
1442         char *name;
1443
1444         mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1445
1446         /*
1447          * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
1448          * opening anything that's not in our cwd or anything beneath
1449          * it.  Thus, explicitly disallow traversing up the file-system
1450          * or using absolute paths.
1451          */
1452
1453         name = *bufp + pos;
1454         if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1455                 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1456                 return(ROFF_ERR);
1457         }
1458
1459         *offs = pos;
1460         return(ROFF_SO);
1461 }
1462
1463 /* ARGSUSED */
1464 static enum rofferr
1465 roff_userdef(ROFF_ARGS)
1466 {
1467         const char       *arg[9];
1468         char             *cp, *n1, *n2;
1469         int               i;
1470
1471         /*
1472          * Collect pointers to macro argument strings
1473          * and null-terminate them.
1474          */
1475         cp = *bufp + pos;
1476         for (i = 0; i < 9; i++)
1477                 arg[i] = '\0' == *cp ? "" :
1478                     mandoc_getarg(r->parse, &cp, ln, &pos);
1479
1480         /*
1481          * Expand macro arguments.
1482          */
1483         *szp = 0;
1484         n1 = cp = mandoc_strdup(r->current_string);
1485         while (NULL != (cp = strstr(cp, "\\$"))) {
1486                 i = cp[2] - '1';
1487                 if (0 > i || 8 < i) {
1488                         /* Not an argument invocation. */
1489                         cp += 2;
1490                         continue;
1491                 }
1492
1493                 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1494                 n2 = mandoc_malloc(*szp);
1495
1496                 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1497                 strlcat(n2, arg[i], *szp);
1498                 strlcat(n2, cp + 3, *szp);
1499
1500                 cp = n2 + (cp - n1);
1501                 free(n1);
1502                 n1 = n2;
1503         }
1504
1505         /*
1506          * Replace the macro invocation
1507          * by the expanded macro.
1508          */
1509         free(*bufp);
1510         *bufp = n1;
1511         if (0 == *szp)
1512                 *szp = strlen(*bufp) + 1;
1513
1514         return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1515            ROFF_REPARSE : ROFF_APPEND);
1516 }
1517
1518 static char *
1519 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1520 {
1521         char     *name, *cp;
1522
1523         name = *cpp;
1524         if ('\0' == *name)
1525                 return(name);
1526
1527         /* Read until end of name. */
1528         for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1529                 if ('\\' != *cp)
1530                         continue;
1531                 cp++;
1532                 if ('\\' == *cp)
1533                         continue;
1534                 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1535                 *cp = '\0';
1536                 name = cp;
1537         }
1538
1539         /* Nil-terminate name. */
1540         if ('\0' != *cp)
1541                 *(cp++) = '\0';
1542
1543         /* Read past spaces. */
1544         while (' ' == *cp)
1545                 cp++;
1546
1547         *cpp = cp;
1548         return(name);
1549 }
1550
1551 /*
1552  * Store *string into the user-defined string called *name.
1553  * In multiline mode, append to an existing entry and append '\n';
1554  * else replace the existing entry, if there is one.
1555  * To clear an existing entry, call with (*r, *name, NULL, 0).
1556  */
1557 static void
1558 roff_setstr(struct roff *r, const char *name, const char *string,
1559         int multiline)
1560 {
1561
1562         roff_setstrn(&r->strtab, name, strlen(name), string,
1563                         string ? strlen(string) : 0, multiline);
1564 }
1565
1566 static void
1567 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1568                 const char *string, size_t stringsz, int multiline)
1569 {
1570         struct roffkv   *n;
1571         char            *c;
1572         int              i;
1573         size_t           oldch, newch;
1574
1575         /* Search for an existing string with the same name. */
1576         n = *r;
1577
1578         while (n && strcmp(name, n->key.p))
1579                 n = n->next;
1580
1581         if (NULL == n) {
1582                 /* Create a new string table entry. */
1583                 n = mandoc_malloc(sizeof(struct roffkv));
1584                 n->key.p = mandoc_strndup(name, namesz);
1585                 n->key.sz = namesz;
1586                 n->val.p = NULL;
1587                 n->val.sz = 0;
1588                 n->next = *r;
1589                 *r = n;
1590         } else if (0 == multiline) {
1591                 /* In multiline mode, append; else replace. */
1592                 free(n->val.p);
1593                 n->val.p = NULL;
1594                 n->val.sz = 0;
1595         }
1596
1597         if (NULL == string)
1598                 return;
1599
1600         /*
1601          * One additional byte for the '\n' in multiline mode,
1602          * and one for the terminating '\0'.
1603          */
1604         newch = stringsz + (multiline ? 2u : 1u);
1605
1606         if (NULL == n->val.p) {
1607                 n->val.p = mandoc_malloc(newch);
1608                 *n->val.p = '\0';
1609                 oldch = 0;
1610         } else {
1611                 oldch = n->val.sz;
1612                 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1613         }
1614
1615         /* Skip existing content in the destination buffer. */
1616         c = n->val.p + (int)oldch;
1617
1618         /* Append new content to the destination buffer. */
1619         i = 0;
1620         while (i < (int)stringsz) {
1621                 /*
1622                  * Rudimentary roff copy mode:
1623                  * Handle escaped backslashes.
1624                  */
1625                 if ('\\' == string[i] && '\\' == string[i + 1])
1626                         i++;
1627                 *c++ = string[i++];
1628         }
1629
1630         /* Append terminating bytes. */
1631         if (multiline)
1632                 *c++ = '\n';
1633
1634         *c = '\0';
1635         n->val.sz = (int)(c - n->val.p);
1636 }
1637
1638 static const char *
1639 roff_getstrn(const struct roff *r, const char *name, size_t len)
1640 {
1641         const struct roffkv *n;
1642
1643         for (n = r->strtab; n; n = n->next)
1644                 if (0 == strncmp(name, n->key.p, len) &&
1645                                 '\0' == n->key.p[(int)len])
1646                         return(n->val.p);
1647
1648         return(NULL);
1649 }
1650
1651 static void
1652 roff_freestr(struct roffkv *r)
1653 {
1654         struct roffkv    *n, *nn;
1655
1656         for (n = r; n; n = nn) {
1657                 free(n->key.p);
1658                 free(n->val.p);
1659                 nn = n->next;
1660                 free(n);
1661         }
1662 }
1663
1664 const struct tbl_span *
1665 roff_span(const struct roff *r)
1666 {
1667
1668         return(r->tbl ? tbl_span(r->tbl) : NULL);
1669 }
1670
1671 const struct eqn *
1672 roff_eqn(const struct roff *r)
1673 {
1674
1675         return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1676 }
1677
1678 /*
1679  * Duplicate an input string, making the appropriate character
1680  * conversations (as stipulated by `tr') along the way.
1681  * Returns a heap-allocated string with all the replacements made.
1682  */
1683 char *
1684 roff_strdup(const struct roff *r, const char *p)
1685 {
1686         const struct roffkv *cp;
1687         char            *res;
1688         const char      *pp;
1689         size_t           ssz, sz;
1690         enum mandoc_esc  esc;
1691
1692         if (NULL == r->xmbtab && NULL == r->xtab)
1693                 return(mandoc_strdup(p));
1694         else if ('\0' == *p)
1695                 return(mandoc_strdup(""));
1696
1697         /*
1698          * Step through each character looking for term matches
1699          * (remember that a `tr' can be invoked with an escape, which is
1700          * a glyph but the escape is multi-character).
1701          * We only do this if the character hash has been initialised
1702          * and the string is >0 length.
1703          */
1704
1705         res = NULL;
1706         ssz = 0;
1707
1708         while ('\0' != *p) {
1709                 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
1710                         sz = r->xtab[(int)*p].sz;
1711                         res = mandoc_realloc(res, ssz + sz + 1);
1712                         memcpy(res + ssz, r->xtab[(int)*p].p, sz);
1713                         ssz += sz;
1714                         p++;
1715                         continue;
1716                 } else if ('\\' != *p) {
1717                         res = mandoc_realloc(res, ssz + 2);
1718                         res[ssz++] = *p++;
1719                         continue;
1720                 }
1721
1722                 /* Search for term matches. */
1723                 for (cp = r->xmbtab; cp; cp = cp->next)
1724                         if (0 == strncmp(p, cp->key.p, cp->key.sz))
1725                                 break;
1726
1727                 if (NULL != cp) {
1728                         /*
1729                          * A match has been found.
1730                          * Append the match to the array and move
1731                          * forward by its keysize.
1732                          */
1733                         res = mandoc_realloc
1734                                 (res, ssz + cp->val.sz + 1);
1735                         memcpy(res + ssz, cp->val.p, cp->val.sz);
1736                         ssz += cp->val.sz;
1737                         p += (int)cp->key.sz;
1738                         continue;
1739                 }
1740
1741                 /*
1742                  * Handle escapes carefully: we need to copy
1743                  * over just the escape itself, or else we might
1744                  * do replacements within the escape itself.
1745                  * Make sure to pass along the bogus string.
1746                  */
1747                 pp = p++;
1748                 esc = mandoc_escape(&p, NULL, NULL);
1749                 if (ESCAPE_ERROR == esc) {
1750                         sz = strlen(pp);
1751                         res = mandoc_realloc(res, ssz + sz + 1);
1752                         memcpy(res + ssz, pp, sz);
1753                         break;
1754                 }
1755                 /*
1756                  * We bail out on bad escapes.
1757                  * No need to warn: we already did so when
1758                  * roff_res() was called.
1759                  */
1760                 sz = (int)(p - pp);
1761                 res = mandoc_realloc(res, ssz + sz + 1);
1762                 memcpy(res + ssz, pp, sz);
1763                 ssz += sz;
1764         }
1765
1766         res[(int)ssz] = '\0';
1767         return(res);
1768 }