From bef006b8fce691d9a1efce1ebaab499612c07e0f Mon Sep 17 00:00:00 2001
From: ache
Date: Sun, 11 Aug 1996 11:42:03 +0000
Subject: [PATCH] Use locale for character classes instead of hardcoded values
 Misc 8bit cleanup

---
Note: this copy of the patch was recovered from a whitespace-collapsed dump.
Line breaks and blank context lines were restored to agree with the hunk
headers and the diffstat; tabs/spaces inside lines are reconstructed and may
not be byte-exact.  Apply with "git apply --ignore-whitespace" or "patch -l".

 lib/libc/regex/cclass.h  | 44 ++++++++++--------------
 lib/libc/regex/engine.c  |  2 +-
 lib/libc/regex/regcomp.c | 72 ++++++++++++++++++++++++++++++++++++----
 lib/libc/regex/regex2.h  |  8 ++---
 4 files changed, 89 insertions(+), 37 deletions(-)

diff --git a/lib/libc/regex/cclass.h b/lib/libc/regex/cclass.h
index cabe7faf223..581909c869c 100644
--- a/lib/libc/regex/cclass.h
+++ b/lib/libc/regex/cclass.h
@@ -37,34 +37,26 @@
  *	@(#)cclass.h	8.3 (Berkeley) 3/20/94
  */
 
+
+typedef enum {CALNUM, CALPHA, CBLANK, CCNTRL, CDIGIT, CGRAPH,
+	      CLOWER, CPRINT, CPUNCT, CSPACE, CUPPER, CXDIGIT} citype;
+
 /* character-class table */
 static struct cclass {
 	char *name;
-	char *chars;
-	char *multis;
+	citype fidx;
 } cclasses[] = {
-	{"alnum",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789",				""},
-	{"alpha",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
-					""},
-	{"blank",	" \t",		""},
-	{"cntrl",	"\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
-\25\26\27\30\31\32\33\34\35\36\37\177",	""},
-	{"digit",	"0123456789",	""},
-	{"graph",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
-					""},
-	{"lower",	"abcdefghijklmnopqrstuvwxyz",
-					""},
-	{"print",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
-					""},
-	{"punct",	"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
-					""},
-	{"space",	"\t\n\v\f\r ",	""},
-	{"upper",	"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
-					""},
-	{"xdigit",	"0123456789ABCDEFabcdef",
-					""},
-	{NULL,		0,		""}
+	{"alnum",	CALNUM},
+	{"alpha",	CALPHA},
+	{"blank",	CBLANK},
+	{"cntrl",	CCNTRL},
+	{"digit",	CDIGIT},
+	{"graph",	CGRAPH},
+	{"lower",	CLOWER},
+	{"print",	CPRINT},
+	{"punct",	CPUNCT},
+	{"space",	CSPACE},
+	{"upper",	CUPPER},
+	{"xdigit",	CXDIGIT},
+	{NULL,		}
 };
diff --git a/lib/libc/regex/engine.c b/lib/libc/regex/engine.c
index e7917b9f815..be569b19afe 100644
--- a/lib/libc/regex/engine.c
+++ b/lib/libc/regex/engine.c
@@ -1072,7 +1072,7 @@ int ch;
 {
 	static char pbuf[10];
 
-	if (isprint(ch) || ch == ' ')
+	if (isprint((uch)ch) || ch == ' ')
 		sprintf(pbuf, "%c", ch);
 	else
 		sprintf(pbuf, "\\%o", ch);
diff --git a/lib/libc/regex/regcomp.c b/lib/libc/regex/regcomp.c
index f7962eae714..1828a3bf7a5 100644
--- a/lib/libc/regex/regcomp.c
+++ b/lib/libc/regex/regcomp.c
@@ -816,13 +816,12 @@ p_b_cclass(p, cs)
 register struct parse *p;
 register cset *cs;
 {
+	register int c;
 	register char *sp = p->next;
 	register struct cclass *cp;
 	register size_t len;
-	register char *u;
-	register char c;
 
-	while (MORE() && isalpha((unsigned char)PEEK()))
+	while (MORE() && isalpha((uch)PEEK()))
 		NEXT();
 	len = p->next - sp;
 	for (cp = cclasses; cp->name != NULL; cp++)
@@ -834,11 +833,72 @@ register cset *cs;
 		return;
 	}
 
-	u = cp->chars;
-	while ((c = *u++) != '\0')
-		CHadd(cs, c);
+	switch (cp->fidx) {
+	case CALNUM:
+		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+			if (isalnum((uch)c))
+				CHadd(cs, c);
+		break;
+	case CALPHA:
+		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+			if (isalpha((uch)c))
+				CHadd(cs, c);
+		break;
+	case CBLANK:
+		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+			if (isblank((uch)c))
+				CHadd(cs, c);
+		break;
+	case CCNTRL:
+		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+			if (iscntrl((uch)c))
+				CHadd(cs, c);
+		break;
+	case CDIGIT:
+		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+			if (isdigit((uch)c))
+				CHadd(cs, c);
+		break;
+	case CGRAPH:
+		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+			if (isgraph((uch)c))
+				CHadd(cs, c);
+		break;
+	case CLOWER:
+		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+			if (islower((uch)c))
+				CHadd(cs, c);
+		break;
+	case CPRINT:
+		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+			if (isprint((uch)c))
+				CHadd(cs, c);
+		break;
+	case CPUNCT:
+		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+			if (ispunct((uch)c))
+				CHadd(cs, c);
+		break;
+	case CSPACE:
+		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+			if (isspace((uch)c))
+				CHadd(cs, c);
+		break;
+	case CUPPER:
+		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+			if (isupper((uch)c))
+				CHadd(cs, c);
+		break;
+	case CXDIGIT:
+		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+			if (isxdigit((uch)c))
+				CHadd(cs, c);
+		break;
+	}
+#if 0
 	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
 		MCadd(p, cs, u);
+#endif
 }
 
 /*
diff --git a/lib/libc/regex/regex2.h b/lib/libc/regex/regex2.h
index 64b62121f82..9560e056bd7 100644
--- a/lib/libc/regex/regex2.h
+++ b/lib/libc/regex/regex2.h
@@ -121,13 +121,13 @@ typedef long sopno;
 typedef struct {
 	uch *ptr;		/* -> uch [csetsize] */
 	uch mask;		/* bit within array */
-	uch hash;		/* hash code */
+	short hash;		/* hash code */
 	size_t smultis;
 	char *multis;		/* -> char[smulti]  ab\0cd\0ef\0\0 */
 } cset;
 /* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
-#define	CHadd(cs, c)	((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
-#define	CHsub(cs, c)	((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
+#define	CHadd(cs, c)	((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (uch)(c))
+#define	CHsub(cs, c)	((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (uch)(c))
 #define	CHIN(cs, c)	((cs)->ptr[(uch)(c)] & (cs)->mask)
 #define	MCadd(p, cs, cp)	mcadd(p, cs, cp)	/* regcomp() internal fns */
 #define	MCsub(p, cs, cp)	mcsub(p, cs, cp)
@@ -170,4 +170,4 @@ struct re_guts {
 
 /* misc utilities */
 #define	OUT	(CHAR_MAX+1)	/* a non-character value */
-#define	ISWORD(c)	(isalnum(c) || (c) == '_')
+#define	ISWORD(c)	(isalnum((uch)(c)) || (c) == '_')
-- 
2.45.2