]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.bin/window/compress.c
This commit was generated by cvs2svn to compensate for changes in r68651,
[FreeBSD/FreeBSD.git] / usr.bin / window / compress.c
1 /*
2  * Copyright (c) 1989, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Edward Wang at The University of California, Berkeley.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36
37 #ifndef lint
38 static char sccsid[] = "@(#)compress.c  8.1 (Berkeley) 6/6/93";
39 static char rcsid[] = "@(#)$FreeBSD$";
40 #endif /* not lint */
41
42 #include "ww.h"
43 #include "tt.h"
44
45         /* special */
46 #include <stdio.h>
47 #include <fcntl.h>
48 #include <stdlib.h>
49 int cc_trace = 0;
50 FILE *cc_trace_fp;
51
52         /* tunable parameters */
53
54 int cc_reverse = 1;
55 int cc_sort = 0;
56 int cc_chop = 0;
57
58 int cc_token_max = 8;           /* <= TOKEN_MAX */
59 int cc_token_min = 2;           /* > tt.tt_put_token_cost */
60 int cc_npass0 = 1;
61 int cc_npass1 = 1;
62
63 int cc_bufsize = 1024 * 3;      /* XXX, or 80 * 24 * 2 */
64
65 int cc_ntoken = 8192;
66
67 #define cc_weight XXX
68 #ifndef cc_weight
69 int cc_weight = 0;
70 #endif
71
72 #define TOKEN_MAX 16
73
74 struct cc {
75         char string[TOKEN_MAX];
76         char length;
77         char flag;
78 #ifndef cc_weight
79         short weight;
80 #endif
81         long time;              /* time last seen */
82         short bcount;           /* count in this buffer */
83         short ccount;           /* count in compression */
84         short places;           /* places in the buffer */
85         short code;             /* token code */
86         struct cc *qforw, *qback;
87         struct cc *hforw, **hback;
88 };
89
90 short cc_thresholds[TOKEN_MAX + 1];
91 #define thresh(length) (cc_thresholds[length])
92 #define threshp(code, count, length) \
93         ((code) >= 0 || (short) (count) >= cc_thresholds[length])
94
95 #ifndef cc_weight
96 short cc_wthresholds[TOKEN_MAX + 1];
97 #define wthresh(length) (cc_wthresholds[length])
98 #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
99 #else
100 #define wthreshp(weight, length) (0)
101 #endif
102
103 #ifndef cc_weight
104 short cc_wlimits[TOKEN_MAX + 1];
105 #define wlimit(length) (cc_wlimits[length])
106 #endif
107
108 #define put_token_score(length) ((length) - tt.tt_put_token_cost)
109
110 int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
111 #define score_adjust(score, p) \
112         do { \
113                 int length = (p)->length; \
114                 int ccount = (p)->ccount; \
115                 if (threshp((p)->code, ccount, length) || \
116                     wthreshp((p)->weight, length)) /* XXX */ \
117                         (score) -= length - tt.tt_put_token_cost; \
118                 else \
119                         (score) += cc_score_adjustments[length][ccount]; \
120         } while (0)
121
122 int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
123
124 struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
125
126 #define qinsert(p1, p2) \
127         do { \
128                 register struct cc *forw = (p1)->qforw; \
129                 register struct cc *back = (p1)->qback; \
130                 back->qforw = forw; \
131                 forw->qback = back; \
132                 forw = (p2)->qforw; \
133                 (p1)->qforw = forw; \
134                 forw->qback = (p1); \
135                 (p2)->qforw = (p1); \
136                 (p1)->qback = (p2); \
137         } while (0)
138
139 #define qinsertq(q, p) \
140         ((q)->qforw == (q) ? 0 : \
141          ((q)->qback->qforw = (p)->qforw, \
142           (p)->qforw->qback = (q)->qback, \
143           (q)->qforw->qback = (p), \
144           (p)->qforw = (q)->qforw, \
145           (q)->qforw = (q), \
146           (q)->qback = (q)))
147
148 #define H               (14)
149 #define HSIZE           (1 << H)
150 #define hash(h, c)      ((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1)
151
152 char *cc_buffer;
153 struct cc **cc_output;                  /* the output array */
154 short *cc_places[TOKEN_MAX + 1];
155 short *cc_hashcodes;                    /* for computing hashcodes */
156 struct cc **cc_htab;                    /* the hash table */
157 struct cc **cc_tokens;                  /* holds all the active tokens */
158 struct cc_undo {
159         struct cc **pos;
160         struct cc *val;
161 } *cc_undo;
162
163 long cc_time, cc_time0;
164
165 char *cc_tt_ob, *cc_tt_obe;
166
167 ccinit()
168 {
169         register i, j;
170         register struct cc *p;
171
172         if (tt.tt_token_max > cc_token_max)
173                 tt.tt_token_max = cc_token_max;
174         if (tt.tt_token_min < cc_token_min)
175                 tt.tt_token_min = cc_token_min;
176         if (tt.tt_token_min > tt.tt_token_max) {
177                 tt.tt_ntoken = 0;
178                 return 0;
179         }
180         if (tt.tt_ntoken > cc_ntoken / 2)       /* not likely */
181                 tt.tt_ntoken = cc_ntoken / 2;
182 #define C(x) (sizeof (x) / sizeof *(x))
183         for (i = 0; i < C(cc_thresholds); i++) {
184                 int h = i - tt.tt_put_token_cost;
185                 if (h > 0)
186                         cc_thresholds[i] =
187                                 (tt.tt_set_token_cost + 1 + h - 1) / h + 1;
188                 else
189                         cc_thresholds[i] = 0;
190         }
191         for (i = 0; i < C(cc_score_adjustments); i++) {
192                 int t = cc_thresholds[i];
193                 for (j = 0; j < C(*cc_score_adjustments); j++) {
194                         if (j >= t)
195                                 cc_score_adjustments[i][j] =
196                                         - (i - tt.tt_put_token_cost);
197                         else if (j < t - 1)
198                                 cc_score_adjustments[i][j] = 0;
199                         else
200                                 /*
201                                  * cost now is
202                                  *      length * (ccount + 1)           a
203                                  * cost before was
204                                  *      set-token-cost + length +
205                                  *              ccount * put-token-cost b
206                                  * the score adjustment is (b - a)
207                                  */
208                                 cc_score_adjustments[i][j] =
209                                         tt.tt_set_token_cost + i +
210                                                 j * tt.tt_put_token_cost -
211                                                         i * (j + 1);
212                         if (j >= t)
213                                 cc_initial_scores[i][j] = 0;
214                         else
215                                 /*
216                                  * - (set-token-cost +
217                                  *      (length - put-token-cost) -
218                                  *      (length - put-token-cost) * ccount)
219                                  */
220                                 cc_initial_scores[i][j] =
221                                         - (tt.tt_set_token_cost +
222                                            (i - tt.tt_put_token_cost) -
223                                            (i - tt.tt_put_token_cost) * j);
224                 }
225         }
226 #ifndef cc_weight
227         for (i = 1; i < C(cc_wthresholds); i++) {
228                 cc_wthresholds[i] =
229                         ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
230                                 i / 5 + 1) *
231                                 cc_weight + 1;
232                 cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
233         }
234 #endif
235 #undef C
236         if ((cc_output = (struct cc **)
237              malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0)
238                 goto nomem;
239         if ((cc_hashcodes = (short *)
240              malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0)
241                 goto nomem;
242         if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
243                 goto nomem;
244         if ((cc_tokens = (struct cc **)
245              malloc((unsigned)
246                     (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
247                     sizeof *cc_tokens)) == 0)
248                 goto nomem;
249         if ((cc_undo = (struct cc_undo *)
250              malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0)
251                 goto nomem;
252         for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
253                 if ((cc_places[i] = (short *)
254                      malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0)
255                         goto nomem;
256         cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
257         cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
258         cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
259         cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
260         if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0)
261                 goto nomem;
262         for (i = 0; i < tt.tt_ntoken; i++) {
263                 p->code = i;
264                 p->time = -1;
265                 p->qback = cc_q0a.qback;
266                 p->qforw = &cc_q0a;
267                 p->qback->qforw = p;
268                 cc_q0a.qback = p;
269                 p++;
270         }
271         for (; i < cc_ntoken; i++) {
272                 p->code = -1;
273                 p->time = -1;
274                 p->qback = cc_q1a.qback;
275                 p->qforw = &cc_q1a;
276                 p->qback->qforw = p;
277                 cc_q1a.qback = p;
278                 p++;
279         }
280         cc_tt_ob = tt_ob;
281         cc_tt_obe = tt_obe;
282         if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0)
283                 goto nomem;
284         return 0;
285 nomem:
286         wwerrno = WWE_NOMEM;
287         return -1;
288 }
289
290 ccstart()
291 {
292         int ccflush();
293
294         ttflush();
295         tt_obp = tt_ob = cc_buffer;
296         tt_obe = tt_ob + cc_bufsize;
297         tt.tt_flush = ccflush;
298         if (cc_trace) {
299                 cc_trace_fp = fopen("window-trace", "a");
300                 (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
301         }
302         ccreset();
303 }
304
305 ccreset()
306 {
307         register struct cc *p;
308
309         bzero((char *) cc_htab, HSIZE * sizeof *cc_htab);
310         for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
311                 p->hback = 0;
312         for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
313                 p->hback = 0;
314 }
315
316 ccend()
317 {
318
319         ttflush();
320         tt_obp = tt_ob = cc_tt_ob;
321         tt_obe = cc_tt_obe;
322         tt.tt_flush = 0;
323         if (cc_trace_fp != NULL) {
324                 (void) fclose(cc_trace_fp);
325                 cc_trace_fp = NULL;
326         }
327 }
328
329 ccflush()
330 {
331         int bufsize = tt_obp - tt_ob;
332         int n;
333
334         if (tt_ob != cc_buffer)
335                 abort();
336         if (cc_trace_fp != NULL) {
337                 (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
338                 (void) putc(-1, cc_trace_fp);
339         }
340         tt.tt_flush = 0;
341         (*tt.tt_compress)(1);
342         if (bufsize < tt.tt_token_min) {
343                 ttflush();
344                 goto out;
345         }
346         tt_obp = tt_ob = cc_tt_ob;
347         tt_obe = cc_tt_obe;
348         cc_time0 = cc_time;
349         cc_time += bufsize;
350         n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
351         cc_compress_phase(cc_output, bufsize, cc_tokens, n);
352         cc_output_phase(cc_buffer, cc_output, bufsize);
353         ttflush();
354         tt_obp = tt_ob = cc_buffer;
355         tt_obe = cc_buffer + cc_bufsize;
356 out:
357         (*tt.tt_compress)(0);
358         tt.tt_flush = ccflush;
359 }
360
361 cc_sweep_phase(buffer, bufsize, tokens)
362         char *buffer;
363         struct cc **tokens;
364 {
365         register struct cc **pp = tokens;
366         register i, n;
367 #ifdef STATS
368         int nn, ii;
369 #endif
370
371 #ifdef STATS
372         if (verbose >= 0)
373                 time_begin();
374         if (verbose > 0)
375                 printf("Sweep:");
376 #endif
377         cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
378 #ifdef STATS
379         ntoken_stat = 0;
380         nn = 0;
381         ii = 0;
382 #endif
383         for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
384 #ifdef STATS
385                 if (verbose > 0) {
386                         if (ii > 7) {
387                                 printf("\n      ");
388                                 ii = 0;
389                         }
390                         ii++;
391                         printf(" (%d", i);
392                         (void) fflush(stdout);
393                 }
394 #endif
395                 n = cc_sweep(buffer, bufsize, pp, i);
396                 pp += n;
397 #ifdef STATS
398                 if (verbose > 0) {
399                         if (--n > 0) {
400                                 printf(" %d", n);
401                                 nn += n;
402                         }
403                         putchar(')');
404                 }
405 #endif
406         }
407         qinsertq(&cc_q1b, &cc_q1a);
408 #ifdef STATS
409         if (verbose > 0)
410                 printf("\n       %d tokens, %d candidates\n",
411                         ntoken_stat, nn);
412         if (verbose >= 0)
413                 time_end();
414 #endif
415         return pp - tokens;
416 }
417
418 cc_sweep0(buffer, n, length)
419         char *buffer;
420 {
421         register char *p;
422         register short *hc;
423         register i;
424         register short c;
425         register short pc = tt.tt_padc;
426
427         /* n and length are at least 1 */
428         p = buffer++;
429         hc = cc_hashcodes;
430         i = n;
431         do {
432                 if ((*hc++ = *p++) == pc)
433                         hc[-1] = -1;
434         } while (--i);
435         while (--length) {
436                 p = buffer++;
437                 hc = cc_hashcodes;
438                 for (i = n--; --i;) {
439                         if ((c = *p++) == pc || *hc < 0)
440                                 c = -1;
441                         else
442                                 c = hash(*hc, c);
443                         *hc++ = c;
444                 }
445         }
446 }
447
448 cc_sweep(buffer, bufsize, tokens, length)
449         char *buffer;
450         struct cc **tokens;
451         register length;
452 {
453         register struct cc *p;
454         register char *cp;
455         register i;
456         short *hc;
457         short *places = cc_places[length];
458         struct cc **pp = tokens;
459         short threshold = thresh(length);
460 #ifndef cc_weight
461         short wthreshold = wthresh(length);
462         short limit = wlimit(length);
463 #endif
464         int time;
465         short pc = tt.tt_padc;
466
467         i = length - 1;
468         bufsize -= i;
469         cp = buffer + i;
470         hc = cc_hashcodes;
471         time = cc_time0;
472         for (i = 0; i < bufsize; i++, time++) {
473                 struct cc **h;
474
475                 {
476                         register short *hc1 = hc;
477                         register short c = *cp++;
478                         register short hh;
479                         if ((hh = *hc1) < 0 || c == pc) {
480                                 *hc1++ = -1;
481                                 hc = hc1;
482                                 continue;
483                         }
484                         h = cc_htab + (*hc1++ = hash(hh, c));
485                         hc = hc1;
486                 }
487                 for (p = *h; p != 0; p = p->hforw)
488                         if (p->length == (char) length) {
489                                 register char *p1 = p->string;
490                                 register char *p2 = cp - length;
491                                 register n = length;
492                                 do
493                                         if (*p1++ != *p2++)
494                                                 goto fail;
495                                 while (--n);
496                                 break;
497                         fail:;
498                         }
499                 if (p == 0) {
500                         p = cc_q1a.qback;
501                         if (p == &cc_q1a ||
502                             p->time >= cc_time0 && p->length == (char) length)
503                                 continue;
504                         if (p->hback != 0)
505                                 if ((*p->hback = p->hforw) != 0)
506                                         p->hforw->hback = p->hback;
507                         {
508                                 register char *p1 = p->string;
509                                 register char *p2 = cp - length;
510                                 register n = length;
511                                 do
512                                         *p1++ = *p2++;
513                                 while (--n);
514                         }
515                         p->length = length;
516 #ifndef cc_weight
517                         p->weight = cc_weight;
518 #endif
519                         p->time = time;
520                         p->bcount = 1;
521                         p->ccount = 0;
522                         p->flag = 0;
523                         if ((p->hforw = *h) != 0)
524                                 p->hforw->hback = &p->hforw;
525                         *h = p;
526                         p->hback = h;
527                         qinsert(p, &cc_q1a);
528                         places[i] = -1;
529                         p->places = i;
530 #ifdef STATS
531                         ntoken_stat++;
532 #endif
533                 } else if (p->time < cc_time0) {
534 #ifndef cc_weight
535                         if ((p->weight += p->time - time) < 0)
536                                 p->weight = cc_weight;
537                         else if ((p->weight += cc_weight) > limit)
538                                 p->weight = limit;
539 #endif
540                         p->time = time;
541                         p->bcount = 1;
542                         p->ccount = 0;
543                         if (p->code >= 0) {
544                                 p->flag = 1;
545                                 *pp++ = p;
546                         } else
547 #ifndef cc_weight
548                         if (p->weight >= wthreshold) {
549                                 p->flag = 1;
550                                 *pp++ = p;
551                                 qinsert(p, &cc_q1b);
552                         } else
553 #endif
554                         {
555                                 p->flag = 0;
556                                 qinsert(p, &cc_q1a);
557                         }
558                         places[i] = -1;
559                         p->places = i;
560 #ifdef STATS
561                         ntoken_stat++;
562 #endif
563                 } else if (p->time + length > time) {
564                         /*
565                          * overlapping token, don't count as two and
566                          * don't update time, but do adjust weight to offset
567                          * the difference
568                          */
569 #ifndef cc_weight
570                         if (cc_weight != 0) {   /* XXX */
571                                 p->weight += time - p->time;
572                                 if (!p->flag && p->weight >= wthreshold) {
573                                         p->flag = 1;
574                                         *pp++ = p;
575                                         qinsert(p, &cc_q1b);
576                                 }
577                         }
578 #endif
579                         places[i] = p->places;
580                         p->places = i;
581                 } else {
582 #ifndef cc_weight
583                         if ((p->weight += p->time - time) < 0)
584                                 p->weight = cc_weight;
585                         else if ((p->weight += cc_weight) > limit)
586                                 p->weight = limit;
587 #endif
588                         p->time = time;
589                         p->bcount++;
590                         if (!p->flag &&
591                             /* code must be < 0 if flag false here */
592                             (p->bcount >= threshold
593 #ifndef cc_weight
594                              || p->weight >= wthreshold
595 #endif
596                              )) {
597                                 p->flag = 1;
598                                 *pp++ = p;
599                                 qinsert(p, &cc_q1b);
600                         }
601                         places[i] = p->places;
602                         p->places = i;
603                 }
604         }
605         if ((i = pp - tokens) > 0) {
606                 *pp = 0;
607                 if (cc_reverse)
608                         cc_sweep_reverse(tokens, places);
609                 if (cc_sort && i > 1) {
610                         int cc_token_compare();
611                         qsort((char *) tokens, i, sizeof *tokens,
612                               cc_token_compare);
613                 }
614                 if (cc_chop) {
615                         if ((i = i * cc_chop / 100) == 0)
616                                 i = 1;
617                         tokens[i] = 0;
618                 }
619                 i++;
620         }
621         return i;
622 }
623
624 cc_sweep_reverse(pp, places)
625         register struct cc **pp;
626         register short *places;
627 {
628         register struct cc *p;
629         register short front, back, t;
630
631         while ((p = *pp++) != 0) {
632                 back = -1;
633                 t = p->places;
634                 /* the list is never empty */
635                 do {
636                         front = places[t];
637                         places[t] = back;
638                         back = t;
639                 } while ((t = front) >= 0);
640                 p->places = back;
641         }
642 }
643
644 cc_compress_phase(output, bufsize, tokens, ntoken)
645         struct cc **output;
646         struct cc **tokens;
647 {
648         register i;
649
650         bzero((char *) output, bufsize * sizeof *output);
651         for (i = 0; i < cc_npass0; i++)
652                 cc_compress_phase1(output, tokens, ntoken, 0);
653         for (i = 0; i < cc_npass1; i++)
654                 cc_compress_phase1(output, tokens, ntoken, 1);
655         cc_compress_cleanup(output, bufsize);
656 }
657
658 cc_compress_phase1(output, tokens, ntoken, flag)
659         register struct cc **output;
660         struct cc **tokens;
661 {
662         register struct cc **pp;
663 #ifdef STATS
664         register int i = 0;
665         int nt = 0, cc = 0, nc = 0;
666 #endif
667
668 #ifdef STATS
669         if (verbose >= 0)
670                 time_begin();
671         if (verbose > 0)
672                 printf("Compress:");
673 #endif
674         pp = tokens;
675         while (pp < tokens + ntoken) {
676 #ifdef STATS
677                 if (verbose > 0) {
678                         ntoken_stat = 0;
679                         ccount_stat = 0;
680                         ncover_stat = 0;
681                         if (i > 2) {
682                                 printf("\n         ");
683                                 i = 0;
684                         }
685                         i++;
686                         printf(" (%d", (*pp)->length);
687                         (void) fflush(stdout);
688                 }
689 #endif
690                 pp += cc_compress(output, pp, flag);
691 #ifdef STATS
692                 if (verbose > 0) {
693                         printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
694                                ncover_stat);
695                         nt += ntoken_stat;
696                         cc += ccount_stat;
697                         nc += ncover_stat;
698                 }
699 #endif
700         }
701 #ifdef STATS
702         if (verbose > 0)
703                 printf("\n   total: (%dt %du %dc)\n", nt, cc, nc);
704         if (verbose >= 0)
705                 time_end();
706 #endif
707 }
708
709 cc_compress_cleanup(output, bufsize)
710         register struct cc **output;
711 {
712         register struct cc **end;
713
714         /* the previous output phase may have been interrupted */
715         qinsertq(&cc_q0b, &cc_q0a);
716         for (end = output + bufsize; output < end;) {
717                 register struct cc *p;
718                 register length;
719                 if ((p = *output) == 0) {
720                         output++;
721                         continue;
722                 }
723                 length = p->length;
724                 if (!p->flag) {
725                 } else if (p->code >= 0) {
726                         qinsert(p, &cc_q0b);
727                         p->flag = 0;
728                 } else if (p->ccount == 0) {
729                         *output = 0;
730                 } else if (p->ccount >= thresh(length)
731 #ifndef cc_weight
732                            || wthreshp(p->weight, length)
733 #endif
734                            ) {
735                         p->flag = 0;
736                 } else {
737                         p->ccount = 0;
738                         *output = 0;
739                 }
740                 output += length;
741         }
742 }
743
744 cc_compress(output, tokens, flag)
745         struct cc **output;
746         struct cc **tokens;
747         char flag;
748 {
749         struct cc **pp = tokens;
750         register struct cc *p = *pp++;
751         int length = p->length;
752         int threshold = thresh(length);
753 #ifndef cc_weight
754         short wthreshold = wthresh(length);
755 #endif
756         short *places = cc_places[length];
757         int *initial_scores = cc_initial_scores[length];
758         int initial_score0 = put_token_score(length);
759
760         do {
761                 int score;
762                 register struct cc_undo *undop;
763                 int ccount;
764 #ifdef STATS
765                 int ncover;
766 #endif
767                 int i;
768
769                 ccount = p->ccount;
770                 if ((short) ccount >= p->bcount)
771                         continue;
772                 if (p->code >= 0 || ccount >= threshold)
773                         score = 0;
774 #ifndef cc_weight
775                 else if (p->weight >= wthreshold)
776                         /* allow one fewer match than normal */
777                         /* XXX, should adjust for ccount */
778                         score = - tt.tt_set_token_cost;
779 #endif
780                 else
781                         score = initial_scores[ccount];
782                 undop = cc_undo;
783 #ifdef STATS
784                 ncover = 0;
785 #endif
786                 for (i = p->places; i >= 0; i = places[i]) {
787                         register struct cc **jp;
788                         register struct cc *x;
789                         register struct cc **ip = output + i;
790                         register score0 = initial_score0;
791                         struct cc **iip = ip + length;
792                         struct cc_undo *undop1 = undop;
793
794                         if ((x = *(jp = ip)) != 0)
795                                 goto z;
796                         while (--jp >= output)
797                                 if ((x = *jp) != 0) {
798                                         if (jp + x->length > ip)
799                                                 goto z;
800                                         break;
801                                 }
802                         jp = ip + 1;
803                         while (jp < iip) {
804                                 if ((x = *jp) == 0) {
805                                         jp++;
806                                         continue;
807                                 }
808                         z:
809                                 if (x == p)
810                                         goto undo;
811 #ifdef STATS
812                                 ncover++;
813 #endif
814                                 undop->pos = jp;
815                                 undop->val = x;
816                                 undop++;
817                                 *jp = 0;
818                                 x->ccount--;
819                                 score_adjust(score0, x);
820                                 if (score0 < 0 && flag)
821                                         goto undo;
822                                 jp += x->length;
823                         }
824                         undop->pos = ip;
825                         undop->val = 0;
826                         undop++;
827                         *ip = p;
828                         ccount++;
829                         score += score0;
830                         continue;
831                 undo:
832                         while (--undop >= undop1)
833                                 if (*undop->pos = x = undop->val)
834                                         x->ccount++;
835                         undop++;
836                 }
837                 if (score > 0) {
838 #ifdef STATS
839                         ccount_stat += ccount - p->ccount;
840                         ntoken_stat++;
841                         ncover_stat += ncover;
842 #endif
843                         p->ccount = ccount;
844                 } else {
845                         register struct cc_undo *u = cc_undo;
846                         while (--undop >= u) {
847                                 register struct cc *x;
848                                 if (*undop->pos = x = undop->val)
849                                         x->ccount++;
850                         }
851                 }
852         } while ((p = *pp++) != 0);
853         return pp - tokens;
854 }
855
856 cc_output_phase(buffer, output, bufsize)
857         register char *buffer;
858         register struct cc **output;
859         register bufsize;
860 {
861         register i;
862         register struct cc *p, *p1;
863
864         for (i = 0; i < bufsize;) {
865                 if ((p = output[i]) == 0) {
866                         ttputc(buffer[i]);
867                         i++;
868                 } else if (p->code >= 0) {
869                         if (--p->ccount == 0)
870                                 qinsert(p, &cc_q0a);
871                         (*tt.tt_put_token)(p->code, p->string, p->length);
872                         wwntokuse++;
873                         wwntoksave += put_token_score(p->length);
874                         i += p->length;
875                 } else if ((p1 = cc_q0a.qback) != &cc_q0a) {
876                         p->code = p1->code;
877                         p1->code = -1;
878                         qinsert(p1, &cc_q1a);
879                         if (--p->ccount == 0)
880                                 qinsert(p, &cc_q0a);
881                         else
882                                 qinsert(p, &cc_q0b);
883                         (*tt.tt_set_token)(p->code, p->string, p->length);
884                         wwntokdef++;
885                         wwntoksave -= tt.tt_set_token_cost;
886                         i += p->length;
887                 } else {
888                         p->ccount--;
889                         ttwrite(p->string, p->length);
890                         wwntokbad++;
891                         i += p->length;
892                 }
893         }
894         wwntokc += bufsize;
895 }
896
897 cc_token_compare(p1, p2)
898         struct cc **p1, **p2;
899 {
900         return (*p2)->bcount - (*p1)->bcount;
901 }