2 /* Copyright (C) 1989-1992, 2000, 2001, 2002, 2004
3 Free Software Foundation, Inc.
4 Written by James Clark (jjc@jclark.com)
6 This file is part of groff.
8 groff is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later version.
13 groff is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License along
19 with groff; see the file COPYING. If not, write to the Free Software
20 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
// Version text defined in another translation unit (C linkage); main
// prints it when handling the `-version' long option.
29 extern "C" const char *Version_string;
// Marker characters that do_file embeds in pending_line.
// label_processing_state::process later replaces them with label text.
31 const char PRE_LABEL_MARKER = '\013';
32 const char POST_LABEL_MARKER = '\014';
33 const char LABEL_MARKER = '\015'; // label_type is added on
// Flag bits set by make_reference when it sees `[' or `]'.  do_file masks
// them off the returned flags to recover the label_type.
35 #define FORCE_LEFT_BRACKET 04
36 #define FORCE_RIGHT_BRACKET 010
// Current output stream.  do_file calls divert_to_temporary_file while this
// is still stdout and references are being accumulated.
38 static FILE *outfp = stdout;
// Field-formatting settings.  capitalize_fields and reverse_fields are
// assigned from command-line options in main.
// NOTE(review): the code that consumes the others is outside this view.
40 string capitalize_fields;
41 string reverse_fields;
42 string abbreviate_fields;
43 string period_before_last_name = ". ";
44 string period_before_initial = ".";
45 string period_before_hyphen = "";
46 string period_before_other = ". ";
// Annotation settings; main sets both together from a command-line option.
48 int annotation_field = -1;
49 string annotation_macro;
50 string discard_fields = "XYZ";
// Strings written by label_processing_state before, after and between labels.
51 string pre_label = "\\*([.";
52 string post_label = "\\*(.]";
53 string sep_label = ", ";
// When nonzero, do_file calls split_punct on the pending line before
// attaching a citation to it.
55 int move_punctuation = 0;
56 int abbreviate_label_ranges = 0;
57 string label_range_indicator;
// label_in_text gates marker expansion in output_pending_line;
// label_in_reference gates emission of the `.ds [F' label definition.
58 int label_in_text = 1;
59 int label_in_reference = 1;
60 int date_as_label = 0;
// When nonzero, output_citation_group sorts each group by reference number.
61 int sort_adjacent_labels = 0;
62 // Join exactly two authors with this.
63 string join_authors_exactly_two = " and ";
64 // When there are more than two authors join the last two with this.
65 string join_authors_last_two = ", and ";
66 // Otherwise join authors with this.
67 string join_authors_default = ", ";
68 string separate_label_second_parts = ", ";
69 // Use this string to represent that there are other authors.
70 string et_al = " et al";
71 // Use et al only if it can replace at least this many authors.
72 int et_al_min_elide = 2;
73 // Use et al only if the total number of authors is at least this.
74 int et_al_min_total = 3;
// When nonzero, do_file accepts `.lf' and `.R1' without checking the
// character that follows the request name.
77 int compatible_flag = 0;
79 int short_label_flag = 0;
// When nonzero, do_file treats `.R1' lines as the start of a command block.
81 static int recognize_R1_R2 = 1;
// Databases searched by find_reference.  The default database is added at
// most once, and only while search_default is nonzero.
83 search_list database_list;
84 int search_default = 1;
85 static int default_database_loaded = 0;
// Growable array of cited references; maintained by store_citation.
87 static reference **citation = 0;
88 static int ncitations = 0;
89 static int citation_max = 0;
// Open-addressed table of accumulated references; see store_reference.
91 static reference **reference_hash_table = 0;
92 static int hash_table_size;
93 static int nreferences = 0;
95 static int need_syncing = 0;
// `.lf' lines collected by do_file and written by output_pending_line after
// the pending line itself.
97 string pending_lf_lines;
// Forward declarations for functions defined later in this file.
99 static void output_pending_line();
100 static unsigned immediately_handle_reference(const string &);
101 static void immediately_output_references();
102 static unsigned store_reference(const string &);
103 static void divert_to_temporary_file();
104 static reference *make_reference(const string &, unsigned *);
105 static void usage(FILE *stream);
106 static void do_file(const char *);
107 static void split_punct(string &line, string &punct);
108 static void output_citation_group(reference **v, int n, label_type, FILE *fp);
109 static void possibly_load_default_database();
// Program entry point.  Parses command-line options one character at a
// time, then iterates over the remaining arguments and finally flushes
// stdout, reporting a fatal error if the flush fails.
// NOTE(review): the per-argument action in the final loop is not visible
// here; presumably each argument is handed to do_file -- confirm.
111 int main(int argc, char **argv)
113 program_name = argv[0];
// Give stderr a static buffer of BUFSIZ bytes.
114 static char stderr_buf[BUFSIZ];
115 setbuf(stderr, stderr_buf);
117 int finished_options = 0;
// An argument counts as an option only while option processing has not been
// ended, and it begins with '-' followed by at least one more character.
122 !finished_options && argc > 0 && argv[0][0] == '-'
123 && argv[0][1] != '\0';
// opt walks the characters after the leading '-'.  Error paths set it to 0
// (seen in the surrounding original code) to stop the loop.
125 const char *opt = argv[0] + 1;
126 while (opt != 0 && *opt != '\0') {
134 label_in_reference = 0;
// Default annotation: field X with macro AP.
138 annotation_field = 'X';
139 annotation_macro = "AP";
// Explicit form `F.macro': one alphanumeric field letter, a dot, and a
// non-empty macro name.
141 else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
142 annotation_field = opt[0];
143 annotation_macro = opt + 2;
148 move_punctuation = 1;
156 // Not a very useful spec.
157 set_label_spec("(A.n|Q)', '(D.y|D)");
171 if (*++opt == '\0') {
177 error("option `f' requires an argument");
// The -f argument must consist solely of digits.
187 for (ptr = num; *ptr; ptr++)
188 if (!csdigit(*ptr)) {
189 error("bad character `%1' in argument to -f option", *ptr);
197 set_label_spec(spec.contents());
204 label_in_reference = 0;
212 capitalize_fields = ++opt;
222 error("bad field name `%1'", *opt++);
// The -a argument must consist solely of digits.
236 for (ptr = ++opt; *ptr; ptr++)
237 if (!csdigit(*ptr)) {
238 error("argument to `a' option not a number");
// reverse_fields becomes 'A' followed by the numeric argument text.
242 reverse_fields = 'A';
243 reverse_fields += opt;
249 linear_ignore_fields = ++opt;
// Buffer for a label spec built from the -l argument.
// NOTE(review): the numbers are long values written with %ld, while the
// buffer is sized from INT_DIGITS -- confirm INT_DIGITS covers long's range.
254 char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
256 if (*++opt != '\0' && *opt != ',') {
// strtol leaves ptr == opt when no digits were consumed.
258 long n = strtol(opt, &ptr, 10);
259 if (n == 0 && ptr == opt) {
260 error("bad integer `%1' in `l' option", opt);
// strchr(buf, '\0') locates the current end of buf, so this appends.
267 sprintf(strchr(buf, '\0'), "+%ld", n);
274 long n = strtol(opt, &ptr, 10);
275 if (n == 0 && ptr == opt) {
276 error("bad integer `%1' in `l' option", opt);
282 sprintf(strchr(buf, '\0'), "-%ld", n);
285 error("argument to `l' option not of form `m,n'");
288 if (!set_label_spec(buf))
299 const char *filename = 0;
300 if (*++opt == '\0') {
306 error("option `p' requires an argument");
315 database_list.add_file(filename);
330 long n = strtol(opt, &ptr, 10);
331 if (n == 0 && ptr == opt) {
332 error("bad integer `%1' in `t' option", opt);
// NOTE(review): long is narrowed to int here; no range check is visible.
338 linear_truncate_len = int(n);
// Nothing follows the current option character: stop treating further
// arguments as options.
343 if (opt[1] == '\0') {
344 finished_options = 1;
// opt already points past the first '-', so these comparisons match the
// command-line spellings `--version' and `--help'.
348 if (strcmp(opt,"-version")==0) {
350 printf("GNU refer (groff) version %s\n", Version_string);
354 if (strcmp(opt,"-help")==0) {
361 error("unrecognized option `%1'", *opt);
369 set_label_spec("%1");
// Iterate over the arguments that remain after option parsing.
377 for (int i = 0; i < argc; i++) {
// A failed flush of stdout is reported as a fatal output error.
386 if (fflush(stdout) < 0)
387 fatal("output error");
// Usage synopsis for the given stream.  Only the format string is visible
// here; its %s is presumably filled with the program name -- confirm.
391 static void usage(FILE *stream)
394 "usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
395 " [-sXYZ] [-tN] [-BL.M] [files ...]\n",
// Adds the default database to database_list at most once, and only when
// search_default is nonzero.  The REFER environment variable is consulted
// for a file name and DEFAULT_INDEX is the built-in name.
// NOTE(review): the condition choosing between the two add_file calls is
// not visible here; presumably REFER takes precedence when set.
399 static void possibly_load_default_database()
401 if (search_default && !default_database_loaded) {
402 char *filename = getenv("REFER");
404 database_list.add_file(filename);
406 database_list.add_file(DEFAULT_INDEX, 1);
// Remember that loading was attempted so it is not repeated.
407 default_database_loaded = 1;
// Returns nonzero when the examined span of str is exactly the six
// characters "$LIST$".  The two loops scan across trailing and leading
// whitespace first, so surrounding blanks are presumably ignored (the
// loop bodies are not visible here).
411 static int is_list(const string &str)
413 const char *start = str.contents();
414 const char *end = start + str.length();
// Scan back over trailing whitespace.
415 while (end > start && csspace(end[-1]))
// Scan forward over leading whitespace.
417 while (start < end && csspace(*start))
419 return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
// Processes one input file.  Recognises three kinds of lines:
//   `.['  ... `.]'   a citation, attached to the previous (pending) line;
//   `.lf'            line/file synchronisation requests;
//   `.R1' ... `.R2'  a block of commands, when recognize_R1_R2 is set.
// The file name "-" is handled specially (branch body not visible here).
422 static void do_file(const char *filename)
425 if (strcmp(filename, "-") == 0) {
430 fp = fopen(filename, "r");
432 error("can't open `%1': %2", filename, strerror(errno));
436 current_filename = filename;
// Start the output with a `.lf' request naming this file at line 1.
437 fprintf(outfp, ".lf 1 %s\n", filename);
445 if (line.length() > 0)
// Invalid input characters are reported through error().
449 if (invalid_input_char(c))
450 error("invalid input character code %1", c);
457 int len = line.length();
// A line beginning with ".[" opens a citation.
461 if (len >= 2 && line[0] == '.' && line[1] == '[') {
462 int start_lineno = current_lineno;
463 int start_of_line = 1;
// Text after ".[" on the opening line, minus its final character
// (presumably the newline), is emitted before the label.
466 string pre(line.contents() + 2, line.length() - 3);
// A missing terminator is reported against the line where the citation
// started.
470 error_with_file_and_line(current_filename, start_lineno,
471 "missing `.]' line");
// A '.' at the start of a line may begin the closing `.]'.
476 if (start_of_line && c == '.') {
// Read the rest of the closing line, checking each character.
479 while ((d = getc(fp)) != '\n' && d != EOF) {
480 if (invalid_input_char(d))
481 error("invalid input character code %1", d);
490 if (invalid_input_char(c))
491 error("invalid input character code %1", c);
494 start_of_line = (c == '\n');
497 output_pending_line();
501 error("found `$LIST$' but not accumulating references");
// Accumulating mode stores the reference; otherwise it is handled at once.
// Either way the result carries the label type plus FORCE_* bracket bits.
504 unsigned flags = (accumulate
505 ? store_reference(str)
506 : immediately_handle_reference(str));
// First accumulated reference: switch output away from stdout.
508 if (accumulate && outfp == stdout)
509 divert_to_temporary_file();
510 if (pending_line.length() == 0) {
511 warning("can't attach citation to previous line");
// Drop the last character of the pending line (presumably its newline)
// so the label can be appended to it.
514 pending_line.set_length(pending_line.length() - 1);
516 if (move_punctuation)
517 split_punct(pending_line, punct);
518 int have_text = pre.length() > 0 || post.length() > 0;
// Strip the bracket bits to recover the label type.
519 label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
520 |FORCE_RIGHT_BRACKET));
// Brackets are added when forced, or when the citation has no
// surrounding text of its own.
521 if ((flags & FORCE_LEFT_BRACKET) || !have_text)
522 pending_line += PRE_LABEL_MARKER;
// The label marker character encodes the label type as an offset.
524 char lm = LABEL_MARKER + (int)lt;
526 pending_line += post;
527 if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
528 pending_line += POST_LABEL_MARKER;
// Punctuation split off earlier goes after the label.
529 pending_line += punct;
530 pending_line += '\n';
// `.lf' request: remember the line and interpret its arguments.
536 && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
537 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
538 pending_lf_lines += line;
540 if (interpret_lf_args(line.contents() + 3))
// `.R1' request: collect lines up to `.R2' and process them as commands.
543 else if (recognize_R1_R2
545 && line[0] == '.' && line[1] == 'R' && line[2] == '1'
546 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
548 int start_of_line = 1;
549 int start_lineno = current_lineno;
552 if (c != EOF && start_of_line)
554 if (start_of_line && c == '.') {
560 if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
561 while (c != EOF && c != '\n')
580 error_with_file_and_line(current_filename, start_lineno,
581 "missing `.R2' line");
584 if (invalid_input_char(c))
585 error("invalid input character code %1", int(c));
588 start_of_line = c == '\n';
591 output_pending_line();
// Commands are attributed to the line after the `.R1' line.
596 process_commands(line, current_filename, start_lineno + 1);
600 output_pending_line();
605 output_pending_line();
// State machine that turns the label marker characters embedded in a line
// into label text written to a FILE.  The member functions in this file
// use the states NORMAL, PENDING_LABEL, PENDING_LABEL_POST,
// PENDING_LABEL_POST_PRE and PENDING_POST.
610 class label_processing_state {
615 PENDING_LABEL_POST_PRE,
618 label_type type; // type of pending labels
619 int count; // number of pending labels
620 reference **rptr; // pointer to next reference
621 int rcount; // number of references left
// Deals with any pending label state given the next character c (EOF is
// passed by the destructor); see the definition for details.
623 int handle_pending(int c);
// Arguments: reference array, number of references, output stream.
625 label_processing_state(reference **, int, FILE *);
626 ~label_processing_state();
// Writes out pending_line and any saved `.lf' lines, then clears both.
// When labels appear in the text, references are not being accumulated and
// there are citations, every character is passed through a
// label_processing_state so label markers are expanded.
// NOTE(review): the plain put_string of pending_line is presumably the else
// branch of that test; the else line is not visible here.
630 static void output_pending_line()
632 if (label_in_text && !accumulate && ncitations > 0) {
633 label_processing_state state(citation, ncitations, outfp);
634 int len = pending_line.length();
// Cast to unsigned char so characters reach process() as non-negative ints.
635 for (int i = 0; i < len; i++)
636 state.process((unsigned char)(pending_line[i]));
639 put_string(pending_line, outfp);
640 pending_line.clear();
// Saved `.lf' lines are written after the line they followed.
641 if (pending_lf_lines.length() > 0) {
642 put_string(pending_lf_lines, outfp);
643 pending_lf_lines.clear();
646 immediately_output_references();
// Emit a `.lf' request giving the current input line number and file name.
648 fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
// If the last token of line is punctuation (as reported by
// lookup_token(...)->is_punct()), appends that token to punct and
// truncates line to end just before it.
653 static void split_punct(string &line, string &punct)
655 const char *start = line.contents();
656 const char *end = start + line.length();
657 const char *ptr = start;
658 const char *last_token_start = 0;
// Remember where each token begins; after the scan this is the last token.
662 last_token_start = ptr;
// Label marker characters are recognised explicitly rather than going
// through get_token (the action taken for them is not visible here).
663 if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
664 || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
666 else if (!get_token(&ptr, end))
669 if (last_token_start) {
670 const token_info *ti = lookup_token(last_token_start, end);
671 if (ti->is_punct()) {
// Move the trailing punctuation from line to punct.
672 punct.append(last_token_start, end - last_token_start);
673 line.set_length(last_token_start - start);
// Called from do_file when references are being accumulated and outfp is
// still stdout.  The body is not visible in this view.
678 static void divert_to_temporary_file()
// Appends ref to the citation array, enlarging the array when it is full.
683 static void store_citation(reference *ref)
685 if (ncitations >= citation_max) {
// First allocation: room for 100 citations.
687 citation = new reference*[citation_max = 100];
// Later growth: allocate a new array (new capacity computed on a line not
// visible here), copy the existing pointers and release the old array.
689 reference **old_citation = citation;
691 citation = new reference *[citation_max];
692 memcpy(citation, old_citation, ncitations*sizeof(reference *));
693 a_delete old_citation;
696 citation[ncitations++] = ref;
// Builds a reference from str and records it in reference_hash_table,
// reusing an existing entry when same_reference() reports a match.
// Returns flags; the only visible writer is make_reference, which fills
// them through its pointer argument.
699 static unsigned store_reference(const string &str)
// Create the table lazily with 17 empty slots.
701 if (reference_hash_table == 0) {
702 reference_hash_table = new reference *[17];
703 hash_table_size = 17;
704 for (int i = 0; i < hash_table_size; i++)
705 reference_hash_table[i] = 0;
708 reference *ref = make_reference(str, &flags);
709 ref->compute_hash_code();
710 unsigned h = ref->hash();
// Probe starting at slot h % hash_table_size.  On reaching the first slot
// the probe wraps round to the last slot.
712 for (ptr = reference_hash_table + (h % hash_table_size);
714 ((ptr == reference_hash_table)
715 ? (ptr = reference_hash_table + hash_table_size - 1)
717 if (same_reference(**ptr, *ref))
// Reference already present: warn if the new one carried merged fields.
720 if (ref->is_merged())
721 warning("fields ignored because reference already used");
// New reference: number it and precompute its label and sort key.
727 ref->set_number(nreferences);
729 ref->pre_compute_label();
730 ref->compute_sort_key();
// Once the table is at least half full, move to the next size and
// re-insert every existing entry.
731 if (nreferences*2 >= hash_table_size) {
733 reference **old_table = reference_hash_table;
734 int old_size = hash_table_size;
735 hash_table_size = next_size(hash_table_size);
736 reference_hash_table = new reference*[hash_table_size];
738 for (i = 0; i < hash_table_size; i++)
739 reference_hash_table[i] = 0;
740 for (i = 0; i < old_size; i++)
// Same probe pattern as above, applied to the new table.
743 for (p = (reference_hash_table
744 + (old_table[i]->hash() % hash_table_size));
746 ((p == reference_hash_table)
747 ? (p = reference_hash_table + hash_table_size - 1)
// Non-accumulating counterpart of store_reference: builds the reference,
// numbers it with nreferences and, when labels are needed in the text or in
// the reference, computes its label straight away.
760 unsigned immediately_handle_reference(const string &str)
763 reference *ref = make_reference(str, &flags);
764 ref->set_number(nreferences);
765 if (label_in_text || label_in_reference) {
766 ref->pre_compute_label();
767 ref->immediate_compute_label();
// Writes every reference in the citation array to outfp.  When
// label_in_reference is set, each is preceded by a `.ds [F' definition
// carrying its normal label.
774 static void immediately_output_references()
776 for (int i = 0; i < ncitations; i++) {
777 reference *ref = citation[i];
778 if (label_in_reference) {
779 fputs(".ds [F ", outfp);
780 const string &label = ref->get_label(NORMAL_LABEL);
// Labels beginning with a space, backslash or double quote get special
// treatment before being written (the action is not visible here).
781 if (label.length() > 0
782 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
784 put_string(label, outfp);
// Writes the labels of the n references in v, of the given label type, to
// fp.  The group is optionally sorted by reference number first, and
// labels are merged where merge_labels() allows it.
793 static void output_citation_group(reference **v, int n, label_type type,
796 if (sort_adjacent_labels) {
797 // Do an insertion sort. Usually n will be very small.
798 for (int i = 1; i < n; i++) {
799 int num = v[i]->get_number();
800 reference *temp = v[i];
// Walk left past entries whose number is greater than num.
802 for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
807 // This messes up if !accumulate.
808 if (accumulate && n > 1) {
// Compare each label with its predecessor in the group.
811 for (int i = 1; i < n; i++)
812 if (v[i]->get_label(type) != v[i - 1]->get_label(type))
817 for (int i = 0; i < n; i++) {
// Try to merge this label with those that follow it in the group.
818 int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
820 put_string(merged_label, fp);
824 put_string(v[i]->get_label(type), fp);
826 put_string(sep_label, fp);
// Starts in state NORMAL with no pending labels.  p and n give the
// references still to be consumed and f is the output stream.
831 label_processing_state::label_processing_state(reference **p, int n, FILE *f)
832 : state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
// Flushes any label still pending by feeding EOF to handle_pending.
836 label_processing_state::~label_processing_state()
838 int handled = handle_pending(EOF);
// Advances the pending-label state machine with the next character c.
// While labels are pending it either extends the pending group or writes
// it out with output_citation_group.
// NOTE(review): the return statements are not visible here.  process()
// tests the result, so nonzero presumably means "c was consumed".
843 int label_processing_state::handle_pending(int c)
// A label followed by POST_LABEL_MARKER moves to PENDING_LABEL_POST.
849 if (c == POST_LABEL_MARKER) {
850 state = PENDING_LABEL_POST;
854 output_citation_group(rptr, count, type, fp);
860 case PENDING_LABEL_POST:
// A PRE_LABEL_MARKER right after the post marker may start an adjacent
// citation.
861 if (c == PRE_LABEL_MARKER) {
862 state = PENDING_LABEL_POST_PRE;
866 output_citation_group(rptr, count, type, fp);
869 put_string(post_label, fp);
873 case PENDING_LABEL_POST_PRE:
// An adjacent label of the same type goes back to PENDING_LABEL.
874 if (c >= LABEL_MARKER
875 && c < LABEL_MARKER + N_LABEL_TYPES
876 && c - LABEL_MARKER == type) {
878 state = PENDING_LABEL;
882 output_citation_group(rptr, count, type, fp);
885 put_string(sep_label, fp);
890 if (c == PRE_LABEL_MARKER) {
891 put_string(sep_label, fp);
896 put_string(post_label, fp);
// Processes one character of a line.  Pending label state is dealt with
// first, then marker characters are translated in state NORMAL.
904 void label_processing_state::process(int c)
906 if (handle_pending(c))
908 assert(state == NORMAL);
// A pre-label marker is replaced by the pre_label string.
910 case PRE_LABEL_MARKER:
911 put_string(pre_label, fp);
914 case POST_LABEL_MARKER:
915 state = PENDING_POST;
918 case LABEL_MARKER + 1:
// A label marker starts a pending label.  Its type is the offset of c
// from LABEL_MARKER.
920 state = PENDING_LABEL;
921 type = label_type(c - LABEL_MARKER);
// Comparison function passed to qsort in output_references.  p1 and p2
// point at `reference *' array elements; the referenced objects are
// compared with compare_reference.
932 int rcompare(const void *p1, const void *p2)
934 return compare_reference(**(reference **)p1, **(reference **)p2);
// Writes out the accumulated references.  The hash table entries are
// compacted, sorted and renumbered, and their labels computed.  If output
// was diverted it is replayed to stdout with label markers expanded.  The
// reference list is then written between `.]<' and `.]>' lines.
939 void output_references()
// A table size of zero is reported as an error.
942 if (!hash_table_size) {
943 error("nothing to reference (probably `bibliography' before `sort')");
948 if (nreferences > 0) {
// Move the non-null entries to the front of the table.
951 for (i = 0; i < hash_table_size; i++)
952 if (reference_hash_table[i] != 0)
953 reference_hash_table[j++] = reference_hash_table[i];
954 assert(j == nreferences);
// Clear the now-unused tail of the table.
955 for (; j < hash_table_size; j++)
956 reference_hash_table[j] = 0;
957 qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare);
// Renumber the references in sorted order.
958 for (i = 0; i < nreferences; i++)
959 reference_hash_table[i]->set_number(i);
960 compute_labels(reference_hash_table, nreferences);
// Output was diverted: read it back from outfp and write to stdout.
962 if (outfp != stdout) {
965 label_processing_state state(citation, ncitations, stdout);
967 while ((c = getc(outfp)) != EOF)
974 if (nreferences > 0) {
975 fputs(".]<\n", outfp);
976 for (int i = 0; i < nreferences; i++) {
977 if (sort_fields.length() > 0)
978 reference_hash_table[i]->print_sort_key_comment(outfp);
979 if (label_in_reference) {
980 fputs(".ds [F ", outfp);
981 const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
// Labels beginning with a space, backslash or double quote get special
// treatment before being written (the action is not visible here).
982 if (label.length() > 0
983 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
985 put_string(label, outfp);
// Each reference is deleted and its slot cleared once written.
988 reference_hash_table[i]->output(outfp);
989 delete reference_hash_table[i];
990 reference_hash_table[i] = 0;
992 fputs(".]>\n", outfp);
// Looks up query (query_len bytes) in database_list and returns a newly
// allocated reference built from the first match.  Errors are reported
// for no match and for a match without fields; a warning is issued when
// more than one entry matches.
998 static reference *find_reference(const char *query, int query_len)
1000 // This is so that error messages look better.
1001 while (query_len > 0 && csspace(query[query_len - 1]))
// Copy the query, replacing newlines with spaces.
1004 for (int i = 0; i < query_len; i++)
1005 str += query[i] == '\n' ? ' ' : query[i];
// Make sure the default database has been added before searching.
1007 possibly_load_default_database();
1008 search_list_iterator iter(&database_list, str.contents());
1012 if (!iter.next(&start, &len, &rid)) {
1013 error("no matches for `%1'", str.contents());
1016 const char *end = start + len;
1017 while (start < end) {
// Advance start past the next newline.
1020 while (start < end && *start++ != '\n')
1024 error("found a reference for `%1' but it didn't contain any fields",
1028 reference *result = new reference(start, end - start, &rid);
// A second successful next() means the query was ambiguous.
1029 if (iter.next(&start, &len, &rid))
1030 warning("multiple matches for `%1'", str.contents());
// Builds a reference from the text of a citation.  The first line of str is
// scanned for flag characters and a database query, and the remaining
// text is parsed as inline fields.  A database match and inline fields are
// merged; flags are returned through flagsp.
1034 static reference *make_reference(const string &str, unsigned *flagsp)
1036 const char *start = str.contents();
1037 const char *end = start + str.length();
1038 const char *ptr = start;
// Move ptr to just after the first newline (or to end).
1042 while (ptr < end && *ptr++ != '\n')
// Scan the first line for flag characters.
1046 for (; start < ptr; start++) {
// Set SHORT_LABEL while keeping any bracket bits already collected (the
// character that triggers this is on a line not visible here).
1048 *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
1049 | FORCE_LEFT_BRACKET)));
1050 else if (*start == '[')
1051 *flagsp |= FORCE_LEFT_BRACKET;
1052 else if (*start == ']')
1053 *flagsp |= FORCE_RIGHT_BRACKET;
// The first character that is neither a flag nor whitespace ends the scan.
1054 else if (!csspace(*start))
1058 error("empty reference");
1059 return new reference;
1061 reference *database_ref = 0;
// The rest of the first line is the database query.
1063 database_ref = find_reference(start, ptr - start);
1064 reference *inline_ref = 0;
// Text after the first line supplies inline fields.
1066 inline_ref = new reference(ptr, end - ptr);
// Both present: fold the inline fields into the database reference.
1069 database_ref->merge(*inline_ref);
1071 return database_ref;
1076 else if (database_ref)
1077 return database_ref;
// Neither source produced a reference: return an empty one.
1079 return new reference;
// Handles one reference given as text, ignoring the returned flags.  It
// is either stored, or handled and written out at once.
// NOTE(review): the selecting condition is not visible here; presumably it
// tests `accumulate', as do_file does.
1082 static void do_ref(const string &str)
1085 (void)store_reference(str);
1087 (void)immediately_handle_reference(str);
1088 immediately_output_references();
// Shortens str to drop trailing whitespace.  The scan stops at a newline,
// so a trailing newline and anything before it are kept.
1092 static void trim_blanks(string &str)
1094 const char *start = str.contents();
1095 const char *end = start + str.length();
// Scan back over trailing whitespace other than newline.
1096 while (end > start && end[-1] != '\n' && csspace(end[-1]))
1098 str.set_length(end - start);
// Reads references from filename ("-" is handled specially) using a small
// character-driven state machine with the states listed below.
// NOTE(review): most of the state machine is not visible here.
1101 void do_bib(const char *filename)
1104 if (strcmp(filename, "-") == 0)
1108 fp = fopen(filename, "r");
1110 error("can't open `%1': %2", filename, strerror(errno));
1113 current_filename = filename;
1116 START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
// Invalid input characters are reported through error().
1123 if (invalid_input_char(c)) {
1124 error("invalid input character code %1", c);
1152 else if (csspace(c)) {
1167 else if (csspace(c))
// A newline puts the machine at the start of a body line.
1182 state = c == '\n' ? BODY_START : BODY;
1211 // from the Dragon Book
// Hashes the len bytes starting at s.
1213 unsigned hash_string(const char *s, int len)
1215 const char *end = s + len;
// g receives the bits of h selected by the mask 0xf0000000; the branch is
// taken when any of them is set.
1220 if ((g = h & 0xf0000000) != 0) {
// Chooses a hash table size from a fixed ascending list, given the current
// size n.  The scan stops at the first entry greater than n, or at the 0
// that terminates the list.
// NOTE(review): the return statement and any handling of the terminator
// are not visible here.
1228 int next_size(int n)
1230 static const int table_sizes[] = {
1231 101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
1232 80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
1233 16000057, 32000011, 64000031, 128000003, 0
1237 for (p = table_sizes; *p <= n && *p != 0; p++)