3 * Copyright (C) 1984-2005 Mark Nudelman
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Less License, as specified in the README file.
8 * For more information about less, or for information on how to
9 * contact the author, see the README file.
14 * Routines to search a file for a pattern.
20 #define MINPOS(a,b) (((a) < (b)) ? (a) : (b))
21 #define MAXPOS(a,b) (((a) > (b)) ? (a) : (b))
23 #if HAVE_POSIX_REGCOMP
26 #define REGCOMP_FLAG (more_mode ? 0 : REG_EXTENDED)
28 #define REGCOMP_FLAG 0
50 extern int how_search;
54 extern int jump_sline;
58 extern int status_col;
59 extern POSITION start_attnpos;
60 extern POSITION end_attnpos;
62 extern int hilite_search;
63 extern int screen_trashed;
64 extern int size_linebuf;
66 extern int can_goto_line;
67 static int hide_hilite;
68 static POSITION prep_startpos;
69 static POSITION prep_endpos;
73 struct hilite *hl_next;
77 static struct hilite hilite_anchor = { NULL, NULL_POSITION, NULL_POSITION };
78 #define hl_first hl_next
82 * These are the static variables that represent the "remembered"
85 #if HAVE_POSIX_REGCOMP
86 static regex_t *regpattern = NULL;
89 pcre *regpattern = NULL;
95 static char *cpattern = NULL;
98 static struct regexp *regpattern = NULL;
101 static int is_caseless;
102 static int is_ucase_pattern;
103 static int last_search_type;
104 static char *last_pattern = NULL;
107 * Convert text. Perform one or more of these transformations:
109 #define CVT_TO_LC 01 /* Convert upper-case to lower-case */
110 #define CVT_BS 02 /* Do backspace processing */
111 #define CVT_CRLF 04 /* Remove trailing CR (from a CR-LF line ending) */
112 #define CVT_ANSI 010 /* Remove ANSI escape sequences */
115 cvt_text(odst, osrc, ops)
123 for (src = osrc, dst = odst; *src != '\0'; src++)
125 if ((ops & CVT_TO_LC) && IS_UPPER(*src))
126 /* Convert uppercase to lowercase. */
127 *dst++ = TO_LOWER(*src);
128 else if ((ops & CVT_BS) && *src == '\b' && dst > odst)
129 /* Delete BS and preceding char. */
131 else if ((ops & CVT_ANSI) && *src == ESC)
133 /* Skip to end of ANSI escape sequence. */
134 while (src[1] != '\0')
135 if (!is_ansi_middle(*++src))
141 if ((ops & CVT_CRLF) && dst > odst && dst[-1] == '\r')
147 * Determine which conversions to perform.
153 if (is_caseless || bs_mode == BS_SPECIAL)
157 if (bs_mode == BS_SPECIAL)
159 if (bs_mode != BS_CONTROL)
161 } else if (bs_mode != BS_CONTROL)
165 if (ctldisp == OPT_ONPLUS)
171 * Are there any uppercase letters in this string?
179 for (p = s; *p != '\0'; p++)
186 * Is there a previous (remembered) search pattern?
191 if (last_search_type & SRCH_NO_REGEX)
192 return (last_pattern != NULL);
193 #if HAVE_POSIX_REGCOMP
194 return (regpattern != NULL);
197 return (regpattern != NULL);
200 return (re_pattern != 0);
203 return (cpattern != NULL);
206 return (regpattern != NULL);
209 return (last_pattern != NULL);
215 * Repaint the hilites currently displayed on the screen.
216 * Repaint each line which contains highlighted text.
217 * If on==0, force all hilites off.
226 int save_hide_hilite;
231 save_hide_hilite = hide_hilite;
242 hide_hilite = save_hide_hilite;
246 for (slinenum = TOP; slinenum < TOP + sc_height-1; slinenum++)
248 pos = position(slinenum);
249 if (pos == NULL_POSITION)
251 epos = position(slinenum+1);
254 * If any character in the line is highlighted,
257 * {{ This doesn't work -- if line is drawn with highlights
258 * which should be erased (e.g. toggle -i with status column),
259 * we must redraw the line even if it has no highlights.
260 * For now, just repaint every line. }}
262 if (is_hilited(pos, epos, 1, NULL))
265 (void) forw_line(pos);
270 hide_hilite = save_hide_hilite;
274 * Clear the attn hilite.
280 POSITION old_start_attnpos;
281 POSITION old_end_attnpos;
285 if (start_attnpos == NULL_POSITION)
287 old_start_attnpos = start_attnpos;
288 old_end_attnpos = end_attnpos;
289 start_attnpos = end_attnpos = NULL_POSITION;
299 for (slinenum = TOP; slinenum < TOP + sc_height-1; slinenum++)
301 pos = position(slinenum);
302 if (pos == NULL_POSITION)
304 epos = position(slinenum+1);
305 if (pos < old_end_attnpos &&
306 (epos == NULL_POSITION || epos > old_start_attnpos))
308 (void) forw_line(pos);
317 * Toggle the hiding of search string highlighting.
324 error("No previous regular expression", NULL_PARG);
328 hide_hilite = !hide_hilite;
334 * Compile a search pattern, for future use by match_pattern.
337 compile_pattern(pattern, search_type)
341 if ((search_type & SRCH_NO_REGEX) == 0)
343 #if HAVE_POSIX_REGCOMP
344 regex_t *s = (regex_t *) ecalloc(1, sizeof(regex_t));
345 if (regcomp(s, pattern, REGCOMP_FLAG))
348 error("Invalid pattern", NULL_PARG);
351 if (regpattern != NULL)
357 const char *errstring;
360 comp = pcre_compile(pattern, 0,
361 &errstring, &erroffset, NULL);
364 parg.p_string = (char *) errstring;
372 if ((parg.p_string = re_comp(pattern)) != NULL)
381 if ((s = regcmp(pattern, 0)) == NULL)
383 error("Invalid pattern", NULL_PARG);
386 if (cpattern != NULL)
392 if ((s = regcomp(pattern)) == NULL)
395 * regcomp has already printed an error message
400 if (regpattern != NULL)
406 if (last_pattern != NULL)
408 last_pattern = (char *) calloc(1, strlen(pattern)+1);
409 if (last_pattern != NULL)
410 strcpy(last_pattern, pattern);
412 last_search_type = search_type;
417 * Forget that we have a compiled pattern.
422 #if HAVE_POSIX_REGCOMP
423 if (regpattern != NULL)
428 if (regpattern != NULL)
429 pcre_free(regpattern);
436 if (cpattern != NULL)
441 if (regpattern != NULL)
449 * Perform a pattern match with the previously compiled pattern.
450 * Set sp and ep to the start and end of the matched string.
453 match_pattern(line, sp, ep, notbol)
461 if (last_search_type & SRCH_NO_REGEX)
462 return (match(last_pattern, line, sp, ep));
464 #if HAVE_POSIX_REGCOMP
467 int flags = (notbol) ? REG_NOTBOL : 0;
468 matched = !regexec(regpattern, line, 1, &rm, flags);
472 *sp = line + rm.rm_so;
473 *ep = line + rm.rm_eo;
482 int flags = (notbol) ? PCRE_NOTBOL : 0;
484 matched = pcre_exec(regpattern, NULL, line, strlen(line),
485 0, flags, ovector, 3) >= 0;
488 *sp = line + ovector[0];
489 *ep = line + ovector[1];
493 matched = (re_exec(line) == 1);
495 * re_exec doesn't seem to provide a way to get the matched string.
500 *ep = regex(cpattern, line);
501 matched = (*ep != NULL);
508 matched = regexec2(regpattern, line, notbol);
510 matched = regexec(regpattern, line);
514 *sp = regpattern->startp[0];
515 *ep = regpattern->endp[0];
518 matched = match(last_pattern, line, sp, ep);
525 * Clear the hilite list.
531 struct hilite *nexthl;
533 for (hl = hilite_anchor.hl_first; hl != NULL; hl = nexthl)
535 nexthl = hl->hl_next;
538 hilite_anchor.hl_first = NULL;
539 prep_startpos = prep_endpos = NULL_POSITION;
543 * Should any characters in a specified range be highlighted?
546 is_hilited_range(pos, epos)
553 * Look at each highlight and see if any part of it falls in the range.
555 for (hl = hilite_anchor.hl_first; hl != NULL; hl = hl->hl_next)
557 if (hl->hl_endpos > pos &&
558 (epos == NULL_POSITION || epos > hl->hl_startpos))
565 * Should any characters in a specified range be highlighted?
566 * If nohide is nonzero, don't consider hide_hilite.
569 is_hilited(pos, epos, nohide, p_matches)
577 if (p_matches != NULL)
581 start_attnpos != NULL_POSITION &&
583 (epos == NULL_POSITION || epos > start_attnpos))
585 * The attn line overlaps this range.
589 match = is_hilited_range(pos, epos);
593 if (p_matches != NULL)
595 * Report matches, even if we're hiding highlights.
599 if (hilite_search == 0)
601 * Not doing highlighting.
605 if (!nohide && hide_hilite)
607 * Highlighting is hidden.
615 * Add a new hilite to a hilite list.
618 add_hilite(anchor, hl)
619 struct hilite *anchor;
625 * Hilites are sorted in the list; find where new one belongs.
626 * Insert new one after ihl.
628 for (ihl = anchor; ihl->hl_next != NULL; ihl = ihl->hl_next)
630 if (ihl->hl_next->hl_startpos > hl->hl_startpos)
635 * Truncate hilite so it doesn't overlap any existing ones
636 * above and below it.
639 hl->hl_startpos = MAXPOS(hl->hl_startpos, ihl->hl_endpos);
640 if (ihl->hl_next != NULL)
641 hl->hl_endpos = MINPOS(hl->hl_endpos, ihl->hl_next->hl_startpos);
642 if (hl->hl_startpos >= hl->hl_endpos)
645 * Hilite was truncated out of existence.
650 hl->hl_next = ihl->hl_next;
655 adj_hilite_ansi(cvt_ops, line, npos)
660 if (cvt_ops & CVT_ANSI)
661 while (**line == ESC)
664 * Found an ESC. The file position moves
665 * forward past the entire ANSI escape sequence.
669 while (**line != '\0')
672 if (!is_ansi_middle(*(*line)++))
679 * Adjust hl_startpos & hl_endpos to account for backspace processing.
682 adj_hilite(anchor, linepos, cvt_ops)
683 struct hilite *anchor;
694 * The line was already scanned and hilites were added (in hilite_line).
695 * But it was assumed that each char position in the line
696 * corresponds to one char position in the file.
697 * This may not be true if there are backspaces in the line.
698 * Get the raw line again. Look at each character.
700 (void) forw_raw_line(linepos, &line);
701 opos = npos = linepos;
702 hl = anchor->hl_first;
707 * See if we need to adjust the current hl_startpos or
708 * hl_endpos. After adjusting startpos[i], move to endpos[i].
709 * After adjusting endpos[i], move to startpos[i+1].
710 * The hilite list must be sorted thus:
711 * startpos[0] < endpos[0] <= startpos[1] < endpos[1] <= etc.
713 if (checkstart && hl->hl_startpos == opos)
715 hl->hl_startpos = npos;
717 continue; /* {{ not really necessary }} */
718 } else if (!checkstart && hl->hl_endpos == opos)
720 hl->hl_endpos = npos;
723 continue; /* {{ necessary }} */
727 adj_hilite_ansi(cvt_ops, &line, &npos);
731 if (cvt_ops & CVT_BS)
733 while (*line == '\b')
737 adj_hilite_ansi(cvt_ops, &line, &npos);
745 * Found a backspace. The file position moves
746 * forward by 2 relative to the processed line
747 * which was searched in hilite_line.
757 * Make a hilite for each string in a physical line which matches
758 * the current pattern.
759 * sp,ep delimit the first match already found.
762 hilite_line(linepos, line, sp, ep, cvt_ops)
771 struct hilite hilites;
773 if (sp == NULL || ep == NULL)
776 * sp and ep delimit the first match in the line.
777 * Mark the corresponding file positions, then
778 * look for further matches and mark them.
779 * {{ This technique, of calling match_pattern on subsequent
780 * substrings of the line, may mark more than is correct
781 * if the pattern starts with "^". This bug is fixed
782 * for those regex functions that accept a notbol parameter
783 * (currently POSIX, PCRE and V8-with-regexec2). }}
787 * Put the hilites into a temporary list until they're adjusted.
789 hilites.hl_first = NULL;
794 * Assume that each char position in the "line"
795 * buffer corresponds to one char position in the file.
796 * This is not quite true; we need to adjust later.
798 hl = (struct hilite *) ecalloc(1, sizeof(struct hilite));
799 hl->hl_startpos = linepos + (sp-line);
800 hl->hl_endpos = linepos + (ep-line);
801 add_hilite(&hilites, hl);
804 * If we matched more than zero characters,
805 * move to the first char after the string we matched.
806 * If we matched zero, just move to the next char.
810 else if (*searchp != '\0')
812 else /* end of line */
814 } while (match_pattern(searchp, &sp, &ep, 1));
817 * If there were backspaces in the original line, they
818 * were removed, and hl_startpos/hl_endpos are not correct.
819 * {{ This is very ugly. }}
821 adj_hilite(&hilites, linepos, cvt_ops);
824 * Now put the hilites into the real list.
826 while ((hl = hilites.hl_next) != NULL)
828 hilites.hl_next = hl->hl_next;
829 add_hilite(&hilite_anchor, hl);
835 * Change the caseless-ness of searches.
836 * Updates the internal search state to reflect a change in the -i flag.
841 if (!is_ucase_pattern)
843 * Pattern did not have uppercase.
844 * Just set the search caselessness to the global caselessness.
846 is_caseless = caseless;
849 * Pattern did have uppercase.
850 * Discard the pattern; we can't change search caselessness now.
857 * Find matching text which is currently on screen and highlight it.
862 struct scrpos scrpos;
865 if (scrpos.pos == NULL_POSITION)
867 prep_hilite(scrpos.pos, position(BOTTOM_PLUS_ONE), -1);
872 * Change highlighting parameters.
878 * Erase any highlights currently on screen.
883 if (hilite_search == OPT_ONPLUS)
885 * Display highlights.
892 * Figure out where to start a search.
895 search_pos(search_type)
904 * Start at the beginning (or end) of the file.
905 * The empty_screen() case is mainly for
906 * command line initiated searches;
907 * for example, "+/xyz" on the command line.
908 * Also for multi-file (SRCH_PAST_EOF) searches.
910 if (search_type & SRCH_FORW)
916 if (pos == NULL_POSITION)
918 (void) ch_end_seek();
927 * Search does not include current screen.
929 if (search_type & SRCH_FORW)
930 linenum = BOTTOM_PLUS_ONE;
933 pos = position(linenum);
937 * Search includes current screen.
938 * It starts at the jump target (if searching backwards),
939 * or at the jump target plus one (if forwards).
941 linenum = adjsline(jump_sline);
942 pos = position(linenum);
943 if (search_type & SRCH_FORW)
945 pos = forw_raw_line(pos, (char **)NULL);
946 while (pos == NULL_POSITION)
948 if (++linenum >= sc_height)
950 pos = position(linenum);
954 while (pos == NULL_POSITION)
958 pos = position(linenum);
966 * Search a subset of the file, specified by start/end position.
969 search_range(pos, endpos, search_type, matches, maxlines, plinepos, pendpos)
983 POSITION linepos, oldpos;
985 linenum = find_linenum(pos);
990 * Get lines until we find a matching one or until
991 * we hit end-of-file (or beginning-of-file if we're
992 * going backwards), or until we hit the end position.
997 * A signal aborts the search.
1002 if ((endpos != NULL_POSITION && pos >= endpos) || maxlines == 0)
1005 * Reached end position without a match.
1007 if (pendpos != NULL)
1014 if (search_type & SRCH_FORW)
1017 * Read the next line, and save the
1018 * starting position of that line in linepos.
1021 pos = forw_raw_line(pos, &line);
1027 * Read the previous line and save the
1028 * starting position of that line in linepos.
1030 pos = back_raw_line(pos, &line);
1036 if (pos == NULL_POSITION)
1039 * Reached EOF/BOF without a match.
1041 if (pendpos != NULL)
1047 * If we're using line numbers, we might as well
1048 * remember the information we have now (the position
1049 * and line number of the current line).
1050 * Don't do it for every line because it slows down
1051 * the search. Remember the line number only if
1052 * we're "far" from the last place we remembered it.
1054 if (linenums && abs((int)(pos - oldpos)) > 1024)
1055 add_lnum(linenum, pos);
1059 * If it's a caseless search, convert the line to lowercase.
1060 * If we're doing backspace processing, delete backspaces.
1062 cvt_ops = get_cvt_ops();
1063 cvt_text(line, line, cvt_ops);
1066 * Test the next line to see if we have a match.
1067 * We are successful if we either want a match and got one,
1068 * or if we want a non-match and got one.
1070 line_match = match_pattern(line, &sp, &ep, 0);
1071 line_match = (!(search_type & SRCH_NO_MATCH) && line_match) ||
1072 ((search_type & SRCH_NO_MATCH) && !line_match);
1078 if (search_type & SRCH_FIND_ALL)
1082 * We are supposed to find all matches in the range.
1083 * Just add the matches in this line to the
1084 * hilite list and keep searching.
1087 hilite_line(linepos, line, sp, ep, cvt_ops);
1089 } else if (--matches <= 0)
1092 * Found the one match we're looking for.
1096 if (hilite_search == OPT_ON)
1099 * Clear the hilite list and add only
1100 * the matches in this one line.
1104 hilite_line(linepos, line, sp, ep, cvt_ops);
1107 if (plinepos != NULL)
1108 *plinepos = linepos;
1115 * Search for the n-th occurrence of a specified pattern,
1116 * either forward or backward.
1117 * Return the number of matches not yet found in this file
1118 * (that is, n minus the number of matches found).
1119 * Return -1 if the search should be aborted.
1120 * Caller may continue the search in another file
1121 * if fewer than n matches are found in this file.
1124 search(search_type, pattern, n)
1132 if (pattern == NULL || *pattern == '\0')
1135 * A null pattern means use the previously compiled pattern.
1137 if (!prev_pattern())
1139 error("No previous regular expression", NULL_PARG);
1142 if ((search_type & SRCH_NO_REGEX) !=
1143 (last_search_type & SRCH_NO_REGEX))
1145 error("Please re-enter search pattern", NULL_PARG);
1149 if (hilite_search == OPT_ON)
1152 * Erase the highlights currently on screen.
1153 * If the search fails, we'll redisplay them later.
1157 if (hilite_search == OPT_ONPLUS && hide_hilite)
1160 * Highlight any matches currently on screen,
1161 * before we actually start the search.
1171 * Compile the pattern.
1173 ucase = is_ucase(pattern);
1174 if (caseless == OPT_ONPLUS)
1175 cvt_text(pattern, pattern, CVT_TO_LC);
1176 if (compile_pattern(pattern, search_type) < 0)
1179 * Ignore case if -I is set OR
1180 * -i is set AND the pattern is all lowercase.
1182 is_ucase_pattern = ucase;
1183 if (is_ucase_pattern && caseless != OPT_ONPLUS)
1186 is_caseless = caseless;
1191 * Erase the highlights currently on screen.
1192 * Also permanently delete them from the hilite list.
1198 if (hilite_search == OPT_ONPLUS)
1201 * Highlight any matches currently on screen,
1202 * before we actually start the search.
1210 * Figure out where to start the search.
1212 pos = search_pos(search_type);
1213 if (pos == NULL_POSITION)
1216 * Can't find anyplace to start searching from.
1218 if (search_type & SRCH_PAST_EOF)
1220 /* repaint(); -- why was this here? */
1221 error("Nothing to search", NULL_PARG);
1225 n = search_range(pos, NULL_POSITION, search_type, n, -1,
1226 &pos, (POSITION*)NULL);
1230 * Search was unsuccessful.
1233 if (hilite_search == OPT_ON && n > 0)
1235 * Redisplay old hilites.
1242 if (!(search_type & SRCH_NO_MOVE))
1245 * Go to the matching line.
1247 jump_loc(pos, jump_sline);
1251 if (hilite_search == OPT_ON)
1253 * Display new hilites in the matching line.
1263 * Prepare hilites in a given range of the file.
1265 * The pair (prep_startpos,prep_endpos) delimits a contiguous region
1266 * of the file that has been "prepared"; that is, scanned for matches for
1267 * the current search pattern, and hilites have been created for such matches.
1268 * If prep_startpos == NULL_POSITION, the prep region is empty.
1269 * If prep_endpos == NULL_POSITION, the prep region extends to EOF.
1270 * prep_hilite asks that the range (spos,epos) be covered by the prep region.
1273 prep_hilite(spos, epos, maxlines)
1278 POSITION nprep_startpos = prep_startpos;
1279 POSITION nprep_endpos = prep_endpos;
1285 * Search beyond where we're asked to search, so the prep region covers
1286 * more than we need. Do one big search instead of a bunch of small ones.
1288 #define SEARCH_MORE (3*size_linebuf)
1290 if (!prev_pattern())
1294 * If we're limited to a max number of lines, figure out the
1295 * file position we should stop at.
1298 max_epos = NULL_POSITION;
1302 for (i = 0; i < maxlines; i++)
1303 max_epos = forw_raw_line(max_epos, (char **)NULL);
1308 * The range that we need to search (spos,epos); and the range that
1309 * the "prep" region will then cover (nprep_startpos,nprep_endpos).
1312 if (prep_startpos == NULL_POSITION ||
1313 (epos != NULL_POSITION && epos < prep_startpos) ||
1317 * New range is not contiguous with old prep region.
1318 * Discard the old prep region and start a new one.
1321 if (epos != NULL_POSITION)
1322 epos += SEARCH_MORE;
1323 nprep_startpos = spos;
1327 * New range partially or completely overlaps old prep region.
1329 if (epos == NULL_POSITION)
1332 * New range goes to end of file.
1335 } else if (epos > prep_endpos)
1338 * New range ends after old prep region.
1339 * Extend prep region to end at end of new range.
1341 epos += SEARCH_MORE;
1342 } else /* (epos <= prep_endpos) */
1345 * New range ends within old prep region.
1346 * Truncate search to end at start of old prep region.
1348 epos = prep_startpos;
1351 if (spos < prep_startpos)
1354 * New range starts before old prep region.
1355 * Extend old prep region backwards to start at
1356 * start of new range.
1358 if (spos < SEARCH_MORE)
1361 spos -= SEARCH_MORE;
1362 nprep_startpos = spos;
1363 } else /* (spos >= prep_startpos) */
1366 * New range starts within or after old prep region.
1367 * Trim search to start at end of old prep region.
1373 if (epos != NULL_POSITION && max_epos != NULL_POSITION &&
1376 * Don't go past the max position we're allowed.
1380 if (epos == NULL_POSITION || epos > spos)
1382 result = search_range(spos, epos, SRCH_FORW|SRCH_FIND_ALL, 0,
1383 maxlines, (POSITION*)NULL, &new_epos);
1386 if (prep_endpos == NULL_POSITION || new_epos > prep_endpos)
1387 nprep_endpos = new_epos;
1389 prep_startpos = nprep_startpos;
1390 prep_endpos = nprep_endpos;
1395 * Simple pattern matching function.
1396 * It supports no metacharacters like *, etc.
1399 match(pattern, buf, pfound, pend)
1400 char *pattern, *buf;
1401 char **pfound, **pend;
1403 register char *pp, *lp;
1405 for ( ; *buf != '\0'; buf++)
1407 for (pp = pattern, lp = buf; *pp == *lp; pp++, lp++)
1408 if (*pp == '\0' || *lp == '\0')
1424 * This function is called by the V8 regcomp to report
1425 * errors in regular expressions.