head	1.19;
access;
symbols
	perseant-exfatfs-base-20250801:1.19
	perseant-exfatfs-base-20240630:1.19
	perseant-exfatfs:1.19.0.36
	perseant-exfatfs-base:1.19
	cjep_sun2x:1.19.0.34
	cjep_sun2x-base:1.19
	cjep_staticlib_x-base1:1.19
	cjep_staticlib_x:1.19.0.32
	cjep_staticlib_x-base:1.19
	phil-wifi-20200421:1.19
	phil-wifi-20200411:1.19
	phil-wifi-20200406:1.19
	pgoyette-compat-merge-20190127:1.19
	pgoyette-compat-20190127:1.19
	pgoyette-compat-20190118:1.19
	pgoyette-compat-1226:1.19
	pgoyette-compat-1126:1.19
	pgoyette-compat-1020:1.19
	pgoyette-compat-0930:1.19
	pgoyette-compat-0906:1.19
	pgoyette-compat-0728:1.19
	pgoyette-compat-0625:1.19
	pgoyette-compat-0521:1.19
	pgoyette-compat-0502:1.19
	pgoyette-compat-0422:1.19
	pgoyette-compat-0415:1.19
	pgoyette-compat-0407:1.19
	pgoyette-compat-0330:1.19
	pgoyette-compat-0322:1.19
	pgoyette-compat-0315:1.19
	pgoyette-compat:1.19.0.30
	pgoyette-compat-base:1.19
	perseant-stdc-iso10646:1.19.0.28
	perseant-stdc-iso10646-base:1.19
	prg-localcount2-base3:1.19
	prg-localcount2-base2:1.19
	prg-localcount2-base1:1.19
	prg-localcount2:1.19.0.26
	prg-localcount2-base:1.19
	pgoyette-localcount-20170426:1.19
	bouyer-socketcan-base1:1.19
	pgoyette-localcount-20170320:1.19
	bouyer-socketcan:1.19.0.24
	bouyer-socketcan-base:1.19
	pgoyette-localcount-20170107:1.19
	pgoyette-localcount-20161104:1.19
	localcount-20160914:1.19
	pgoyette-localcount-20160806:1.19
	pgoyette-localcount-20160726:1.19
	pgoyette-localcount:1.19.0.22
	pgoyette-localcount-base:1.19
	netbsd-5-2-3-RELEASE:1.19
	netbsd-5-1-5-RELEASE:1.19
	yamt-pagecache-base9:1.19
	yamt-pagecache-tag8:1.19
	tls-earlyentropy:1.19.0.18
	tls-earlyentropy-base:1.19
	riastradh-xf86-video-intel-2-7-1-pre-2-21-15:1.19
	riastradh-drm2-base3:1.19
	netbsd-5-2-2-RELEASE:1.19
	netbsd-5-1-4-RELEASE:1.19
	netbsd-5-2-1-RELEASE:1.19
	netbsd-5-1-3-RELEASE:1.19
	agc-symver:1.19.0.20
	agc-symver-base:1.19
	tls-maxphys-base:1.19
	yamt-pagecache-base8:1.19
	netbsd-5-2:1.19.0.16
	yamt-pagecache-base7:1.19
	netbsd-5-2-RELEASE:1.19
	netbsd-5-2-RC1:1.19
	yamt-pagecache-base6:1.19
	yamt-pagecache-base5:1.19
	yamt-pagecache-base4:1.19
	netbsd-5-1-2-RELEASE:1.19
	netbsd-5-1-1-RELEASE:1.19
	yamt-pagecache-base3:1.19
	yamt-pagecache-base2:1.19
	yamt-pagecache:1.19.0.14
	yamt-pagecache-base:1.19
	bouyer-quota2-nbase:1.19
	bouyer-quota2:1.19.0.12
	bouyer-quota2-base:1.19
	matt-nb5-pq3:1.19.0.10
	matt-nb5-pq3-base:1.19
	netbsd-5-1:1.19.0.8
	netbsd-5-1-RELEASE:1.19
	netbsd-5-1-RC4:1.19
	netbsd-5-1-RC3:1.19
	netbsd-5-1-RC2:1.19
	netbsd-5-1-RC1:1.19
	netbsd-5-0-2-RELEASE:1.19
	netbsd-5-0-1-RELEASE:1.19
	jym-xensuspend-nbase:1.19
	netbsd-5-0:1.19.0.6
	netbsd-5-0-RELEASE:1.19
	netbsd-5-0-RC4:1.19
	netbsd-5-0-RC3:1.19
	netbsd-5-0-RC2:1.19
	jym-xensuspend:1.19.0.4
	jym-xensuspend-base:1.19
	netbsd-5-0-RC1:1.19
	netbsd-5:1.19.0.2
	netbsd-5-base:1.19
	matt-mips64-base2:1.18
	matt-mips64:1.18.0.22
	mjf-devfs2:1.18.0.20
	mjf-devfs2-base:1.18
	netbsd-4-0-1-RELEASE:1.18
	wrstuden-revivesa-base-3:1.18
	wrstuden-revivesa-base-2:1.18
	wrstuden-fixsa-newbase:1.18
	wrstuden-revivesa-base-1:1.18
	yamt-pf42-base4:1.18
	yamt-pf42-base3:1.18
	hpcarm-cleanup-nbase:1.18
	yamt-pf42-baseX:1.18
	yamt-pf42-base2:1.18
	wrstuden-revivesa:1.18.0.18
	wrstuden-revivesa-base:1.18
	yamt-pf42:1.18.0.16
	yamt-pf42-base:1.18
	keiichi-mipv6-nbase:1.18
	keiichi-mipv6:1.18.0.14
	keiichi-mipv6-base:1.18
	matt-armv6-nbase:1.18
	matt-armv6-prevmlocking:1.18
	wrstuden-fixsa-base-1:1.18
	netbsd-4-0:1.18.0.12
	netbsd-4-0-RELEASE:1.18
	cube-autoconf:1.18.0.10
	cube-autoconf-base:1.18
	netbsd-4-0-RC5:1.18
	netbsd-4-0-RC4:1.18
	netbsd-4-0-RC3:1.18
	netbsd-4-0-RC2:1.18
	netbsd-4-0-RC1:1.18
	matt-armv6:1.18.0.8
	matt-armv6-base:1.18
	matt-mips64-base:1.18
	hpcarm-cleanup:1.18.0.6
	hpcarm-cleanup-base:1.18
	netbsd-3-1-1-RELEASE:1.17
	netbsd-3-0-3-RELEASE:1.17
	wrstuden-fixsa:1.18.0.4
	wrstuden-fixsa-base:1.18
	abandoned-netbsd-4-base:1.17
	abandoned-netbsd-4:1.17.0.8
	netbsd-3-1:1.17.0.10
	netbsd-3-1-RELEASE:1.17
	netbsd-3-0-2-RELEASE:1.17
	netbsd-3-1-RC4:1.17
	netbsd-3-1-RC3:1.17
	netbsd-3-1-RC2:1.17
	netbsd-3-1-RC1:1.17
	netbsd-4:1.18.0.2
	netbsd-4-base:1.18
	chap-midi-nbase:1.17
	netbsd-3-0-1-RELEASE:1.17
	chap-midi:1.17.0.6
	chap-midi-base:1.17
	netbsd-3-0:1.17.0.4
	netbsd-3-0-RELEASE:1.17
	netbsd-3-0-RC6:1.17
	netbsd-3-0-RC5:1.17
	netbsd-3-0-RC4:1.17
	netbsd-3-0-RC3:1.17
	netbsd-3-0-RC2:1.17
	netbsd-3-0-RC1:1.17
	netbsd-2-0-3-RELEASE:1.15.2.2
	netbsd-2-1:1.15.4.1.0.2
	netbsd-2-1-RELEASE:1.15.4.1
	netbsd-2-1-RC6:1.15.4.1
	netbsd-2-1-RC5:1.15.4.1
	netbsd-2-1-RC4:1.15.4.1
	netbsd-2-1-RC3:1.15.4.1
	netbsd-2-1-RC2:1.15.4.1
	netbsd-2-1-RC1:1.15.4.1
	netbsd-2-0-2-RELEASE:1.15.2.2
	netbsd-3:1.17.0.2
	netbsd-3-base:1.17
	netbsd-2-0-1-RELEASE:1.15
	netbsd-2:1.15.0.4
	netbsd-2-base:1.15
	netbsd-2-0-RELEASE:1.15
	netbsd-2-0-RC5:1.15
	netbsd-2-0-RC4:1.15
	netbsd-2-0-RC3:1.15
	netbsd-2-0-RC2:1.15
	netbsd-2-0-RC1:1.15
	netbsd-2-0:1.15.0.2
	netbsd-2-0-base:1.15
	netbsd-1-6-PATCH002-RELEASE:1.14
	netbsd-1-6-PATCH002:1.14
	netbsd-1-6-PATCH002-RC4:1.14
	netbsd-1-6-PATCH002-RC3:1.14
	netbsd-1-6-PATCH002-RC2:1.14
	netbsd-1-6-PATCH002-RC1:1.14
	netbsd-1-6-PATCH001:1.14
	netbsd-1-6-PATCH001-RELEASE:1.14
	netbsd-1-6-PATCH001-RC3:1.14
	netbsd-1-6-PATCH001-RC2:1.14
	netbsd-1-6-PATCH001-RC1:1.14
	fvdl_fs64_base:1.14
	netbsd-1-6-RELEASE:1.14
	netbsd-1-6-RC3:1.14
	netbsd-1-6-RC2:1.14
	netbsd-1-6-RC1:1.14
	netbsd-1-6:1.14.0.2
	netbsd-1-6-base:1.14
	netbsd-1-5-PATCH003:1.12
	netbsd-1-5-PATCH002:1.12
	netbsd-1-5-PATCH001:1.12
	nvi_1_79:1.1.1.6
	netbsd-1-5-RELEASE:1.12
	netbsd-1-5-BETA2:1.12
	netbsd-1-5-BETA:1.12
	netbsd-1-4-PATCH003:1.11
	netbsd-1-5-ALPHA2:1.12
	netbsd-1-5:1.12.0.4
	netbsd-1-5-base:1.12
	minoura-xpg4dl-base:1.12
	minoura-xpg4dl:1.12.0.2
	netbsd-1-4-PATCH002:1.11
	wrstuden-devbsize-19991221:1.11
	wrstuden-devbsize:1.11.0.6
	wrstuden-devbsize-base:1.11
	comdex-fall-1999:1.11.0.4
	comdex-fall-1999-base:1.11
	netbsd-1-4-PATCH001:1.11
	netbsd-1-4-RELEASE:1.11
	netbsd-1-4:1.11.0.2
	netbsd-1-4-base:1.11
	netbsd-1-3-PATCH003:1.10
	netbsd-1-3-PATCH003-CANDIDATE2:1.10
	netbsd-1-3-PATCH003-CANDIDATE1:1.10
	netbsd-1-3-PATCH003-CANDIDATE0:1.10
	netbsd-1-3-PATCH002:1.10
	netbsd-1-3-PATCH001:1.10
	netbsd-1-3-RELEASE:1.10
	netbsd-1-3-BETA:1.10
	netbsd-1-3:1.10.0.2
	netbsd-1-3-base:1.10
	netbsd-1-2-PATCH001:1.10
	netbsd-1-2-RELEASE:1.10
	netbsd-1-2-BETA:1.10
	netbsd-1-2:1.10.0.4
	netbsd-1-2-base:1.10
	nvi_1_66:1.1.1.5
	BOSTIC:1.1.1
	netbsd-1-1-PATCH001:1.7
	netbsd-1-1-RELEASE:1.7
	netbsd-1-1:1.7.0.2
	netbsd-1-1-base:1.7
	netbsd-1-0-PATCH06:1.6
	netbsd-1-0-PATCH05:1.6
	netbsd-1-0-PATCH04:1.6
	netbsd-1-0-PATCH03:1.6
	netbsd-1-0-PATCH02:1.6
	netbsd-1-0-PATCH1:1.6
	netbsd-1-0-PATCH0:1.6
	netbsd-1-0-RELEASE:1.6
	netbsd-1-0:1.6.0.2
	nvi-1-34b:1.1.1.4
	nvi-1-33b:1.1.1.3
	netbsd-1-0-base:1.4
	nvi-1-11b:1.1.1.2
	nvi-1-03:1.1.1.1
	bostic-nvi:1.1.1;
locks; strict;
comment	@ * @;


1.19
date	2008.10.29.16.49.37;	author christos;	state dead;
branches;
next	1.18;

1.18
date	2006.09.27.19.58.00;	author christos;	state Exp;
branches;
next	1.17;

1.17
date	2005.02.12.12.53.23;	author aymeric;	state Exp;
branches;
next	1.16;

1.16
date	2004.11.05.19.50.12;	author dsl;	state Exp;
branches;
next	1.15;

1.15
date	2004.02.13.17.56.18;	author wiz;	state Exp;
branches
	1.15.2.1
	1.15.4.1;
next	1.14;

1.14
date	2002.04.09.01.47.34;	author thorpej;	state Exp;
branches;
next	1.13;

1.13
date	2001.03.31.11.37.50;	author aymeric;	state Exp;
branches;
next	1.12;

1.12
date	2000.03.17.02.23.17;	author christos;	state Exp;
branches;
next	1.11;

1.11
date	98.01.09.08.08.05;	author perry;	state Exp;
branches;
next	1.10;

1.10
date	96.05.20.04.49.48;	author cgd;	state Exp;
branches;
next	1.9;

1.9
date	96.05.20.04.31.56;	author mrg;	state Exp;
branches;
next	1.8;

1.8
date	96.05.20.03.48.01;	author mrg;	state Exp;
branches;
next	1.7;

1.7
date	94.12.24.17.12.42;	author cgd;	state Exp;
branches;
next	1.6;

1.6
date	94.08.17.20.13.00;	author cgd;	state Exp;
branches
	1.6.2.1;
next	1.5;

1.5
date	94.08.17.16.36.16;	author cgd;	state Exp;
branches;
next	1.4;

1.4
date	94.03.28.04.29.47;	author cgd;	state Exp;
branches;
next	1.3;

1.3
date	94.03.03.23.26.31;	author mycroft;	state Exp;
branches;
next	1.2;

1.2
date	94.01.24.06.40.41;	author cgd;	state Exp;
branches;
next	1.1;

1.1
date	94.01.24.05.53.07;	author cgd;	state Exp;
branches
	1.1.1.1;
next	;

1.15.2.1
date	2005.02.12.12.24.13;	author aymeric;	state Exp;
branches;
next	1.15.2.2;

1.15.2.2
date	2005.02.12.12.46.26;	author aymeric;	state Exp;
branches;
next	;

1.15.4.1
date	2005.05.06.14.42.22;	author riz;	state Exp;
branches;
next	;

1.6.2.1
date	94.08.17.20.13.00;	author cgd;	state dead;
branches;
next	1.6.2.2;

1.6.2.2
date	94.08.17.20.13.01;	author cgd;	state Exp;
branches;
next	;

1.1.1.1
date	94.01.24.05.53.08;	author cgd;	state Exp;
branches;
next	1.1.1.2;

1.1.1.2
date	94.03.28.02.56.01;	author cgd;	state Exp;
branches;
next	1.1.1.3;

1.1.1.3
date	94.08.17.16.17.02;	author cgd;	state Exp;
branches;
next	1.1.1.4;

1.1.1.4
date	94.08.17.19.18.50;	author cgd;	state Exp;
branches;
next	1.1.1.5;

1.1.1.5
date	96.05.20.01.57.16;	author mrg;	state Exp;
branches;
next	1.1.1.6;

1.1.1.6
date	2001.03.31.11.29.48;	author aymeric;	state Exp;
branches;
next	;


desc
@@


1.19
log
@bye old vi!
@
text
@/*	$NetBSD: ex_subst.c,v 1.18 2006/09/27 19:58:00 christos Exp $	*/

/*-
 * Copyright (c) 1992, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 1992, 1993, 1994, 1995, 1996
 *	Keith Bostic.  All rights reserved.
 *
 * See the LICENSE file for redistribution information.
 */

#include "config.h"

#include <sys/cdefs.h>
#ifndef lint
#if 0
static const char sccsid[] = "@@(#)ex_subst.c	10.37 (Berkeley) 9/15/96";
#else
__RCSID("$NetBSD: ex_subst.c,v 1.18 2006/09/27 19:58:00 christos Exp $");
#endif
#endif /* not lint */

#include <sys/types.h>
#include <sys/queue.h>
#include <sys/time.h>

#include <bitstring.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "../common/common.h"
#include "../vi/vi.h"

#define	SUB_FIRST	0x01		/* The 'r' flag isn't reasonable. */
#define	SUB_MUSTSETR	0x02		/* The 'r' flag is required. */

static int re_conv __P((SCR *, char **, size_t *, int *));
static int re_cscope_conv __P((SCR *, char **, size_t *, int *));
static int re_sub __P((SCR *,
		char *, char **, size_t *, size_t *, regmatch_t [10]));
static int re_tag_conv __P((SCR *, char **, size_t *, int *));
static int s __P((SCR *, EXCMD *, char *, regex_t *, u_int));

/*
 * ex_s --
 *	[line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
 *
 *	Substitute on lines matching a pattern.
 *
 * PUBLIC: int ex_s __P((SCR *, EXCMD *));
 */
int
ex_s(sp, cmdp)
	SCR *sp;
	EXCMD *cmdp;
{
	regex_t *re;
	size_t blen, len;
	u_int flags;
	int delim;
	char *bp, *ptrn, *rep, *p, *t;

	/*
	 * Skip leading white space.
	 *
	 * !!!
	 * Historic vi allowed any non-alphanumeric to serve as the
	 * substitution command delimiter.
	 *
	 * !!!
	 * If the arguments are empty, it's the same as &, i.e. we
	 * repeat the last substitution.
	 */
	if (cmdp->argc == 0)
		goto subagain;
	for (p = cmdp->argv[0]->bp,
	    len = cmdp->argv[0]->len; len > 0; --len, ++p) {
		if (!isblank(*p))
			break;
	}
	if (len == 0)
subagain:	return (ex_subagain(sp, cmdp));

	delim = *p++;
	if (isalnum(delim) || delim == '\\')
		return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));

	/*
	 * !!!
	 * The full-blown substitute command reset the remembered
	 * state of the 'c' and 'g' suffixes.
	 */
	sp->c_suffix = sp->g_suffix = 0;

	/*
	 * Get the pattern string, toss escaping characters.
	 *
	 * !!!
	 * Historic vi accepted any of the following forms:
	 *
	 *	:s/abc/def/		change "abc" to "def"
	 *	:s/abc/def		change "abc" to "def"
	 *	:s/abc/			delete "abc"
	 *	:s/abc			delete "abc"
	 *
	 * QUOTING NOTE:
	 *
	 * Only toss an escaping character if it escapes a delimiter.
	 * This means that "s/A/\\\\f" replaces "A" with "\\f".  It
	 * would be nice to be more regular, i.e. for each layer of
	 * escaping a single escaping character is removed, but that's
	 * not how the historic vi worked.
	 */
	for (ptrn = t = p;;) {
		if (p[0] == '\0' || p[0] == delim) {
			if (p[0] == delim)
				++p;
			/*
			 * !!!
			 * Nul terminate the pattern string -- it's passed
			 * to regcomp which doesn't understand anything else.
			 */
			*t = '\0';
			break;
		}
		if (p[0] == '\\')
			if (p[1] == delim)
				++p;
			else if (p[1] == '\\')
				*t++ = *p++;
		*t++ = *p++;
	}

	/*
	 * If the pattern string is empty, use the last RE (not just the
	 * last substitution RE).
	 */
	if (*ptrn == '\0') {
		if (sp->re == NULL) {
			ex_emsg(sp, NULL, EXM_NOPREVRE);
			return (1);
		}

		/* Re-compile the RE if necessary. */
		if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
		    sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
			return (1);
		flags = 0;
	} else {
		/*
		 * !!!
		 * Compile the RE.  Historic practice is that substitutes set
		 * the search direction as well as both substitute and search
		 * RE's.  We compile the RE twice, as we don't want to bother
		 * ref counting the pattern string and (opaque) structure.
		 */
		if (re_compile(sp, ptrn, t - ptrn,
		    &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH))
			return (1);
		if (re_compile(sp, ptrn, t - ptrn,
		    &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST))
			return (1);
		
		flags = SUB_FIRST;
		sp->searchdir = FORWARD;
	}
	re = &sp->re_c;

	/*
	 * Get the replacement string.
	 *
	 * The special character & (\& if O_MAGIC not set) matches the
	 * entire RE.  No handling of & is required here, it's done by
	 * re_sub().
	 *
	 * The special character ~ (\~ if O_MAGIC not set) inserts the
	 * previous replacement string into this replacement string.
	 * Count ~'s to figure out how much space we need.  We could
	 * special case nonexistent last patterns or whether or not
	 * O_MAGIC is set, but it's probably not worth the effort.
	 *
	 * QUOTING NOTE:
	 *
	 * Only toss an escaping character if it escapes a delimiter or
	 * if O_MAGIC is set and it escapes a tilde.
	 *
	 * !!!
	 * If the entire replacement pattern is "%", then use the last
	 * replacement pattern.  This semantic was added to vi in System
	 * V and then percolated elsewhere, presumably around the time
	 * that it was added to their version of ed(1).
	 */
	if (p[0] == '\0' || p[0] == delim) {
		if (p[0] == delim)
			++p;
		if (sp->repl != NULL)
			free(sp->repl);
		sp->repl = NULL;
		sp->repl_len = 0;
	} else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
		p += p[1] == delim ? 2 : 1;
	else {
		for (rep = p, len = 0;
		    p[0] != '\0' && p[0] != delim; ++p, ++len)
			if (p[0] == '~')
				len += sp->repl_len;
		GET_SPACE_RET(sp, bp, blen, len);
		for (t = bp, len = 0, p = rep;;) {
			if (p[0] == '\0' || p[0] == delim) {
				if (p[0] == delim)
					++p;
				break;
			}
			if (p[0] == '\\') {
				if (p[1] == delim)
					++p;
				else if (p[1] == '\\') {
					*t++ = *p++;
					++len;
				} else if (p[1] == '~') {
					++p;
					if (!O_ISSET(sp, O_MAGIC))
						goto tilde;
				}
			} else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
tilde:				++p;
				memcpy(t, sp->repl, sp->repl_len);
				t += sp->repl_len;
				len += sp->repl_len;
				continue;
			}
			*t++ = *p++;
			++len;
		}
		if ((sp->repl_len = len) != 0) {
			if (sp->repl != NULL)
				free(sp->repl);
			if ((sp->repl = malloc(len)) == NULL) {
				msgq(sp, M_SYSERR, NULL);
				FREE_SPACE(sp, bp, blen);
				return (1);
			}
			memcpy(sp->repl, bp, len);
		}
		FREE_SPACE(sp, bp, blen);
	}
	return (s(sp, cmdp, p, re, flags));
}

/*
 * ex_subagain --
 *	[line [,line]] & [cgr] [count] [#lp]]
 *
 *	Substitute using the last substitute RE and replacement pattern.
 *
 * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
 */
int
ex_subagain(sp, cmdp)
	SCR *sp;
	EXCMD *cmdp;
{
	if (sp->subre == NULL) {
		ex_emsg(sp, NULL, EXM_NOPREVRE);
		return (1);
	}
	if (!F_ISSET(sp, SC_RE_SUBST) && re_compile(sp,
	    sp->subre, sp->subre_len, NULL, NULL, &sp->subre_c, RE_C_SUBST))
		return (1);
	return (s(sp,
	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
}

/*
 * ex_subtilde --
 *	[line [,line]] ~ [cgr] [count] [#lp]]
 *
 *	Substitute using the last RE and last substitute replacement pattern.
 *
 * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
 */
int
ex_subtilde(sp, cmdp)
	SCR *sp;
	EXCMD *cmdp;
{
	if (sp->re == NULL) {
		ex_emsg(sp, NULL, EXM_NOPREVRE);
		return (1);
	}
	if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
	    sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
		return (1);
	return (s(sp,
	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
}

/*
 * s --
 * Do the substitution.  This stuff is *really* tricky.  There are lots of
 * special cases, and general nastiness.  Don't mess with it unless you're
 * pretty confident.
 * 
 * The nasty part of the substitution is what happens when the replacement
 * string contains newlines.  It's a bit tricky -- consider the information
 * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
 * to build a set of newline offsets which we use to break the line up later,
 * when the replacement is done.  Don't change it unless you're *damned*
 * confident.
 */
#define	NEEDNEWLINE(sp) {						\
	if (sp->newl_len == sp->newl_cnt) {				\
		sp->newl_len += 25;					\
		REALLOC(sp, sp->newl, size_t *,				\
		    sp->newl_len * sizeof(size_t));			\
		if (sp->newl == NULL) {					\
			sp->newl_len = 0;				\
			return (1);					\
		}							\
	}								\
}

#define	BUILD(sp, l, len) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAX(lbclen + (len), 256);			\
		REALLOC(sp, lb, char *, lblen);				\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	memcpy(lb + lbclen, l, len);					\
	lbclen += len;							\
}

#define	NEEDSP(sp, len, pnt) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAX(lbclen + (len), 256);			\
		REALLOC(sp, lb, char *, lblen);				\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
		pnt = lb + lbclen;					\
	}								\
}

static int
s(sp, cmdp, s, re, flags)
	SCR *sp;
	EXCMD *cmdp;
	char *s;
	regex_t *re;
	u_int flags;
{
	EVENT ev;
	MARK from, to;
	TEXTH tiq;
	recno_t elno, lno, slno;
	long llno;
	regmatch_t match[10];
	size_t blen, cnt, last, lbclen, lblen, len, llen;
	size_t offset, saved_offset, scno;
	int cflag, lflag, nflag, pflag, rflag;
	int didsub, do_eol_match, eflags, empty_ok, eval;
	int linechanged, matched, quit, rval;
	char *bp, *lb;
#ifndef REG_STARTEND
	char c;
#endif

	NEEDFILE(sp, cmdp);

	slno = sp->lno;
	scno = sp->cno;

	/*
	 * !!!
	 * Historically, the 'g' and 'c' suffices were always toggled as flags,
	 * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
	 * not set, they were initialized to 0 for all substitute commands.  If
	 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
	 * specified substitute/replacement patterns (see ex_s()).
	 */
	if (!O_ISSET(sp, O_EDCOMPATIBLE))
		sp->c_suffix = sp->g_suffix = 0;

	/*
	 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
	 * it only displayed the last change.  I'd disallow them, but they are
	 * useful in combination with the [v]global commands.  In the current
	 * model the problem is combining them with the 'c' flag -- the screen
	 * would have to flip back and forth between the confirm screen and the
	 * ex print screen, which would be pretty awful.  We do display all
	 * changes, though, for what that's worth.
	 *
	 * !!!
	 * Historic vi was fairly strict about the order of "options", the
	 * count, and "flags".  I'm somewhat fuzzy on the difference between
	 * options and flags, anyway, so this is a simpler approach, and we
	 * just take it them in whatever order the user gives them.  (The ex
	 * usage statement doesn't reflect this.)
	 */
	cflag = lflag = nflag = pflag = rflag = 0;
	if (s == NULL)
		goto noargs;
	for (lno = OOBLNO; *s != '\0'; ++s)
		switch (*s) {
		case ' ':
		case '\t':
			continue;
		case '+':
			++cmdp->flagoff;
			break;
		case '-':
			--cmdp->flagoff;
			break;
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			if (lno != OOBLNO)
				goto usage;
			errno = 0;
			llno = strtoul(s, &s, 10);
			if (*s == '\0')		/* Loop increment correction. */
				--s;
			lno = llno;
			if (llno != lno) {
				errno = ERANGE;
				llno = LONG_MAX;
			}
			if (errno == ERANGE) {
				if (llno == LONG_MAX)
					msgq(sp, M_ERR, "153|Count overflow");
				else if (llno == LONG_MIN)
					msgq(sp, M_ERR, "154|Count underflow");
				else
					msgq(sp, M_SYSERR, NULL);
				return (1);
			}
			/*
			 * In historic vi, the count was inclusive from the
			 * second address.
			 */
			cmdp->addr1.lno = cmdp->addr2.lno;
			cmdp->addr2.lno += lno - 1;
			if (!db_exist(sp, cmdp->addr2.lno) &&
			    db_last(sp, &cmdp->addr2.lno))
				return (1);
			break;
		case '#':
			nflag = 1;
			break;
		case 'c':
			sp->c_suffix = !sp->c_suffix;

			/* Ex text structure initialization. */
			if (F_ISSET(sp, SC_EX)) {
				memset(&tiq, 0, sizeof(TEXTH));
				CIRCLEQ_INIT(&tiq);
			}
			break;
		case 'g':
			sp->g_suffix = !sp->g_suffix;
			break;
		case 'l':
			lflag = 1;
			break;
		case 'p':
			pflag = 1;
			break;
		case 'r':
			if (LF_ISSET(SUB_FIRST)) {
				msgq(sp, M_ERR,
		    "155|Regular expression specified; r flag meaningless");
				return (1);
			}
			if (!F_ISSET(sp, SC_RE_SEARCH)) {
				ex_emsg(sp, NULL, EXM_NOPREVRE);
				return (1);
			}
			rflag = 1;
			re = &sp->re_c;
			break;
		default:
			goto usage;
		}

	if (*s != '\0' || !rflag && LF_ISSET(SUB_MUSTSETR)) {
usage:		ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
		return (1);
	}

noargs:	if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
		msgq(sp, M_ERR,
"156|The #, l and p flags may not be combined with the c flag in vi mode");
		return (1);
	}

	/*
	 * bp:		if interactive, line cache
	 * blen:	if interactive, line cache length
	 * lb:		build buffer pointer.
	 * lbclen:	current length of built buffer.
	 * lblen;	length of build buffer.
	 */
	bp = lb = NULL;
	blen = lbclen = lblen = 0;

	/* For each line... */
	for (matched = quit = 0, lno = cmdp->addr1.lno,
	    elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {

		/* Someone's unhappy, time to stop. */
		if (INTERRUPTED(sp))
			break;

		/* Get the line. */
		if (db_get(sp, lno, DBG_FATAL, &s, &llen))
			goto err;

		/*
		 * Make a local copy if doing confirmation -- when calling
		 * the confirm routine we're likely to lose the cached copy.
		 */
		if (sp->c_suffix) {
			if (bp == NULL) {
				GET_SPACE_RET(sp, bp, blen, llen);
			} else
				ADD_SPACE_RET(sp, bp, blen, llen);
			if (bp)
				memcpy(bp, s, llen);
			s = bp;
		}

		/* Start searching from the beginning. */
		offset = 0;
		len = llen;

		/* Reset the build buffer offset. */
		lbclen = 0;

		/* Reset empty match flag. */
		empty_ok = 1;

		/*
		 * We don't want to have to do a setline if the line didn't
		 * change -- keep track of whether or not this line changed.
		 * If doing confirmations, don't want to keep setting the
		 * line if change is refused -- keep track of substitutions.
		 */
		didsub = linechanged = 0;

		/* New line, do an EOL match. */
		do_eol_match = 1;

		/* It's not nul terminated, but we pretend it is. */
#ifdef REG_STARTEND
		eflags = REG_STARTEND;
#else
		eflags = 0;
#endif

		/*
		 * The search area is from s + offset to the EOL.
		 *
		 * Generally, match[0].rm_so is the offset of the start
		 * of the match from the start of the search, and offset
		 * is the offset of the start of the last search.
		 */
nextmatch:
#ifdef REG_STARTEND
		match[0].rm_so = 0;
		match[0].rm_eo = len;
#else
		c = s[len];
		s[len] = '\0';
#endif

		/* Get the next match. */
		eval = regexec(re, (char *)s + offset, 10, match, eflags);
#ifndef REG_STARTEND
		s[len] = c;
#endif

		/*
		 * There wasn't a match or if there was an error, deal with
		 * it.  If there was a previous match in this line, resolve
		 * the changes into the database.  Otherwise, just move on.
		 */
		if (eval == REG_NOMATCH)
			goto endmatch;
		if (eval != 0) {
			re_error(sp, eval, re);
			goto err;
		}
		matched = 1;

		/* Only the first search can match an anchored expression. */
		eflags |= REG_NOTBOL;

		/*
		 * !!!
		 * It's possible to match 0-length strings -- for example, the
		 * command s;a*;X;, when matched against the string "aabb" will
		 * result in "XbXbX", i.e. the matches are "aa", the space
		 * between the b's and the space between the b's and the end of
		 * the string.  There is a similar space between the beginning
		 * of the string and the a's.  The rule that we use (because vi
		 * historically used it) is that any 0-length match, occurring
		 * immediately after a match, is ignored.  Otherwise, the above
		 * example would have resulted in "XXbXbX".  Another example is
		 * incorrectly using " *" to replace groups of spaces with one
		 * space.
		 *
		 * The way we do this is that if we just had a successful match,
		 * the starting offset does not skip characters, and the match
		 * is empty, ignore the match and move forward.  If there's no
		 * more characters in the string, we were attempting to match
		 * after the last character, so quit.
		 */
		if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
			empty_ok = 1;
			if (len == 0)
				goto endmatch;
			BUILD(sp, s + offset, 1)
			++offset;
			--len;
			goto nextmatch;
		}

		/* Confirm change. */
		if (sp->c_suffix) {
			/*
			 * Set the cursor position for confirmation.  Note,
			 * if we matched on a '$', the cursor may be past
			 * the end of line.
			 */
			from.lno = to.lno = lno;
			from.cno = match[0].rm_so + offset;
			to.cno = match[0].rm_eo + offset;
			/*
			 * Both ex and vi have to correct for a change before
			 * the first character in the line.
			 */
			if (llen == 0)
				from.cno = to.cno = 0;
			if (F_ISSET(sp, SC_VI)) {
				/*
				 * Only vi has to correct for a change after
				 * the last character in the line.
				 *
				 * XXX
				 * It would be nice to change the vi code so
				 * that we could display a cursor past EOL.
				 */
				if (to.cno >= llen)
					to.cno = llen - 1;
				if (from.cno >= llen)
					from.cno = llen - 1;

				sp->lno = from.lno;
				sp->cno = from.cno;
				if (vs_refresh(sp, 1))
					goto err;

				vs_update(sp, msg_cat(sp,
				    "169|Confirm change? [n]", NULL), NULL);

				if (v_event_get(sp, &ev, 0, 0))
					goto err;
				switch (ev.e_event) {
				case E_CHARACTER:
					break;
				case E_EOF:
				case E_ERR:
				case E_INTERRUPT:
					goto lquit;
				default:
					v_event_err(sp, &ev);
					goto lquit;
				}
			} else {
				if (ex_print(sp, cmdp, &from, &to, 0) ||
				    ex_scprint(sp, &from, &to))
					goto lquit;
				if (ex_txt(sp, &tiq, 0, TXT_CR))
					goto err;
				ev.e_c = tiq.cqh_first->lb[0];
			}

			switch (ev.e_c) {
			case CH_YES:
				break;
			default:
			case CH_NO:
				didsub = 0;
				BUILD(sp, s +offset, match[0].rm_eo);
				goto skip;
			case CH_QUIT:
				/* Set the quit/interrupted flags. */
lquit:				quit = 1;
				F_SET(sp->gp, G_INTERRUPTED);

				/*
				 * Resolve any changes, then return to (and
				 * exit from) the main loop.
				 */
				goto endmatch;
			}
		}

		/*
		 * Set the cursor to the last position changed, converting
		 * from 1-based to 0-based.
		 */
		sp->lno = lno;
		sp->cno = match[0].rm_so;

		/* Copy the bytes before the match into the build buffer. */
		BUILD(sp, s + offset, match[0].rm_so);

		/* Substitute the matching bytes. */
		didsub = 1;
		if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
			goto err;

		/* Set the change flag so we know this line was modified. */
		linechanged = 1;

		/* Move past the matched bytes. */
skip:		offset += match[0].rm_eo;
		len -= match[0].rm_eo;

		/* A match cannot be followed by an empty pattern. */
		empty_ok = 0;

		/*
		 * If doing a global change with confirmation, we have to
		 * update the screen.  The basic idea is to store the line
		 * so the screen update routines can find it, and restart.
		 */
		if (didsub && sp->c_suffix && sp->g_suffix) {
			/*
			 * The new search offset will be the end of the
			 * modified line.
			 */
			saved_offset = lbclen;

			/* Copy the rest of the line. */
			if (len)
				BUILD(sp, s + offset, len)

			/* Set the new offset. */
			offset = saved_offset;

			/* Store inserted lines, adjusting the build buffer. */
			last = 0;
			if (sp->newl_cnt) {
				for (cnt = 0;
				    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
					if (db_insert(sp, lno,
					    lb + last, sp->newl[cnt] - last))
						goto err;
					last = sp->newl[cnt] + 1;
					++sp->rptlines[L_ADDED];
				}
				lbclen -= last;
				offset -= last;
				sp->newl_cnt = 0;
			}

			/* Store and retrieve the line. */
			if (db_set(sp, lno, lb + last, lbclen))
				goto err;
			if (db_get(sp, lno, DBG_FATAL, &s, &llen))
				goto err;
			ADD_SPACE_RET(sp, bp, blen, llen)
			memcpy(bp, s, llen);
			s = bp;
			len = llen - offset;

			/* Restart the build. */
			lbclen = 0;
			BUILD(sp, s, offset);

			/*
			 * If we haven't already done the after-the-string
			 * match, do one.  Set REG_NOTEOL so the '$' pattern
			 * only matches once.
			 */
			if (!do_eol_match)
				goto endmatch;
			if (offset == len) {
				do_eol_match = 0;
				eflags |= REG_NOTEOL;
			}
			goto nextmatch;
		}

		/*
		 * If it's a global:
		 *
		 * If at the end of the string, do a test for the after
		 * the string match.  Set REG_NOTEOL so the '$' pattern
		 * only matches once.
		 */
		if (sp->g_suffix && do_eol_match) {
			if (len == 0) {
				do_eol_match = 0;
				eflags |= REG_NOTEOL;
			}
			goto nextmatch;
		}

endmatch:	if (!linechanged)
			continue;

		/* Copy any remaining bytes into the build buffer. */
		if (len)
			BUILD(sp, s + offset, len)

		/* Store inserted lines, adjusting the build buffer. */
		last = 0;
		if (sp->newl_cnt) {
			for (cnt = 0;
			    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
				if (db_insert(sp,
				    lno, lb + last, sp->newl[cnt] - last))
					goto err;
				last = sp->newl[cnt] + 1;
				++sp->rptlines[L_ADDED];
			}
			lbclen -= last;
			sp->newl_cnt = 0;
		}

		/* Store the changed line. */
		if (db_set(sp, lno, lb + last, lbclen))
			goto err;

		/* Update changed line counter. */
		if (sp->rptlchange != lno) {
			sp->rptlchange = lno;
			++sp->rptlines[L_CHANGED];
		}

		/*
		 * !!!
		 * Display as necessary.  Historic practice is to only
		 * display the last line of a line split into multiple
		 * lines.
		 */
		if (lflag || nflag || pflag) {
			from.lno = to.lno = lno;
			from.cno = to.cno = 0;
			if (lflag)
				(void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
			if (nflag)
				(void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
			if (pflag)
				(void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
		}
	}

	/*
	 * !!!
	 * Historically, vi attempted to leave the cursor at the same place if
	 * the substitution was done at the current cursor position.  Otherwise
	 * it moved it to the first non-blank of the last line changed.  There
	 * were some problems: for example, :s/$/foo/ with the cursor on the
	 * last character of the line left the cursor on the last character, or
	 * the & command with multiple occurrences of the matching string in the
	 * line usually left the cursor in a fairly random position.
	 *
	 * We try to do the same thing, with the exception that if the user is
	 * doing substitution with confirmation, we move to the last line about
	 * which the user was consulted, as opposed to the last line that they
	 * actually changed.  This prevents a screen flash if the user doesn't
	 * change many of the possible lines.
	 */
	if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
		sp->cno = 0;
		(void)nonblank(sp, sp->lno, &sp->cno);
	}

	/*
	 * If not in a global command, and nothing matched, say so.
	 * Else, if none of the lines displayed, put something up.
	 */
	rval = 0;
	if (!matched) {
		if (!F_ISSET(sp, SC_EX_GLOBAL)) {
			msgq(sp, M_ERR, "157|No match found");
			goto err;
		}
	} else if (!lflag && !nflag && !pflag)
		F_SET(cmdp, E_AUTOPRINT);

	if (0) {
err:		rval = 1;
	}

	if (bp != NULL)
		FREE_SPACE(sp, bp, blen);
	if (lb != NULL)
		free(lb);
	return (rval);
}

/*
 * re_compile --
 *	Compile the RE.
 *
 * PUBLIC: int re_compile __P((SCR *,
 * PUBLIC:     char *, size_t, char **, size_t *, regex_t *, u_int));
 */
int
re_compile(sp, ptrn, plen, ptrnp, lenp, rep, flags)
	SCR *sp;
	char *ptrn, **ptrnp;
	size_t plen, *lenp;
	regex_t *rep;
	u_int flags;
{
	size_t len;
	int reflags, replaced, rval;
	char *p;

	/* Set RE flags. */
	reflags = 0;
	if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) {
		if (O_ISSET(sp, O_EXTENDED))
			reflags |= REG_EXTENDED;
		if (O_ISSET(sp, O_IGNORECASE))
			reflags |= REG_ICASE;
		if (O_ISSET(sp, O_ICLOWER)) {
			for (p = ptrn, len = plen; len > 0; ++p, --len)
				if (isupper((unsigned char)*p))
					break;
			if (len == 0)
				reflags |= REG_ICASE;
		}
	}

	/* If we're replacing a saved value, clear the old one. */
	if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
		regfree(&sp->re_c);
		F_CLR(sp, SC_RE_SEARCH);
	}
	if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
		regfree(&sp->subre_c);
		F_CLR(sp, SC_RE_SUBST);
	}

	/*
	 * If we're saving the string, it's a pattern we haven't seen before,
	 * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
	 * later recompilation.   Free any previously saved value.
	 */
	if (ptrnp != NULL) {
		if (LF_ISSET(RE_C_CSCOPE)) {
			if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
				return (1);
			/*
			 * XXX
			 * Currently, the match-any-<blank> expression used in
			 * re_cscope_conv() requires extended RE's.  This may
			 * not be right or safe.
			 */
			reflags |= REG_EXTENDED;
		} else if (LF_ISSET(RE_C_TAG)) {
			if (re_tag_conv(sp, &ptrn, &plen, &replaced))
				return (1);
		} else
			if (re_conv(sp, &ptrn, &plen, &replaced))
				return (1);

		/* Discard previous pattern. */
		if (*ptrnp != NULL) {
			free(*ptrnp);
			*ptrnp = NULL;
		}
		if (lenp != NULL)
			*lenp = plen;

		/*
		 * Copy the string into allocated memory.
		 *
		 * XXX
		 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
		 * for now.  There's just no other solution.  
		 */
		MALLOC(sp, *ptrnp, char *, plen + 1);
		if (*ptrnp != NULL) {
			memcpy(*ptrnp, ptrn, plen);
			(*ptrnp)[plen] = '\0';
		}

		/* Free up conversion-routine-allocated memory. */
		if (replaced)
			FREE_SPACE(sp, ptrn, 0);

		if (*ptrnp == NULL)
			return (1);

		ptrn = *ptrnp;
	}

	/*
	 * XXX
	 * Regcomp isn't 8-bit clean, so we just lost if the pattern
	 * contained a nul.  Bummer!
	 */
	if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
		if (!LF_ISSET(RE_C_SILENT))
			re_error(sp, rval, rep); 
		return (1);
	}

	if (LF_ISSET(RE_C_SEARCH))
		F_SET(sp, SC_RE_SEARCH);
	if (LF_ISSET(RE_C_SUBST))
		F_SET(sp, SC_RE_SUBST);

	return (0);
}

/*
 * re_conv --
 *	Convert vi's regular expressions into something that the
 *	the POSIX 1003.2 RE functions can handle.
 *
 * There are three conversions we make to make vi's RE's (specifically
 * the global, search, and substitute patterns) work with POSIX RE's.
 *
 * 1: If O_MAGIC is not set, strip backslashes from the magic character
 *    set (.[*~) that have them, and add them to the ones that don't.
 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
 *    from the last substitute command's replacement string.  If O_MAGIC
 *    is set, it's the string "~".
 * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
 *    new RE escapes.
 *
 * !!!/XXX
 * This doesn't exactly match the historic behavior of vi because we do
 * the ~ substitution before calling the RE engine, so magic characters
 * in the replacement string will be expanded by the RE engine, and they
 * weren't historically.  It's a bug.
 */
static int
re_conv(sp, ptrnp, plenp, replacedp)
	SCR *sp;
	char **ptrnp;
	size_t *plenp;
	int *replacedp;
{
	size_t blen, len, needlen;
	int magic;
	char *bp, *p, *t;

	/*
	 * First pass through, we figure out how much space we'll need.
	 * We do it in two passes, on the grounds that most of the time
	 * the user is doing a search and won't have magic characters.
	 * That way we can skip most of the memory allocation and copies.
	 */
	magic = 0;
	for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
		switch (*p) {
		case '\\':
			if (len > 1) {
				--len;
				switch (*++p) {
				case '<':
					magic = 1;
					needlen += sizeof(RE_WSTART);
					break;
				case '>':
					magic = 1;
					needlen += sizeof(RE_WSTOP);
					break;
				case '~':
					if (!O_ISSET(sp, O_MAGIC)) {
						magic = 1;
						needlen += sp->repl_len;
					}
					break;
				case '.':
				case '[':
				case '*':
					if (!O_ISSET(sp, O_MAGIC)) {
						magic = 1;
						needlen += 1;
					}
					break;
				default:
					needlen += 2;
				}
			} else
				needlen += 1;
			break;
		case '~':
			if (O_ISSET(sp, O_MAGIC)) {
				magic = 1;
				needlen += sp->repl_len;
			}
			break;
		case '.':
		case '[':
		case '*':
			if (!O_ISSET(sp, O_MAGIC)) {
				magic = 1;
				needlen += 2;
			}
			break;
		default:
			needlen += 1;
			break;
		}

	if (!magic) {
		*replacedp = 0;
		return (0);
	}

	/* Get enough memory to hold the final pattern. */
	*replacedp = 1;
	GET_SPACE_RET(sp, bp, blen, needlen);

	for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
		switch (*p) {
		case '\\':
			if (len > 1) {
				--len;
				switch (*++p) {
				case '<':
					memcpy(t,
					    RE_WSTART, sizeof(RE_WSTART) - 1);
					t += sizeof(RE_WSTART) - 1;
					break;
				case '>':
					memcpy(t,
					    RE_WSTOP, sizeof(RE_WSTOP) - 1);
					t += sizeof(RE_WSTOP) - 1;
					break;
				case '~':
					if (O_ISSET(sp, O_MAGIC))
						*t++ = '~';
					else {
						memcpy(t,
						    sp->repl, sp->repl_len);
						t += sp->repl_len;
					}
					break;
				case '.':
				case '[':
				case '*':
					if (O_ISSET(sp, O_MAGIC))
						*t++ = '\\';
					*t++ = *p;
					break;
				default:
					*t++ = '\\';
					*t++ = *p;
				}
			} else
				*t++ = '\\';
			break;
		case '~':
			if (O_ISSET(sp, O_MAGIC)) {
				memcpy(t, sp->repl, sp->repl_len);
				t += sp->repl_len;
			} else
				*t++ = '~';
			break;
		case '.':
		case '[':
		case '*':
			if (!O_ISSET(sp, O_MAGIC))
				*t++ = '\\';
			*t++ = *p;
			break;
		default:
			*t++ = *p;
			break;
		}

	*ptrnp = bp;
	*plenp = t - bp;
	return (0);
}

/*
 * re_tag_conv --
 *	Convert a tags search path into something that the POSIX
 *	1003.2 RE functions can handle.
 */
static int
re_tag_conv(sp, ptrnp, plenp, replacedp)
	SCR *sp;
	char **ptrnp;
	size_t *plenp;
	int *replacedp;
{
	size_t blen, len;
	int lastdollar;
	char *bp, *p, *t;

	len = *plenp;

	/* Max memory usage is 2 times the length of the string. */
	*replacedp = 1;
	GET_SPACE_RET(sp, bp, blen, len * 2);

	p = *ptrnp;
	t = bp;

	/* If the last character is a '/' or '?', we just strip it. */
	if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
		--len;

	/* If the next-to-last or last character is a '$', it's magic. */
	if (len > 0 && p[len - 1] == '$') {
		--len;
		lastdollar = 1;
	} else
		lastdollar = 0;

	/* If the first character is a '/' or '?', we just strip it. */
	if (len > 0 && (p[0] == '/' || p[0] == '?')) {
		++p;
		--len;
	}

	/* If the first or second character is a '^', it's magic. */
	if (p[0] == '^') {
		*t++ = *p++;
		--len;
	}

	/*
	 * Escape every other magic character we can find, meanwhile stripping
	 * the backslashes ctags inserts when escaping the search delimiter
	 * characters.
	 */
	for (; len > 0; --len) {
		if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
			++p;
			--len;
		} else if (strchr("^.[]$*", p[0]))
			*t++ = '\\';
		*t++ = *p++;
	}
	if (lastdollar)
		*t++ = '$';

	*ptrnp = bp;
	*plenp = t - bp;
	return (0);
}

/*
 * re_cscope_conv --
 *	 Convert a cscope search path into something that the POSIX
 *      1003.2 RE functions can handle.
 */
static int
re_cscope_conv(sp, ptrnp, plenp, replacedp)
	SCR *sp;
	char **ptrnp;
	size_t *plenp;
	int *replacedp;
{
	size_t blen, len, nspaces;
	char *bp, *p, *t;

	/*
	 * Each space in the source line printed by cscope represents an
	 * arbitrary sequence of spaces, tabs, and comments.
	 */
#define	CSCOPE_RE_SPACE		"([ \t]|/\\*([^*]|\\*/)*\\*/)*"
	for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
		if (*p == ' ')
			++nspaces;

	/*
	 * Allocate plenty of space:
	 *	the string, plus potential escaping characters;
	 *	nspaces + 2 copies of CSCOPE_RE_SPACE;
	 *	^, $, nul terminator characters.
	 */
	*replacedp = 1;
	len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
	GET_SPACE_RET(sp, bp, blen, len);

	p = *ptrnp;
	t = bp;

	*t++ = '^';
	memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
	t += sizeof(CSCOPE_RE_SPACE) - 1;

	for (len = *plenp; len > 0; ++p, --len)
		if (*p == ' ') {
			memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
			t += sizeof(CSCOPE_RE_SPACE) - 1;
		} else {
			if (strchr("\\^.[]$*+?()|{}", *p))
				*t++ = '\\';
			*t++ = *p;
		}

	memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
	t += sizeof(CSCOPE_RE_SPACE) - 1;
	*t++ = '$';

	*ptrnp = bp;
	*plenp = t - bp;
	return (0);
}

/*
 * re_error --
 *	Report a regular expression error.
 *
 * PUBLIC: void re_error __P((SCR *, int, regex_t *));
 */
void
re_error(sp, errcode, preg)
	SCR *sp;
	int errcode;
	regex_t *preg;
{
	size_t s;
	char *oe;

	s = regerror(errcode, preg, "", 0);
	if ((oe = malloc(s)) == NULL)
		msgq(sp, M_SYSERR, NULL);
	else {
		(void)regerror(errcode, preg, oe, s);
		msgq(sp, M_ERR, "RE error: %s", oe);
		free(oe);
	}
}

/*
 * re_sub --
 * 	Do the substitution for a regular expression.
 */
static int
re_sub(sp, ip, lbp, lbclenp, lblenp, match)
	SCR *sp;
	char *ip;			/* Input line. */
	char **lbp;
	size_t *lbclenp, *lblenp;
	regmatch_t match[10];
{
	enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
	size_t lbclen, lblen;		/* Local copies. */
	size_t mlen;			/* Match length. */
	size_t rpl;			/* Remaining replacement length. */
	char *rp;			/* Replacement pointer. */
	int ch;
	int no;				/* Match replacement offset. */
	char *p, *t;			/* Buffer pointers. */
	char *lb;			/* Local copies. */

	lb = *lbp;			/* Get local copies. */
	lbclen = *lbclenp;
	lblen = *lblenp;

	/*
	 * QUOTING NOTE:
	 *
	 * There are some special sequences that vi provides in the
	 * replacement patterns.
	 *	 & string the RE matched (\& if nomagic set)
	 *	\# n-th regular subexpression
	 *	\E end \U, \L conversion
	 *	\e end \U, \L conversion
	 *	\l convert the next character to lower-case
	 *	\L convert to lower-case, until \E, \e, or end of replacement
	 *	\u convert the next character to upper-case
	 *	\U convert to upper-case, until \E, \e, or end of replacement
	 *
	 * Otherwise, since this is the lowest level of replacement, discard
	 * all escaping characters.  This (hopefully) matches historic practice.
	 */
#define	OUTCH(ch, nltrans) {						\
	CHAR_T __ch = (ch);						\
	u_int __value = KEY_VAL(sp, __ch);				\
	if (nltrans && (__value == K_CR || __value == K_NL)) {		\
		NEEDNEWLINE(sp);					\
		sp->newl[sp->newl_cnt++] = lbclen;			\
	} else if (conv != C_NOTSET) {					\
		switch (conv) {						\
		case C_ONELOWER:					\
			conv = C_NOTSET;				\
			/* FALLTHROUGH */				\
		case C_LOWER:						\
			if (isupper(__ch))				\
				__ch = tolower(__ch);			\
			break;						\
		case C_ONEUPPER:					\
			conv = C_NOTSET;				\
			/* FALLTHROUGH */				\
		case C_UPPER:						\
			if (islower(__ch))				\
				__ch = toupper(__ch);			\
			break;						\
		default:						\
			abort();					\
		}							\
	}								\
	NEEDSP(sp, 1, p);						\
	*p++ = __ch;							\
	++lbclen;							\
}
	conv = C_NOTSET;
	for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
		switch (ch = *rp++) {
		case '&':
			if (O_ISSET(sp, O_MAGIC)) {
				no = 0;
				goto subzero;
			}
			break;
		case '\\':
			if (rpl == 0)
				break;
			--rpl;
			switch (ch = *rp) {
			case '&':
				++rp;
				if (!O_ISSET(sp, O_MAGIC)) {
					no = 0;
					goto subzero;
				}
				break;
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				no = *rp++ - '0';
subzero:			if (match[no].rm_so == -1 ||
			    	    match[no].rm_eo == -1)
					break;
				mlen = match[no].rm_eo - match[no].rm_so;
				for (t = ip + match[no].rm_so; mlen--; ++t)
					OUTCH(*t, 0);
				continue;
			case 'e':
			case 'E':
				++rp;
				conv = C_NOTSET;
				continue;
			case 'l':
				++rp;
				conv = C_ONELOWER;
				continue;
			case 'L':
				++rp;
				conv = C_LOWER;
				continue;
			case 'u':
				++rp;
				conv = C_ONEUPPER;
				continue;
			case 'U':
				++rp;
				conv = C_UPPER;
				continue;
			default:
				++rp;
				break;
			}
		}
		OUTCH(ch, 1);
	}

	*lbp = lb;			/* Update caller's information. */
	*lbclenp = lbclen;
	*lblenp = lblen;
	return (0);
}
@


1.18
log
@Coverity CID 4171: bp can be null if len == 0. So check for it anyway and
don't call memcpy unconditionally.
@
text
@d1 1
a1 1
/*	$NetBSD: ex_subst.c,v 1.17 2005/02/12 12:53:23 aymeric Exp $	*/
d19 1
a19 1
__RCSID("$NetBSD: ex_subst.c,v 1.17 2005/02/12 12:53:23 aymeric Exp $");
@


1.17
log
@Fix the RCSID's to be $NetBSD$ instead of $NetBSD
@
text
@d1 1
a1 1
/*	$NetBSD: ex_subst.c,v 1.16 2004/11/05 19:50:12 dsl Exp $	*/
d19 1
a19 1
__RCSID("$NetBSD$");
d535 2
a536 1
			memcpy(bp, s, llen);
@


1.16
log
@Add (unsigned char) cast to ctype functions
@
text
@d1 1
a1 1
/*	$NetBSD: ex_subst.c,v 1.15 2004/02/13 17:56:18 wiz Exp $	*/
d19 1
a19 1
__RCSID("$NetBSD");
@


1.15
log
@Spell the plural of suffix "suffixes", not "suffices".
Inspired by PR 24400 by Todd Vierling.
@
text
@d1 1
a1 1
/*	$NetBSD: ex_subst.c,v 1.14 2002/04/09 01:47:34 thorpej Exp $	*/
d942 1
a942 1
				if (isupper(*p))
@


1.15.4.1
log
@Pull up revision 1.17 (requested by aymeric in ticket #1195):
Fix the RCSID's to be $NetBSD$ instead of $NetBSD
@
text
@d1 1
a1 1
/*	$NetBSD: ex_subst.c,v 1.15 2004/02/13 17:56:18 wiz Exp $	*/
d19 1
a19 1
__RCSID("$NetBSD$");
@


1.15.2.1
log
@Fix the RCSID's to be $NetBSD$ instead of $NetBSD
@
text
@d1 1
a1 1
/*	$NetBSD: ex_subst.c,v 1.15 2004/02/13 17:56:18 wiz Exp $	*/
d19 1
a19 1
__RCSID("$NetBSD$");
@


1.15.2.2
log
@Backout previous. Sorry.
@
text
@d19 1
a19 1
__RCSID("$NetBSD");
@


1.14
log
@Use __RCSID() and __COPYRIGHT().
@
text
@d1 1
a1 1
/*	$NetBSD: ex_subst.c,v 1.13 2001/03/31 11:37:50 aymeric Exp $	*/
d96 1
a96 1
	 * state of the 'c' and 'g' suffices.
@


1.13
log
@merge changes after import of nvi 1.79
@
text
@d1 1
a1 1
/*	$NetBSD: ex_subst.c,v 1.12 2000/03/17 02:23:17 christos Exp $	*/
d14 1
d16 1
d18 3
@


1.12
log
@Don't depend on BSD regexec extensions (REG_STARTEND)
Emulate it if it is not defined.
@
text
@d1 1
a1 1
/*	$NetBSD: ex_subst.c,v 1.11 1998/01/09 08:08:05 perry Exp $	*/
d15 1
a15 1
static const char sccsid[] = "@@(#)ex_subst.c	10.30 (Berkeley) 5/16/96";
d37 2
a38 2
static int re_conv __P((SCR *, char **, int *));
static int re_cscope_conv __P((SCR *, char **, int *));
d41 1
a41 1
static int re_tag_conv __P((SCR *, char **, int *));
d96 1
a96 1
	 * Get the pattern string, toss escaped characters.
d108 1
a108 1
	 * Only toss an escape character if it escapes a delimiter.
d111 1
a111 1
	 * escaping a single escape character is removed, but that's
d144 3
a146 3
		/* Compile the RE if necessary. */
		if (!F_ISSET(sp, SC_RE_SEARCH) &&
		    re_compile(sp, sp->re, NULL, NULL, &sp->re_c, RE_C_SEARCH))
d157 2
a158 2
		if (re_compile(sp,
		    ptrn, &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH))
d160 2
a161 2
		if (re_compile(sp,
		    ptrn, &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST))
d184 1
a184 1
	 * Only toss an escape character if it escapes a delimiter or
d227 1
a227 1
				memmove(t, sp->repl, sp->repl_len);
d243 1
a243 1
			memmove(sp->repl, bp, len);
d267 2
a268 2
	if (!F_ISSET(sp, SC_RE_SUBST) &&
	    re_compile(sp, sp->subre, NULL, NULL, &sp->subre_c, RE_C_SUBST))
d291 2
a292 2
	if (!F_ISSET(sp, SC_RE_SEARCH) &&
	    re_compile(sp, sp->re, NULL, NULL, &sp->re_c, RE_C_SEARCH))
d332 1
a332 1
	memmove(lb + lbclen, l, len);					\
d530 1
a530 1
			memmove(bp, s, llen);
d569 1
a569 1
nextmatch:	
d574 2
a575 2
		c = ((char *)s)[len];
		((char *)s)[len] = '\0';
d577 1
d581 1
a581 1
		((char *)s)[len] = c;
a583 1

d777 1
a777 1
			memmove(bp, s, llen);
d914 1
a914 1
 * PUBLIC:     char *, char **, size_t *, regex_t *, u_int));
d917 1
a917 1
re_compile(sp, ptrn, ptrnp, lenp, rep, flags)
d920 1
a920 1
	size_t *lenp;
d936 1
a936 1
			for (p = ptrn; *p != '\0'; ++p)
d939 1
a939 1
			if (*p == '\0')
a958 1
	replaced = 0;
a959 4
		if (*ptrnp != NULL) {
			free(*ptrnp);
			*ptrnp = NULL;
		}
d961 1
a961 1
			if (re_cscope_conv(sp, &ptrn, &replaced))
d971 1
a971 1
			if (re_tag_conv(sp, &ptrn, &replaced))
d974 1
a974 1
			if (re_conv(sp, &ptrn, &replaced))
d976 6
a981 1
		len = strlen(ptrn);
d983 20
a1002 2
			*lenp = len;
		if ((*ptrnp = v_strdup(sp, ptrn, len)) == NULL)
d1004 2
d1008 6
a1013 7
	rval = regcomp(rep, ptrn, reflags);

	/* Free up any allocated memory. */
	if (replaced)
		FREE_SPACE(sp, ptrn, 0);

	if (rval) {
d1050 1
a1050 1
re_conv(sp, ptrnp, replacedp)
d1053 1
d1056 1
a1056 1
	size_t blen, needlen;
d1064 1
a1064 1
	 * That way we can skip the malloc and memmove's.
d1066 2
a1067 1
	for (p = *ptrnp, magic = 0, needlen = 0; *p != '\0'; ++p)
d1070 4
a1073 11
			switch (*++p) {
			case '<':
				magic = 1;
				needlen += sizeof(RE_WSTART);
				break;
			case '>':
				magic = 1;
				needlen += sizeof(RE_WSTOP);
				break;
			case '~':
				if (!O_ISSET(sp, O_MAGIC)) {
d1075 3
a1077 7
					needlen += sp->repl_len;
				}
				break;
			case '.':
			case '[':
			case '*':
				if (!O_ISSET(sp, O_MAGIC)) {
d1079 18
a1096 1
					needlen += 1;
d1098 2
a1099 4
				break;
			default:
				needlen += 2;
			}
d1125 3
a1127 7
	/*
	 * Get enough memory to hold the final pattern.
	 *
	 * XXX
	 * It's nul-terminated, for now.
	 */
	GET_SPACE_RET(sp, bp, blen, needlen + 1);
d1129 1
a1129 1
	for (p = *ptrnp, t = bp; *p != '\0'; ++p)
d1132 32
a1163 15
			switch (*++p) {
			case '<':
				memmove(t, RE_WSTART, sizeof(RE_WSTART) - 1);
				t += sizeof(RE_WSTART) - 1;
				break;
			case '>':
				memmove(t, RE_WSTOP, sizeof(RE_WSTOP) - 1);
				t += sizeof(RE_WSTOP) - 1;
				break;
			case '~':
				if (O_ISSET(sp, O_MAGIC))
					*t++ = '~';
				else {
					memmove(t, sp->repl, sp->repl_len);
					t += sp->repl_len;
d1165 1
a1165 9
				break;
			case '.':
			case '[':
			case '*':
				if (O_ISSET(sp, O_MAGIC))
					*t++ = '\\';
				*t++ = *p;
				break;
			default:
a1166 2
				*t++ = *p;
			}
d1170 1
a1170 1
				memmove(t, sp->repl, sp->repl_len);
a1185 1
	*t = '\0';
d1188 1
a1188 1
	*replacedp = 1;
d1198 1
a1198 1
re_tag_conv(sp, ptrnp, replacedp)
d1201 1
d1208 1
a1208 3
	*replacedp = 0;

	len = strlen(p = *ptrnp);
d1211 1
d1214 1
d1217 7
a1223 6
	/* The last character is a '/' or '?', we just strip it. */
	if (p[len - 1] == '/' || p[len - 1] == '?')
		p[len - 1] = '\0';

	/* The next-to-last character is a '$', and it's magic. */
	if (p[len - 2] == '$') {
a1224 1
		p[len - 2] = '\0';
d1228 2
a1229 2
	/* The first character is a '/' or '?', we just strip it. */
	if (p[0] == '/' || p[0] == '?')
d1231 2
d1234 2
a1235 2
	/* The second character is a '^', and it's magic. */
	if (p[0] == '^')
d1237 2
d1241 2
a1242 2
	 * Escape every other magic character we can find, stripping the
	 * backslashes ctags inserts to escape the search delimiter
d1245 2
a1246 3
	while (p[0]) {
		/* Ctags escapes the search delimiter characters. */
		if (p[0] == '\\' && (p[1] == '/' || p[1] == '?'))
d1248 2
a1249 1
		else if (strchr("^.[]$*", p[0]))
a1254 1
	*t++ = '\0';
d1257 1
a1257 1
	*replacedp = 1;
d1267 1
a1267 1
re_cscope_conv(sp, ptrnp, replacedp)
d1270 1
d1274 1
a1274 1
	char *bp, *p, *re;
d1281 1
a1281 1
	for (nspaces = 0, p = *ptrnp; *p != '\0'; ++p)
d1287 3
a1289 3
	 *	the string, plus potential escaping characters
	 *	nspaces + 2 copies of CSCOPE_RE_SPACE
	 *	^, $, nul terminator characters
d1291 1
d1295 11
a1305 9
	p = bp;
	*p++ = '^';
	memcpy(p, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
	p += sizeof(CSCOPE_RE_SPACE) - 1;

	for (re = *ptrnp; *re != '\0'; ++re)
		if (*re == ' ') {
			memcpy(p, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
			p += sizeof(CSCOPE_RE_SPACE) - 1;
d1307 3
a1309 3
			if (strchr("\\^.[]$*", *re))
				*p++ = '\\';
			*p++ = *re;
d1312 3
a1314 4
	memcpy(p, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
	p += sizeof(CSCOPE_RE_SPACE) - 1;
	*p++ = '$';
	*p = '\0';
d1317 1
a1317 1
	*replacedp = 1;
d1387 1
a1387 1
	 * all escape characters.  This (hopefully) follows historic practice.
d1389 1
a1389 1
#define	OUTCH(ch) {							\
d1392 1
a1392 1
	if (__value == K_CR || __value == K_NL) {			\
d1448 1
a1448 1
					OUTCH(*t);
d1476 1
a1476 1
		OUTCH(ch);
@


1.11
log
@RCS Id Police.
@
text
@d1 1
a1 1
/*	$NetBSD$	*/
d368 3
d556 1
d558 3
d569 3
a571 1
nextmatch:	match[0].rm_so = 0;
d573 4
a576 1

d579 4
@


1.10
log
@a better (more careful) fix for problem fixed by rev 1.9 (the
rolled-forward patch to fix a 'comparison can never be true due to
limited range...' compiler warning).
@
text
@d1 2
@


1.9
log
@merge in fix lost in upgrade.
@
text
@d357 2
a358 2
	recno_t elno, slno;
	long lno;
d418 1
a418 1
			lno = strtoul(s, &s, 10);
d421 5
d427 1
a427 1
				if (lno == LONG_MAX)
d429 1
a429 1
				else if (lno == LONG_MIN)
@


1.8
log
@merge in nvi 1.66
@
text
@d357 2
a358 1
	recno_t elno, lno, slno;
@


1.7
log
@use the correct-sized type for lno.
@
text
@d4 2
d7 1
a7 27
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
d10 2
d13 1
a13 1
static char sccsid[] = "@@(#)ex_subst.c	8.59 (Berkeley) 8/17/94";
a23 1
#include <signal.h>
a26 1
#include <termios.h>
d29 2
a30 6
#include "compat.h"
#include <db.h>
#include <regex.h>

#include "vi.h"
#include "excmd.h"
d35 6
a40 4
static __inline int	regsub __P((SCR *, char *,
			    char **, size_t *, size_t *, regmatch_t [10]));
static int		substitute __P((SCR *, EXF *,
			    EXCMDARG *, char *, regex_t *, u_int));
d43 1
a43 1
 * ex_substitute --
d47 2
d51 1
a51 1
ex_substitute(sp, ep, cmdp)
d53 1
a53 2
	EXF *ep;
	EXCMDARG *cmdp;
d55 1
a55 1
	regex_t *re, lre;
d58 1
a58 1
	int delim, eval, reflags, replaced;
d72 2
d80 2
a81 1
		return (ex_subagain(sp, ep, cmdp));
d83 2
a84 3
	if (isalnum(delim))
		return (substitute(sp, ep,
		    cmdp, p, &sp->subre, SUB_MUSTSETR));
d92 1
a92 1
	
d137 2
a138 2
		if (!F_ISSET(sp, S_SRE_SET)) {
			msgq(sp, M_ERR, "No previous regular expression");
d141 5
a145 1
		re = &sp->sre;
a147 23
		/* Set RE flags. */
		reflags = 0;
		if (O_ISSET(sp, O_EXTENDED))
			reflags |= REG_EXTENDED;
		if (O_ISSET(sp, O_IGNORECASE))
			reflags |= REG_ICASE;

		/* Convert vi-style RE's to POSIX 1003.2 RE's. */
		if (re_conv(sp, &ptrn, &replaced))
			return (1);

		/* Compile the RE. */
		eval = regcomp(&lre, (char *)ptrn, reflags);

		/* Free up any allocated memory. */
		if (replaced)
			FREE_SPACE(sp, ptrn, 0);

		if (eval) {
			re_error(sp, eval, &lre);
			return (1);
		}

a148 2
		 * Set saved RE.
		 *
d150 4
a153 2
		 * Historic practice is that substitutes set the search
		 * direction as well as both substitute and search RE's.
d155 8
a163 7
		sp->sre = lre;
		F_SET(sp, S_SRE_SET);
		sp->subre = lre;
		F_SET(sp, S_SUBRE_SET);

		re = &lre;
		flags = SUB_FIRST;
d165 1
d172 1
a172 1
	 * regsub().
d195 1
a195 1
			FREE(sp->repl, sp->repl_len);
d245 1
a245 1
	return (substitute(sp, ep, cmdp, p, re, flags));
d253 2
d257 1
a257 1
ex_subagain(sp, ep, cmdp)
d259 1
a259 2
	EXF *ep;
	EXCMDARG *cmdp;
d261 2
a262 2
	if (!F_ISSET(sp, S_SUBRE_SET)) {
		msgq(sp, M_ERR, "No previous regular expression");
d265 5
a269 1
	return (substitute(sp, ep, cmdp, cmdp->argv[0]->bp, &sp->subre, 0));
d277 2
d281 1
a281 1
ex_subtilde(sp, ep, cmdp)
d283 1
a283 2
	EXF *ep;
	EXCMDARG *cmdp;
d285 2
a286 2
	if (!F_ISSET(sp, S_SRE_SET)) {
		msgq(sp, M_ERR, "No previous regular expression");
d289 5
a293 1
	return (substitute(sp, ep, cmdp, cmdp->argv[0]->bp, &sp->sre, 0));
d297 5
d306 1
a306 1
 * when the replacement is done.  Don't change it unless you're pretty damned
a345 6
/*
 * substitute --
 *	Do the substitution.  This stuff is *really* tricky.  There are
 *	lots of special cases, and general nastiness.  Don't mess with it
 * 	unless you're pretty confident.
 */
d347 1
a347 1
substitute(sp, ep, cmdp, s, re, flags)
d349 1
a349 2
	EXF *ep;
	EXCMDARG *cmdp;
d354 1
d356 2
a357 2
	recno_t elno;
	long lno;
d359 2
a360 1
	size_t blen, cnt, last, lbclen, lblen, len, llen, offset, saved_offset;
d366 5
d377 1
a377 1
	 * specified substitute/replacement patterns (see ex_substitute()).
d399 2
d422 1
a422 1
					msgq(sp, M_ERR, "Count overflow");
d424 1
a424 1
					msgq(sp, M_ERR, "Count underflow");
d435 3
d444 6
d463 1
a463 1
		    "Regular expression specified; r flag meaningless");
d466 2
a467 3
			if (!F_ISSET(sp, S_SRE_SET)) {
				msgq(sp, M_ERR,
				    "No previous regular expression");
d471 1
a471 1
			re = &sp->sre;
d478 1
a478 1
usage:		msgq(sp, M_ERR, "Usage: %s", cmdp->cmd->usage);
d482 1
a482 1
	if (IN_VI_MODE(sp) && sp->c_suffix && (lflag || nflag || pflag)) {
d484 1
a484 1
	"The #, l and p flags may not be combined with the c flag in vi mode");
d503 1
a503 3
		if (INTERRUPTED(sp)) {
			if (!F_ISSET(sp, S_GLOBAL))
				msgq(sp, M_INFO, "Interrupted");
a504 1
		}
d507 2
a508 4
		if ((s = file_gline(sp, ep, lno, &llen)) == NULL) {
			GETLINE_ERR(sp, lno);
			goto ret1;
		}
d569 1
a569 1
			goto ret1;
a611 5
			 *
			 * XXX
			 * We may want to "fix" this in the confirm routine,
			 * if the confirm routine should be able to display
			 * a cursor past EOL.
d615 5
a619 1
			to.cno = match[0].rm_eo;
d622 9
a630 1
			else {
d635 29
d665 3
a667 2
			switch (sp->s_confirm(sp, ep, &from, &to)) {
			case CONF_YES:
d669 2
a670 1
			case CONF_NO:
d674 4
a677 7
			case CONF_QUIT:
				/* Set the quit flag. */
				quit = 1;

				/* If interruptible, pass the info back. */
				if (F_ISSET(sp, S_INTERRUPTIBLE))
					F_SET(sp, S_INTERRUPTED);
d680 2
a681 2
				 * If any changes, resolve them, otherwise
				 * return to the main loop.
d687 7
d699 2
a700 2
		if (regsub(sp, s + offset, &lb, &lbclen, &lblen, match))
			goto ret1;
d736 1
a736 1
					if (file_iline(sp, ep, lno,
d738 1
a738 1
						goto ret1;
d748 4
a751 6
			if (file_sline(sp, ep, lno, lb + last, lbclen))
				goto ret1;
			if ((s = file_gline(sp, ep, lno, &llen)) == NULL) {
				GETLINE_ERR(sp, lno);
				goto ret1;
			}
d802 1
a802 1
				if (file_iline(sp, ep,
d804 1
a804 1
					goto ret1;
d813 2
a814 2
		if (file_sline(sp, ep, lno, lb + last, lbclen))
			goto ret1;
d832 1
a832 1
				ex_print(sp, ep, &from, &to, E_F_LIST);
d834 1
a834 1
				ex_print(sp, ep, &from, &to, E_F_HASH);
d836 1
a836 1
				ex_print(sp, ep, &from, &to, E_F_PRINT);
a837 10

		if (!sp->c_suffix)
			sp->lno = lno;

		/*
		 * !!!
		 * Move the cursor to the last line changed.
		 */
		if (!sp->c_suffix)
			sp->lno = lno;
d842 7
a848 1
	 * Move the cursor to the first non-blank of the last line change.
d850 5
a854 3
	 * XXX
	 * This is NOT backward compatible with historic vi, which always
	 * moved to the last line actually changed.
d856 1
a856 1
	if (!sp->c_suffix) {
d858 1
a858 1
		(void)nonblank(sp, ep, sp->lno, &sp->cno);
d865 1
d867 4
a870 2
		if (!F_ISSET(sp, S_GLOBAL))
			msgq(sp, M_INFO, "No match found");
d872 1
a872 1
		F_SET(EXP(sp), EX_AUTOPRINT);
a873 1
	rval = 0;
d875 1
a875 1
ret1:		rval = 1;
d886 404
a1289 1
 * regsub --
d1292 2
a1293 2
static __inline int
regsub(sp, ip, lbp, lbclenp, lblenp, match)
d1331 1
a1331 1
#define	ADDCH(ch) {							\
d1390 1
a1390 1
					ADDCH(*t);
d1418 1
a1418 1
		ADDCH(ch);
@


1.6
log
@clean up import.
@
text
@d387 2
a388 1
	recno_t elno, lno;
@


1.6.2.1
log
@file ex_subst.c was added on branch netbsd-1-0 on 1994-08-17 20:13:01 +0000
@
text
@d1 1001
@


1.6.2.2
log
@clean up import.
@
text
@a0 1001
/*-
 * Copyright (c) 1992, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
static char sccsid[] = "@@(#)ex_subst.c	8.59 (Berkeley) 8/17/94";
#endif /* not lint */

#include <sys/types.h>
#include <sys/queue.h>
#include <sys/time.h>

#include <bitstring.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <termios.h>
#include <unistd.h>

#include "compat.h"
#include <db.h>
#include <regex.h>

#include "vi.h"
#include "excmd.h"

#define	SUB_FIRST	0x01		/* The 'r' flag isn't reasonable. */
#define	SUB_MUSTSETR	0x02		/* The 'r' flag is required. */

static __inline int	regsub __P((SCR *, char *,
			    char **, size_t *, size_t *, regmatch_t [10]));
static int		substitute __P((SCR *, EXF *,
			    EXCMDARG *, char *, regex_t *, u_int));

/*
 * ex_substitute --
 *	[line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
 *
 *	Substitute on lines matching a pattern.
 */
int
ex_substitute(sp, ep, cmdp)
	SCR *sp;
	EXF *ep;
	EXCMDARG *cmdp;
{
	regex_t *re, lre;
	size_t blen, len;
	u_int flags;
	int delim, eval, reflags, replaced;
	char *bp, *ptrn, *rep, *p, *t;

	/*
	 * Skip leading white space.
	 *
	 * !!!
	 * Historic vi allowed any non-alphanumeric to serve as the
	 * substitution command delimiter.
	 *
	 * !!!
	 * If the arguments are empty, it's the same as &, i.e. we
	 * repeat the last substitution.
	 */
	for (p = cmdp->argv[0]->bp,
	    len = cmdp->argv[0]->len; len > 0; --len, ++p) {
		if (!isblank(*p))
			break;
	}
	if (len == 0)
		return (ex_subagain(sp, ep, cmdp));
	delim = *p++;
	if (isalnum(delim))
		return (substitute(sp, ep,
		    cmdp, p, &sp->subre, SUB_MUSTSETR));

	/*
	 * !!!
	 * The full-blown substitute command reset the remembered
	 * state of the 'c' and 'g' suffices.
	 */
	sp->c_suffix = sp->g_suffix = 0;
	
	/*
	 * Get the pattern string, toss escaped characters.
	 *
	 * !!!
	 * Historic vi accepted any of the following forms:
	 *
	 *	:s/abc/def/		change "abc" to "def"
	 *	:s/abc/def		change "abc" to "def"
	 *	:s/abc/			delete "abc"
	 *	:s/abc			delete "abc"
	 *
	 * QUOTING NOTE:
	 *
	 * Only toss an escape character if it escapes a delimiter.
	 * This means that "s/A/\\\\f" replaces "A" with "\\f".  It
	 * would be nice to be more regular, i.e. for each layer of
	 * escaping a single escape character is removed, but that's
	 * not how the historic vi worked.
	 */
	for (ptrn = t = p;;) {
		if (p[0] == '\0' || p[0] == delim) {
			if (p[0] == delim)
				++p;
			/*
			 * !!!
			 * Nul terminate the pattern string -- it's passed
			 * to regcomp which doesn't understand anything else.
			 */
			*t = '\0';
			break;
		}
		if (p[0] == '\\')
			if (p[1] == delim)
				++p;
			else if (p[1] == '\\')
				*t++ = *p++;
		*t++ = *p++;
	}

	/*
	 * If the pattern string is empty, use the last RE (not just the
	 * last substitution RE).
	 */
	if (*ptrn == '\0') {
		if (!F_ISSET(sp, S_SRE_SET)) {
			msgq(sp, M_ERR, "No previous regular expression");
			return (1);
		}
		re = &sp->sre;
		flags = 0;
	} else {
		/* Set RE flags. */
		reflags = 0;
		if (O_ISSET(sp, O_EXTENDED))
			reflags |= REG_EXTENDED;
		if (O_ISSET(sp, O_IGNORECASE))
			reflags |= REG_ICASE;

		/* Convert vi-style RE's to POSIX 1003.2 RE's. */
		if (re_conv(sp, &ptrn, &replaced))
			return (1);

		/* Compile the RE. */
		eval = regcomp(&lre, (char *)ptrn, reflags);

		/* Free up any allocated memory. */
		if (replaced)
			FREE_SPACE(sp, ptrn, 0);

		if (eval) {
			re_error(sp, eval, &lre);
			return (1);
		}

		/*
		 * Set saved RE.
		 *
		 * !!!
		 * Historic practice is that substitutes set the search
		 * direction as well as both substitute and search RE's.
		 */
		sp->searchdir = FORWARD;
		sp->sre = lre;
		F_SET(sp, S_SRE_SET);
		sp->subre = lre;
		F_SET(sp, S_SUBRE_SET);

		re = &lre;
		flags = SUB_FIRST;
	}

	/*
	 * Get the replacement string.
	 *
	 * The special character & (\& if O_MAGIC not set) matches the
	 * entire RE.  No handling of & is required here, it's done by
	 * regsub().
	 *
	 * The special character ~ (\~ if O_MAGIC not set) inserts the
	 * previous replacement string into this replacement string.
	 * Count ~'s to figure out how much space we need.  We could
	 * special case nonexistent last patterns or whether or not
	 * O_MAGIC is set, but it's probably not worth the effort.
	 *
	 * QUOTING NOTE:
	 *
	 * Only toss an escape character if it escapes a delimiter or
	 * if O_MAGIC is set and it escapes a tilde.
	 *
	 * !!!
	 * If the entire replacement pattern is "%", then use the last
	 * replacement pattern.  This semantic was added to vi in System
	 * V and then percolated elsewhere, presumably around the time
	 * that it was added to their version of ed(1).
	 */
	if (p[0] == '\0' || p[0] == delim) {
		if (p[0] == delim)
			++p;
		if (sp->repl != NULL)
			FREE(sp->repl, sp->repl_len);
		sp->repl = NULL;
		sp->repl_len = 0;
	} else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
		p += p[1] == delim ? 2 : 1;
	else {
		for (rep = p, len = 0;
		    p[0] != '\0' && p[0] != delim; ++p, ++len)
			if (p[0] == '~')
				len += sp->repl_len;
		GET_SPACE_RET(sp, bp, blen, len);
		for (t = bp, len = 0, p = rep;;) {
			if (p[0] == '\0' || p[0] == delim) {
				if (p[0] == delim)
					++p;
				break;
			}
			if (p[0] == '\\') {
				if (p[1] == delim)
					++p;
				else if (p[1] == '\\') {
					*t++ = *p++;
					++len;
				} else if (p[1] == '~') {
					++p;
					if (!O_ISSET(sp, O_MAGIC))
						goto tilde;
				}
			} else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
tilde:				++p;
				memmove(t, sp->repl, sp->repl_len);
				t += sp->repl_len;
				len += sp->repl_len;
				continue;
			}
			*t++ = *p++;
			++len;
		}
		if ((sp->repl_len = len) != 0) {
			if (sp->repl != NULL)
				free(sp->repl);
			if ((sp->repl = malloc(len)) == NULL) {
				msgq(sp, M_SYSERR, NULL);
				FREE_SPACE(sp, bp, blen);
				return (1);
			}
			memmove(sp->repl, bp, len);
		}
		FREE_SPACE(sp, bp, blen);
	}
	return (substitute(sp, ep, cmdp, p, re, flags));
}

/*
 * ex_subagain --
 *	[line [,line]] & [cgr] [count] [#lp]]
 *
 *	Substitute using the last substitute RE and replacement pattern.
 */
int
ex_subagain(sp, ep, cmdp)
	SCR *sp;
	EXF *ep;
	EXCMDARG *cmdp;
{
	if (!F_ISSET(sp, S_SUBRE_SET)) {
		msgq(sp, M_ERR, "No previous regular expression");
		return (1);
	}
	return (substitute(sp, ep, cmdp, cmdp->argv[0]->bp, &sp->subre, 0));
}

/*
 * ex_subtilde --
 *	[line [,line]] ~ [cgr] [count] [#lp]]
 *
 *	Substitute using the last RE and last substitute replacement pattern.
 */
int
ex_subtilde(sp, ep, cmdp)
	SCR *sp;
	EXF *ep;
	EXCMDARG *cmdp;
{
	if (!F_ISSET(sp, S_SRE_SET)) {
		msgq(sp, M_ERR, "No previous regular expression");
		return (1);
	}
	return (substitute(sp, ep, cmdp, cmdp->argv[0]->bp, &sp->sre, 0));
}

/*
 * The nasty part of the substitution is what happens when the replacement
 * string contains newlines.  It's a bit tricky -- consider the information
 * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
 * to build a set of newline offsets which we use to break the line up later,
 * when the replacement is done.  Don't change it unless you're pretty damned
 * confident.
 */
#define	NEEDNEWLINE(sp) {						\
	if (sp->newl_len == sp->newl_cnt) {				\
		sp->newl_len += 25;					\
		REALLOC(sp, sp->newl, size_t *,				\
		    sp->newl_len * sizeof(size_t));			\
		if (sp->newl == NULL) {					\
			sp->newl_len = 0;				\
			return (1);					\
		}							\
	}								\
}

#define	BUILD(sp, l, len) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAX(lbclen + (len), 256);			\
		REALLOC(sp, lb, char *, lblen);				\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	memmove(lb + lbclen, l, len);					\
	lbclen += len;							\
}

#define	NEEDSP(sp, len, pnt) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAX(lbclen + (len), 256);			\
		REALLOC(sp, lb, char *, lblen);				\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
		pnt = lb + lbclen;					\
	}								\
}

/*
 * substitute --
 *	Do the substitution.  This stuff is *really* tricky.  There are
 *	lots of special cases, and general nastiness.  Don't mess with it
 * 	unless you're pretty confident.
 */
static int
substitute(sp, ep, cmdp, s, re, flags)
	SCR *sp;
	EXF *ep;
	EXCMDARG *cmdp;
	char *s;
	regex_t *re;
	u_int flags;
{
	MARK from, to;
	recno_t elno, lno;
	regmatch_t match[10];
	size_t blen, cnt, last, lbclen, lblen, len, llen, offset, saved_offset;
	int cflag, lflag, nflag, pflag, rflag;
	int didsub, do_eol_match, eflags, empty_ok, eval;
	int linechanged, matched, quit, rval;
	char *bp, *lb;

	/*
	 * !!!
	 * Historically, the 'g' and 'c' suffices were always toggled as flags,
	 * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
	 * not set, they were initialized to 0 for all substitute commands.  If
	 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
	 * specified substitute/replacement patterns (see ex_substitute()).
	 */
	if (!O_ISSET(sp, O_EDCOMPATIBLE))
		sp->c_suffix = sp->g_suffix = 0;

	/*
	 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
	 * it only displayed the last change.  I'd disallow them, but they are
	 * useful in combination with the [v]global commands.  In the current
	 * model the problem is combining them with the 'c' flag -- the screen
	 * would have to flip back and forth between the confirm screen and the
	 * ex print screen, which would be pretty awful.  We do display all
	 * changes, though, for what that's worth.
	 *
	 * !!!
	 * Historic vi was fairly strict about the order of "options", the
	 * count, and "flags".  I'm somewhat fuzzy on the difference between
	 * options and flags, anyway, so this is a simpler approach, and we
	 * just take it them in whatever order the user gives them.  (The ex
	 * usage statement doesn't reflect this.)
	 */
	cflag = lflag = nflag = pflag = rflag = 0;
	for (lno = OOBLNO; *s != '\0'; ++s)
		switch (*s) {
		case ' ':
		case '\t':
			continue;
		case '+':
			++cmdp->flagoff;
			break;
		case '-':
			--cmdp->flagoff;
			break;
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			if (lno != OOBLNO)
				goto usage;
			errno = 0;
			lno = strtoul(s, &s, 10);
			if (*s == '\0')		/* Loop increment correction. */
				--s;
			if (errno == ERANGE) {
				if (lno == LONG_MAX)
					msgq(sp, M_ERR, "Count overflow");
				else if (lno == LONG_MIN)
					msgq(sp, M_ERR, "Count underflow");
				else
					msgq(sp, M_SYSERR, NULL);
				return (1);
			}
			/*
			 * In historic vi, the count was inclusive from the
			 * second address.
			 */
			cmdp->addr1.lno = cmdp->addr2.lno;
			cmdp->addr2.lno += lno - 1;
			break;
		case '#':
			nflag = 1;
			break;
		case 'c':
			sp->c_suffix = !sp->c_suffix;
			break;
		case 'g':
			sp->g_suffix = !sp->g_suffix;
			break;
		case 'l':
			lflag = 1;
			break;
		case 'p':
			pflag = 1;
			break;
		case 'r':
			if (LF_ISSET(SUB_FIRST)) {
				msgq(sp, M_ERR,
		    "Regular expression specified; r flag meaningless");
				return (1);
			}
			if (!F_ISSET(sp, S_SRE_SET)) {
				msgq(sp, M_ERR,
				    "No previous regular expression");
				return (1);
			}
			rflag = 1;
			re = &sp->sre;
			break;
		default:
			goto usage;
		}

	if (*s != '\0' || !rflag && LF_ISSET(SUB_MUSTSETR)) {
usage:		msgq(sp, M_ERR, "Usage: %s", cmdp->cmd->usage);
		return (1);
	}

	if (IN_VI_MODE(sp) && sp->c_suffix && (lflag || nflag || pflag)) {
		msgq(sp, M_ERR,
	"The #, l and p flags may not be combined with the c flag in vi mode");
		return (1);
	}

	/*
	 * bp:		if interactive, line cache
	 * blen:	if interactive, line cache length
	 * lb:		build buffer pointer.
	 * lbclen:	current length of built buffer.
	 * lblen;	length of build buffer.
	 */
	bp = lb = NULL;
	blen = lbclen = lblen = 0;

	/* For each line... */
	for (matched = quit = 0, lno = cmdp->addr1.lno,
	    elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {

		/* Someone's unhappy, time to stop. */
		if (INTERRUPTED(sp)) {
			if (!F_ISSET(sp, S_GLOBAL))
				msgq(sp, M_INFO, "Interrupted");
			break;
		}

		/* Get the line. */
		if ((s = file_gline(sp, ep, lno, &llen)) == NULL) {
			GETLINE_ERR(sp, lno);
			goto ret1;
		}

		/*
		 * Make a local copy if doing confirmation -- when calling
		 * the confirm routine we're likely to lose the cached copy.
		 */
		if (sp->c_suffix) {
			if (bp == NULL) {
				GET_SPACE_RET(sp, bp, blen, llen);
			} else
				ADD_SPACE_RET(sp, bp, blen, llen);
			memmove(bp, s, llen);
			s = bp;
		}

		/* Start searching from the beginning. */
		offset = 0;
		len = llen;

		/* Reset the build buffer offset. */
		lbclen = 0;

		/* Reset empty match flag. */
		empty_ok = 1;

		/*
		 * We don't want to have to do a setline if the line didn't
		 * change -- keep track of whether or not this line changed.
		 * If doing confirmations, don't want to keep setting the
		 * line if change is refused -- keep track of substitutions.
		 */
		didsub = linechanged = 0;

		/* New line, do an EOL match. */
		do_eol_match = 1;

		/* It's not nul terminated, but we pretend it is. */
		eflags = REG_STARTEND;

		/*
		 * The search area is from s + offset to the EOL.
		 *
		 * Generally, match[0].rm_so is the offset of the start
		 * of the match from the start of the search, and offset
		 * is the offset of the start of the last search.
		 */
nextmatch:	match[0].rm_so = 0;
		match[0].rm_eo = len;

		/* Get the next match. */
		eval = regexec(re, (char *)s + offset, 10, match, eflags);

		/*
		 * There wasn't a match or if there was an error, deal with
		 * it.  If there was a previous match in this line, resolve
		 * the changes into the database.  Otherwise, just move on.
		 */
		if (eval == REG_NOMATCH)
			goto endmatch;
		if (eval != 0) {
			re_error(sp, eval, re);
			goto ret1;
		}
		matched = 1;

		/* Only the first search can match an anchored expression. */
		eflags |= REG_NOTBOL;

		/*
		 * !!!
		 * It's possible to match 0-length strings -- for example, the
		 * command s;a*;X;, when matched against the string "aabb" will
		 * result in "XbXbX", i.e. the matches are "aa", the space
		 * between the b's and the space between the b's and the end of
		 * the string.  There is a similar space between the beginning
		 * of the string and the a's.  The rule that we use (because vi
		 * historically used it) is that any 0-length match, occurring
		 * immediately after a match, is ignored.  Otherwise, the above
		 * example would have resulted in "XXbXbX".  Another example is
		 * incorrectly using " *" to replace groups of spaces with one
		 * space.
		 *
		 * The way we do this is that if we just had a successful match,
		 * the starting offset does not skip characters, and the match
		 * is empty, ignore the match and move forward.  If there's no
		 * more characters in the string, we were attempting to match
		 * after the last character, so quit.
		 */
		if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
			empty_ok = 1;
			if (len == 0)
				goto endmatch;
			BUILD(sp, s + offset, 1)
			++offset;
			--len;
			goto nextmatch;
		}

		/* Confirm change. */
		if (sp->c_suffix) {
			/*
			 * Set the cursor position for confirmation.  Note,
			 * if we matched on a '$', the cursor may be past
			 * the end of line.
			 *
			 * XXX
			 * We may want to "fix" this in the confirm routine,
			 * if the confirm routine should be able to display
			 * a cursor past EOL.
			 */
			from.lno = to.lno = lno;
			from.cno = match[0].rm_so + offset;
			to.cno = match[0].rm_eo;
			if (llen == 0)
				from.cno = to.cno = 0;
			else {
				if (to.cno >= llen)
					to.cno = llen - 1;
				if (from.cno >= llen)
					from.cno = llen - 1;
			}
			switch (sp->s_confirm(sp, ep, &from, &to)) {
			case CONF_YES:
				break;
			case CONF_NO:
				didsub = 0;
				BUILD(sp, s +offset, match[0].rm_eo);
				goto skip;
			case CONF_QUIT:
				/* Set the quit flag. */
				quit = 1;

				/* If interruptible, pass the info back. */
				if (F_ISSET(sp, S_INTERRUPTIBLE))
					F_SET(sp, S_INTERRUPTED);

				/*
				 * If any changes, resolve them, otherwise
				 * return to the main loop.
				 */
				goto endmatch;
			}
		}

		/* Copy the bytes before the match into the build buffer. */
		BUILD(sp, s + offset, match[0].rm_so);

		/* Substitute the matching bytes. */
		didsub = 1;
		if (regsub(sp, s + offset, &lb, &lbclen, &lblen, match))
			goto ret1;

		/* Set the change flag so we know this line was modified. */
		linechanged = 1;

		/* Move past the matched bytes. */
skip:		offset += match[0].rm_eo;
		len -= match[0].rm_eo;

		/* A match cannot be followed by an empty pattern. */
		empty_ok = 0;

		/*
		 * If doing a global change with confirmation, we have to
		 * update the screen.  The basic idea is to store the line
		 * so the screen update routines can find it, and restart.
		 */
		if (didsub && sp->c_suffix && sp->g_suffix) {
			/*
			 * The new search offset will be the end of the
			 * modified line.
			 */
			saved_offset = lbclen;

			/* Copy the rest of the line. */
			if (len)
				BUILD(sp, s + offset, len)

			/* Set the new offset. */
			offset = saved_offset;

			/* Store inserted lines, adjusting the build buffer. */
			last = 0;
			if (sp->newl_cnt) {
				for (cnt = 0;
				    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
					if (file_iline(sp, ep, lno,
					    lb + last, sp->newl[cnt] - last))
						goto ret1;
					last = sp->newl[cnt] + 1;
					++sp->rptlines[L_ADDED];
				}
				lbclen -= last;
				offset -= last;
				sp->newl_cnt = 0;
			}

			/* Store and retrieve the line. */
			if (file_sline(sp, ep, lno, lb + last, lbclen))
				goto ret1;
			if ((s = file_gline(sp, ep, lno, &llen)) == NULL) {
				GETLINE_ERR(sp, lno);
				goto ret1;
			}
			ADD_SPACE_RET(sp, bp, blen, llen)
			memmove(bp, s, llen);
			s = bp;
			len = llen - offset;

			/* Restart the build. */
			lbclen = 0;
			BUILD(sp, s, offset);

			/*
			 * If we haven't already done the after-the-string
			 * match, do one.  Set REG_NOTEOL so the '$' pattern
			 * only matches once.
			 */
			if (!do_eol_match)
				goto endmatch;
			if (offset == len) {
				do_eol_match = 0;
				eflags |= REG_NOTEOL;
			}
			goto nextmatch;
		}

		/*
		 * If it's a global:
		 *
		 * If at the end of the string, do a test for the after
		 * the string match.  Set REG_NOTEOL so the '$' pattern
		 * only matches once.
		 */
		if (sp->g_suffix && do_eol_match) {
			if (len == 0) {
				do_eol_match = 0;
				eflags |= REG_NOTEOL;
			}
			goto nextmatch;
		}

endmatch:	if (!linechanged)
			continue;

		/* Copy any remaining bytes into the build buffer. */
		if (len)
			BUILD(sp, s + offset, len)

		/* Store inserted lines, adjusting the build buffer. */
		last = 0;
		if (sp->newl_cnt) {
			for (cnt = 0;
			    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
				if (file_iline(sp, ep,
				    lno, lb + last, sp->newl[cnt] - last))
					goto ret1;
				last = sp->newl[cnt] + 1;
				++sp->rptlines[L_ADDED];
			}
			lbclen -= last;
			sp->newl_cnt = 0;
		}

		/* Store the changed line. */
		if (file_sline(sp, ep, lno, lb + last, lbclen))
			goto ret1;

		/* Update changed line counter. */
		if (sp->rptlchange != lno) {
			sp->rptlchange = lno;
			++sp->rptlines[L_CHANGED];
		}

		/*
		 * !!!
		 * Display as necessary.  Historic practice is to only
		 * display the last line of a line split into multiple
		 * lines.
		 */
		if (lflag || nflag || pflag) {
			from.lno = to.lno = lno;
			from.cno = to.cno = 0;
			if (lflag)
				ex_print(sp, ep, &from, &to, E_F_LIST);
			if (nflag)
				ex_print(sp, ep, &from, &to, E_F_HASH);
			if (pflag)
				ex_print(sp, ep, &from, &to, E_F_PRINT);
		}

		if (!sp->c_suffix)
			sp->lno = lno;

		/*
		 * !!!
		 * Move the cursor to the last line changed.
		 */
		if (!sp->c_suffix)
			sp->lno = lno;
	}

	/*
	 * !!!
	 * Move the cursor to the first non-blank of the last line change.
	 *
	 * XXX
	 * This is NOT backward compatible with historic vi, which always
	 * moved to the last line actually changed.
	 */
	if (!sp->c_suffix) {
		sp->cno = 0;
		(void)nonblank(sp, ep, sp->lno, &sp->cno);
	}

	/*
	 * If not in a global command, and nothing matched, say so.
	 * Else, if none of the lines displayed, put something up.
	 */
	if (!matched) {
		if (!F_ISSET(sp, S_GLOBAL))
			msgq(sp, M_INFO, "No match found");
	} else if (!lflag && !nflag && !pflag)
		F_SET(EXP(sp), EX_AUTOPRINT);

	rval = 0;
	if (0) {
ret1:		rval = 1;
	}

	if (bp != NULL)
		FREE_SPACE(sp, bp, blen);
	if (lb != NULL)
		free(lb);
	return (rval);
}

/*
 * regsub --
 * 	Do the substitution for a regular expression.
 */
static __inline int
regsub(sp, ip, lbp, lbclenp, lblenp, match)
	SCR *sp;
	char *ip;			/* Input line. */
	char **lbp;
	size_t *lbclenp, *lblenp;
	regmatch_t match[10];
{
	enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
	size_t lbclen, lblen;		/* Local copies. */
	size_t mlen;			/* Match length. */
	size_t rpl;			/* Remaining replacement length. */
	char *rp;			/* Replacement pointer. */
	int ch;
	int no;				/* Match replacement offset. */
	char *p, *t;			/* Buffer pointers. */
	char *lb;			/* Local copies. */

	lb = *lbp;			/* Get local copies. */
	lbclen = *lbclenp;
	lblen = *lblenp;

	/*
	 * QUOTING NOTE:
	 *
	 * There are some special sequences that vi provides in the
	 * replacement patterns.
	 *	 & string the RE matched (\& if nomagic set)
	 *	\# n-th regular subexpression
	 *	\E end \U, \L conversion
	 *	\e end \U, \L conversion
	 *	\l convert the next character to lower-case
	 *	\L convert to lower-case, until \E, \e, or end of replacement
	 *	\u convert the next character to upper-case
	 *	\U convert to upper-case, until \E, \e, or end of replacement
	 *
	 * Otherwise, since this is the lowest level of replacement, discard
	 * all escape characters.  This (hopefully) follows historic practice.
	 */
#define	ADDCH(ch) {							\
	CHAR_T __ch = (ch);						\
	u_int __value = KEY_VAL(sp, __ch);				\
	if (__value == K_CR || __value == K_NL) {			\
		NEEDNEWLINE(sp);					\
		sp->newl[sp->newl_cnt++] = lbclen;			\
	} else if (conv != C_NOTSET) {					\
		switch (conv) {						\
		case C_ONELOWER:					\
			conv = C_NOTSET;				\
			/* FALLTHROUGH */				\
		case C_LOWER:						\
			if (isupper(__ch))				\
				__ch = tolower(__ch);			\
			break;						\
		case C_ONEUPPER:					\
			conv = C_NOTSET;				\
			/* FALLTHROUGH */				\
		case C_UPPER:						\
			if (islower(__ch))				\
				__ch = toupper(__ch);			\
			break;						\
		default:						\
			abort();					\
		}							\
	}								\
	NEEDSP(sp, 1, p);						\
	*p++ = __ch;							\
	++lbclen;							\
}
	conv = C_NOTSET;
	for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
		switch (ch = *rp++) {
		case '&':
			if (O_ISSET(sp, O_MAGIC)) {
				no = 0;
				goto subzero;
			}
			break;
		case '\\':
			if (rpl == 0)
				break;
			--rpl;
			switch (ch = *rp) {
			case '&':
				++rp;
				if (!O_ISSET(sp, O_MAGIC)) {
					no = 0;
					goto subzero;
				}
				break;
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				no = *rp++ - '0';
subzero:			if (match[no].rm_so == -1 ||
			    	    match[no].rm_eo == -1)
					break;
				mlen = match[no].rm_eo - match[no].rm_so;
				for (t = ip + match[no].rm_so; mlen--; ++t)
					ADDCH(*t);
				continue;
			case 'e':
			case 'E':
				++rp;
				conv = C_NOTSET;
				continue;
			case 'l':
				++rp;
				conv = C_ONELOWER;
				continue;
			case 'L':
				++rp;
				conv = C_LOWER;
				continue;
			case 'u':
				++rp;
				conv = C_ONEUPPER;
				continue;
			case 'U':
				++rp;
				conv = C_UPPER;
				continue;
			default:
				++rp;
				break;
			}
		}
		ADDCH(ch);
	}

	*lbp = lb;			/* Update caller's information. */
	*lbclenp = lbclen;
	*lblenp = lblen;
	return (0);
}
@


1.5
log
@clean up import.  still have to hack some things.
@
text
@d35 1
a35 1
static const char sccsid[] = "@@(#)ex_subst.c	8.58 (Berkeley) 8/17/94";
@


1.4
log
@nvi 1.11(beta) from bostic.  reconcile conflicts/kill rcsids.
@
text
@d35 1
a35 1
static char sccsid[] = "@@(#)ex_subst.c	8.39 (Berkeley) 3/22/94";
d63 2
a64 3
static int		checkmatchsize __P((SCR *, regex_t *));
static inline int	regsub __P((SCR *,
			    char *, char **, size_t *, size_t *));
d110 7
d155 7
a161 5
	/* If the pattern string is empty, use the last one. */
	if (*ptrn == NULL) {
		if (!F_ISSET(sp, S_SUBRE_SET)) {
			msgq(sp, M_ERR,
			    "No previous regular expression.");
d164 1
a164 1
		re = &sp->subre;
d224 6
d231 3
a233 1
	if (*p == '\0') {
d238 3
a240 1
	} else {
d273 9
a281 6
		if (sp->repl != NULL)
			FREE(sp->repl, sp->repl_len);
		if ((sp->repl = malloc(len)) == NULL) {
			msgq(sp, M_SYSERR, NULL);
			FREE_SPACE(sp, bp, blen);
			return (1);
a282 2
		memmove(sp->repl, bp, len);
		sp->repl_len = len;
a284 3

	if (checkmatchsize(sp, &sp->subre))
		return (1);
d301 1
a301 1
		msgq(sp, M_ERR, "No previous regular expression.");
d320 1
a320 1
		msgq(sp, M_ERR, "No previous regular expression.");
d388 1
d390 1
a390 1
	int cflag, gflag, lflag, nflag, pflag, rflag;
d392 1
a392 1
	int linechanged, matched, quit, rval, teardown;
d396 11
d422 1
a422 1
	cflag = gflag = lflag = nflag = pflag = rflag = 0;
d427 6
d444 1
a444 1
					msgq(sp, M_ERR, "Count overflow.");
d446 1
a446 1
					msgq(sp, M_ERR, "Count underflow.");
d462 1
a462 1
			cflag = 1;
d465 1
a465 1
			gflag = 1;
d476 1
a476 1
		    "Regular expression specified; r flag meaningless.");
d479 1
a479 1
			if (!F_ISSET(sp, S_SUBRE_SET)) {
d481 1
a481 1
				    "No previous regular expression.");
d485 1
d496 1
a496 1
	if (IN_VI_MODE(sp) && cflag && (lflag || nflag || pflag)) {
d498 1
a498 1
	"The #, l and p flags may not be combined with the c flag in vi mode.");
a501 3
	/* Set up interrupts. */
	teardown = !intr_init(sp);

d517 1
a517 1
		if (F_ISSET(sp, S_INTERRUPTED)) {
d519 1
a519 1
				msgq(sp, M_INFO, "Interrupted.");
d526 1
a526 1
			return (1);
d533 1
a533 1
		if (cflag) {
d569 3
a571 3
		 * Generally, sp->match[0].rm_so is the offset of the start
		 * of the match from the start of the search, and offset is
		 * the offset of the start of the last search.
d573 2
a574 2
nextmatch:	sp->match[0].rm_so = 0;
		sp->match[0].rm_eo = len;
d577 1
a577 2
		eval = regexec(re,
		    (char *)s + offset, re->re_nsub + 1, sp->match, eflags);
d615 1
a615 2
		if (!empty_ok &&
		    sp->match[0].rm_so == 0 && sp->match[0].rm_eo == 0) {
d626 1
a626 1
		if (cflag) {
d638 2
a639 2
			from.cno = sp->match[0].rm_so + offset;
			to.cno = sp->match[0].rm_eo;
d653 1
a653 1
				BUILD(sp, s +offset, sp->match[0].rm_eo);
d672 1
a672 15
		BUILD(sp, s + offset, sp->match[0].rm_so);

		/*
		 * Cursor moves to last line changed, unless doing confirm,
		 * in which case don't move it.
		 *
		 * !!!
		 * Historic vi just put the cursor on the first non-blank
		 * of the last line changed.  We move to the beginning of
		 * the next substitution.
		 */
		if (!cflag) {
			sp->lno = lno;
			sp->cno = lbclen;
		}
d676 1
a676 1
		if (regsub(sp, s + offset, &lb, &lbclen, &lblen))
d683 2
a684 2
skip:		offset += sp->match[0].rm_eo;
		len -= sp->match[0].rm_eo;
d694 1
a694 1
		if (didsub && cflag && gflag) {
d761 1
a761 1
		if (gflag && do_eol_match) {
d796 4
a799 1
		++sp->rptlines[L_CHANGED];
d801 6
a806 1
		/* Display as necessary. */
d817 23
d848 1
a848 1
			msgq(sp, M_INFO, "No match found.");
a856 3
	if (teardown)
		intr_end(sp);

d859 2
d868 2
a869 2
static inline int
regsub(sp, ip, lbp, lbclenp, lblenp)
d874 1
d909 1
a909 1
	u_int __value = term_key_val(sp, __ch);				\
d952 1
a953 1
					++rp;
d961 5
a965 6
subzero:			if (sp->match[no].rm_so == -1 ||
			    	    sp->match[no].rm_eo == -1)
					continue;
				mlen =
				    sp->match[no].rm_eo - sp->match[no].rm_so;
				for (t = ip + sp->match[no].rm_so; mlen--; ++t)
a999 18
	return (0);
}

static int
checkmatchsize(sp, re)
	SCR *sp;
	regex_t *re;
{
	/* Build nsub array as necessary. */
	if (sp->matchsize < re->re_nsub + 1) {
		sp->matchsize = re->re_nsub + 1;
		REALLOC(sp, sp->match,
		    regmatch_t *, sp->matchsize * sizeof(regmatch_t));
		if (sp->match == NULL) {
			sp->matchsize = 0;
			return (1);
		}
	}
@


1.3
log
@Do the right thing if the pattern has an escaped escape character followed
immediately by a delimiter.
@
text
@d2 1
a2 1
 * Copyright (c) 1992, 1993
d35 1
a35 2
/* from: static char sccsid[] = "@@(#)ex_substitute.c	8.33 (Berkeley) 1/9/94"; */
static char *rcsid = "$Id: ex_subst.c,v 1.2 1994/01/24 06:40:41 cgd Exp $";
d39 2
d42 1
d45 3
d50 1
d53 4
a58 1
#include "interrupt.h"
a65 1
static void		subst_intr __P((int));
d141 1
a141 1
		if (p[0] == '\\') {
d144 1
a144 7
			else if (p[1] == '\\') {
				/*
				 * Skip over an escaped escape character;
				 * otherwise the check for an escaped
				 * delimiter will be confused on the next
				 * iteration.
		 		 */
a145 2
			}
		}
a201 3
	 * The special character ~ (\~ if O_MAGIC not set) inserts the
	 * previous replacement string into this replacement string.
	 *
d206 6
d215 1
a215 2
	 * an escape character, or if O_MAGIC is set and it escapes a
	 * tilde.
a222 5
		/*
		 * Count ~'s to figure out how much space we need.  We could
		 * special case nonexistent last patterns or whether or not
		 * O_MAGIC is set, but it's probably not worth the effort.
		 */
d235 1
a235 1
				if (p[1] == '\\' || p[1] == delim)
d237 4
a240 1
				else if (p[1] == '~') {
d310 1
a310 1
/* 
a369 1
	DECLARE_INTERRUPTS;
d373 1
d375 1
a375 2
	int linechanged, matched, quit, rval;
	int cflag, gflag, lflag, nflag, pflag, rflag;
d467 2
a468 2
	if (!F_ISSET(sp, S_GLOBAL))
		SET_UP_INTERRUPTS(subst_intr);
d632 1
a632 1
				
d650 2
a651 1
		 * of the last line changed.  This might be better.
d655 1
a655 1
			sp->cno = sp->match[0].rm_so + offset;
d810 2
a811 3
interrupt_err:
	if (!F_ISSET(sp, S_GLOBAL))
		TEAR_DOWN_INTERRUPTS;
d849 1
a849 1
	 *	\# n-th regular subexpression	
a972 22
}

/*
 * subst_intr --
 *	Set the interrupt bit in any screen that is interruptible.
 *
 * XXX
 * In the future this may be a problem.  The user should be able to move to
 * another screen and keep typing while this runs.  If so, and the user has
 * more than one substitute running, it will be hard to decide which one to
 * stop.
 */
static void
subst_intr(signo)
	int signo;
{
	SCR *sp;

	for (sp = __global_list->dq.cqh_first;
	    sp != (void *)&__global_list->dq; sp = sp->q.cqe_next)
		if (F_ISSET(sp, S_INTERRUPTIBLE))
			F_SET(sp, S_INTERRUPTED);
@


1.2
log
@more Ids than you'll ever want.
@
text
@d36 1
a36 1
static char *rcsid = "$Id$";
d133 13
a145 2
		if (p[0] == '\\' && p[1] == delim)
			++p;
@


1.1
log
@Initial revision
@
text
@d35 2
a36 1
static char sccsid[] = "@@(#)ex_substitute.c	8.33 (Berkeley) 1/9/94";
@


1.1.1.1
log
@nvi 1.03, from ftp.cs.berkeley.edu, per keith bostic's permission.
@
text
@@


1.1.1.2
log
@nvi/nex 1.11beta from bostic.
@
text
@d2 1
a2 1
 * Copyright (c) 1992, 1993, 1994
d35 1
a35 1
static char sccsid[] = "@@(#)ex_subst.c	8.39 (Berkeley) 3/22/94";
a38 2
#include <sys/queue.h>
#include <sys/time.h>
a39 1
#include <bitstring.h>
a41 3
#include <limits.h>
#include <signal.h>
#include <stdio.h>
a43 1
#include <termios.h>
a45 4
#include "compat.h"
#include <db.h>
#include <regex.h>

d48 1
d56 1
d132 2
a133 5
		if (p[0] == '\\')
			if (p[1] == delim)
				++p;
			else if (p[1] == '\\')
				*t++ = *p++;
d190 3
a196 6
	 * The special character ~ (\~ if O_MAGIC not set) inserts the
	 * previous replacement string into this replacement string.
	 * Count ~'s to figure out how much space we need.  We could
	 * special case nonexistent last patterns or whether or not
	 * O_MAGIC is set, but it's probably not worth the effort.
	 *
d200 2
a201 1
	 * if O_MAGIC is set and it escapes a tilde.
d209 5
d226 1
a226 1
				if (p[1] == delim)
d228 1
a228 4
				else if (p[1] == '\\') {
					*t++ = *p++;
					++len;
				} else if (p[1] == '~') {
d298 1
a298 1
/*
d358 1
d362 2
a364 2
	int didsub, do_eol_match, eflags, empty_ok, eval;
	int linechanged, matched, quit, rval, teardown;
d456 2
a457 2
	/* Set up interrupts. */
	teardown = !intr_init(sp);
d621 1
a621 1

d639 1
a639 2
		 * of the last line changed.  We move to the beginning of
		 * the next substitution.
d643 1
a643 1
			sp->cno = lbclen;
d798 3
a800 2
	if (teardown)
		intr_end(sp);
d838 1
a838 1
	 *	\# n-th regular subexpression
d962 22
@


1.1.1.3
log
@new public version of nvi
@
text
@d35 1
a35 1
static const char sccsid[] = "@@(#)ex_subst.c	8.58 (Berkeley) 8/17/94";
d63 3
a65 2
static __inline int	regsub __P((SCR *, char *,
			    char **, size_t *, size_t *, regmatch_t [10]));
a110 7
	 * !!!
	 * The full-blown substitute command reset the remembered
	 * state of the 'c' and 'g' suffices.
	 */
	sp->c_suffix = sp->g_suffix = 0;
	
	/*
d149 5
a153 7
	/*
	 * If the pattern string is empty, use the last RE (not just the
	 * last substitution RE).
	 */
	if (*ptrn == '\0') {
		if (!F_ISSET(sp, S_SRE_SET)) {
			msgq(sp, M_ERR, "No previous regular expression");
d156 1
a156 1
		re = &sp->sre;
a215 6
	 *
	 * !!!
	 * If the entire replacement pattern is "%", then use the last
	 * replacement pattern.  This semantic was added to vi in System
	 * V and then percolated elsewhere, presumably around the time
	 * that it was added to their version of ed(1).
d217 1
a217 3
	if (p[0] == '\0' || p[0] == delim) {
		if (p[0] == delim)
			++p;
d222 1
a222 3
	} else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
		p += p[1] == delim ? 2 : 1;
	else {
d255 6
a260 9
		if ((sp->repl_len = len) != 0) {
			if (sp->repl != NULL)
				free(sp->repl);
			if ((sp->repl = malloc(len)) == NULL) {
				msgq(sp, M_SYSERR, NULL);
				FREE_SPACE(sp, bp, blen);
				return (1);
			}
			memmove(sp->repl, bp, len);
d262 2
d266 3
d285 1
a285 1
		msgq(sp, M_ERR, "No previous regular expression");
d304 1
a304 1
		msgq(sp, M_ERR, "No previous regular expression");
a371 1
	regmatch_t match[10];
d373 1
a373 1
	int cflag, lflag, nflag, pflag, rflag;
d375 1
a375 1
	int linechanged, matched, quit, rval;
a378 11
	 * !!!
	 * Historically, the 'g' and 'c' suffices were always toggled as flags,
	 * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
	 * not set, they were initialized to 0 for all substitute commands.  If
	 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
	 * specified substitute/replacement patterns (see ex_substitute()).
	 */
	if (!O_ISSET(sp, O_EDCOMPATIBLE))
		sp->c_suffix = sp->g_suffix = 0;

	/*
d394 1
a394 1
	cflag = lflag = nflag = pflag = rflag = 0;
a398 6
			continue;
		case '+':
			++cmdp->flagoff;
			break;
		case '-':
			--cmdp->flagoff;
d410 1
a410 1
					msgq(sp, M_ERR, "Count overflow");
d412 1
a412 1
					msgq(sp, M_ERR, "Count underflow");
d428 1
a428 1
			sp->c_suffix = !sp->c_suffix;
d431 1
a431 1
			sp->g_suffix = !sp->g_suffix;
d442 1
a442 1
		    "Regular expression specified; r flag meaningless");
d445 1
a445 1
			if (!F_ISSET(sp, S_SRE_SET)) {
d447 1
a447 1
				    "No previous regular expression");
a450 1
			re = &sp->sre;
d461 1
a461 1
	if (IN_VI_MODE(sp) && sp->c_suffix && (lflag || nflag || pflag)) {
d463 1
a463 1
	"The #, l and p flags may not be combined with the c flag in vi mode");
d467 3
d485 1
a485 1
		if (INTERRUPTED(sp)) {
d487 1
a487 1
				msgq(sp, M_INFO, "Interrupted");
d494 1
a494 1
			goto ret1;
d501 1
a501 1
		if (sp->c_suffix) {
d537 3
a539 3
		 * Generally, match[0].rm_so is the offset of the start
		 * of the match from the start of the search, and offset
		 * is the offset of the start of the last search.
d541 2
a542 2
nextmatch:	match[0].rm_so = 0;
		match[0].rm_eo = len;
d545 2
a546 1
		eval = regexec(re, (char *)s + offset, 10, match, eflags);
d584 2
a585 1
		if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
d596 1
a596 1
		if (sp->c_suffix) {
d608 2
a609 2
			from.cno = match[0].rm_so + offset;
			to.cno = match[0].rm_eo;
d623 1
a623 1
				BUILD(sp, s +offset, match[0].rm_eo);
d642 15
a656 1
		BUILD(sp, s + offset, match[0].rm_so);
d660 1
a660 1
		if (regsub(sp, s + offset, &lb, &lbclen, &lblen, match))
d667 2
a668 2
skip:		offset += match[0].rm_eo;
		len -= match[0].rm_eo;
d678 1
a678 1
		if (didsub && sp->c_suffix && sp->g_suffix) {
d745 1
a745 1
		if (sp->g_suffix && do_eol_match) {
d780 1
a780 4
		if (sp->rptlchange != lno) {
			sp->rptlchange = lno;
			++sp->rptlines[L_CHANGED];
		}
d782 1
a782 6
		/*
		 * !!!
		 * Display as necessary.  Historic practice is to only
		 * display the last line of a line split into multiple
		 * lines.
		 */
a792 23

		if (!sp->c_suffix)
			sp->lno = lno;

		/*
		 * !!!
		 * Move the cursor to the last line changed.
		 */
		if (!sp->c_suffix)
			sp->lno = lno;
	}

	/*
	 * !!!
	 * Move the cursor to the first non-blank of the last line change.
	 *
	 * XXX
	 * This is NOT backward compatible with historic vi, which always
	 * moved to the last line actually changed.
	 */
	if (!sp->c_suffix) {
		sp->cno = 0;
		(void)nonblank(sp, ep, sp->lno, &sp->cno);
d801 1
a801 1
			msgq(sp, M_INFO, "No match found");
d810 3
a814 2
	if (lb != NULL)
		free(lb);
d822 2
a823 2
static __inline int
regsub(sp, ip, lbp, lbclenp, lblenp, match)
a827 1
	regmatch_t match[10];
d862 1
a862 1
	u_int __value = KEY_VAL(sp, __ch);				\
a904 1
				++rp;
d906 1
d914 6
a919 5
subzero:			if (match[no].rm_so == -1 ||
			    	    match[no].rm_eo == -1)
					break;
				mlen = match[no].rm_eo - match[no].rm_so;
				for (t = ip + match[no].rm_so; mlen--; ++t)
d954 18
@


1.1.1.4
log
@new public version of nvi
@
text
@d35 1
a35 1
static char sccsid[] = "@@(#)ex_subst.c	8.59 (Berkeley) 8/17/94";
@


1.1.1.5
log
@import of nvi 1.66
@
text
@a3 2
 * Copyright (c) 1992, 1993, 1994, 1995, 1996
 *	Keith Bostic.  All rights reserved.
d5 27
a31 1
 * See the LICENSE file for redistribution information.
a33 2
#include "config.h"

d35 1
a35 1
static const char sccsid[] = "@@(#)ex_subst.c	10.30 (Berkeley) 5/16/96";
d46 1
d50 1
d53 6
a58 2
#include "../common/common.h"
#include "../vi/vi.h"
d63 4
a66 6
static int re_conv __P((SCR *, char **, int *));
static int re_cscope_conv __P((SCR *, char **, int *));
static int re_sub __P((SCR *,
		char *, char **, size_t *, size_t *, regmatch_t [10]));
static int re_tag_conv __P((SCR *, char **, int *));
static int s __P((SCR *, EXCMD *, char *, regex_t *, u_int));
d69 1
a69 1
 * ex_s --
a72 2
 *
 * PUBLIC: int ex_s __P((SCR *, EXCMD *));
d75 1
a75 1
ex_s(sp, cmdp)
d77 2
a78 1
	EXCMD *cmdp;
d80 1
a80 1
	regex_t *re;
d83 1
a83 1
	int delim;
a96 2
	if (cmdp->argc == 0)
		goto subagain;
d103 1
a103 2
subagain:	return (ex_subagain(sp, cmdp));

d105 3
a107 2
	if (isalnum(delim) || delim == '\\')
		return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
d115 1
a115 1

d160 2
a161 2
		if (sp->re == NULL) {
			ex_emsg(sp, NULL, EXM_NOPREVRE);
d164 9
d174 2
a175 3
		/* Compile the RE if necessary. */
		if (!F_ISSET(sp, SC_RE_SEARCH) &&
		    re_compile(sp, sp->re, NULL, NULL, &sp->re_c, RE_C_SEARCH))
d177 13
a189 2
		flags = 0;
	} else {
d191 2
d194 2
a195 4
		 * Compile the RE.  Historic practice is that substitutes set
		 * the search direction as well as both substitute and search
		 * RE's.  We compile the RE twice, as we don't want to bother
		 * ref counting the pattern string and (opaque) structure.
d197 7
a203 7
		if (re_compile(sp,
		    ptrn, &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH))
			return (1);
		if (re_compile(sp,
		    ptrn, &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST))
			return (1);
		
a204 1
		sp->searchdir = FORWARD;
a205 1
	re = &sp->re_c;
d212 1
a212 1
	 * re_sub().
d235 1
a235 1
			free(sp->repl);
d285 1
a285 1
	return (s(sp, cmdp, p, re, flags));
a292 2
 *
 * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
d295 1
a295 1
ex_subagain(sp, cmdp)
d297 2
a298 1
	EXCMD *cmdp;
d300 2
a301 2
	if (sp->subre == NULL) {
		ex_emsg(sp, NULL, EXM_NOPREVRE);
d304 1
a304 5
	if (!F_ISSET(sp, SC_RE_SUBST) &&
	    re_compile(sp, sp->subre, NULL, NULL, &sp->subre_c, RE_C_SUBST))
		return (1);
	return (s(sp,
	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
a311 2
 *
 * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
d314 1
a314 1
ex_subtilde(sp, cmdp)
d316 2
a317 1
	EXCMD *cmdp;
d319 2
a320 2
	if (sp->re == NULL) {
		ex_emsg(sp, NULL, EXM_NOPREVRE);
d323 1
a323 5
	if (!F_ISSET(sp, SC_RE_SEARCH) &&
	    re_compile(sp, sp->re, NULL, NULL, &sp->re_c, RE_C_SEARCH))
		return (1);
	return (s(sp,
	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
a326 5
 * s --
 * Do the substitution.  This stuff is *really* tricky.  There are lots of
 * special cases, and general nastiness.  Don't mess with it unless you're
 * pretty confident.
 * 
d331 1
a331 1
 * when the replacement is done.  Don't change it unless you're *damned*
d371 6
d378 1
a378 1
s(sp, cmdp, s, re, flags)
d380 2
a381 1
	EXCMD *cmdp;
a385 1
	EVENT ev;
d387 1
a387 2
	TEXTH tiq;
	recno_t elno, lno, slno;
d389 1
a389 2
	size_t blen, cnt, last, lbclen, lblen, len, llen;
	size_t offset, saved_offset, scno;
a394 5
	NEEDFILE(sp, cmdp);

	slno = sp->lno;
	scno = sp->cno;

d401 1
a401 1
	 * specified substitute/replacement patterns (see ex_s()).
a422 2
	if (s == NULL)
		goto noargs;
d444 1
a444 1
					msgq(sp, M_ERR, "153|Count overflow");
d446 1
a446 1
					msgq(sp, M_ERR, "154|Count underflow");
a456 3
			if (!db_exist(sp, cmdp->addr2.lno) &&
			    db_last(sp, &cmdp->addr2.lno))
				return (1);
a462 6

			/* Ex text structure initialization. */
			if (F_ISSET(sp, SC_EX)) {
				memset(&tiq, 0, sizeof(TEXTH));
				CIRCLEQ_INIT(&tiq);
			}
d476 1
a476 1
		    "155|Regular expression specified; r flag meaningless");
d479 3
a481 2
			if (!F_ISSET(sp, SC_RE_SEARCH)) {
				ex_emsg(sp, NULL, EXM_NOPREVRE);
d485 1
a485 1
			re = &sp->re_c;
d492 1
a492 1
usage:		ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
d496 1
a496 1
noargs:	if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
d498 1
a498 1
"156|The #, l and p flags may not be combined with the c flag in vi mode");
d517 3
a519 1
		if (INTERRUPTED(sp))
d521 1
d524 4
a527 2
		if (db_get(sp, lno, DBG_FATAL, &s, &llen))
			goto err;
d588 1
a588 1
			goto err;
d631 5
d639 1
a639 5
			to.cno = match[0].rm_eo + offset;
			/*
			 * Both ex and vi have to correct for a change before
			 * the first character in the line.
			 */
d642 1
a642 9
			if (F_ISSET(sp, SC_VI)) {
				/*
				 * Only vi has to correct for a change after
				 * the last character in the line.
				 *
				 * XXX
				 * It would be nice to change the vi code so
				 * that we could display a cursor past EOL.
				 */
a646 29

				sp->lno = from.lno;
				sp->cno = from.cno;
				if (vs_refresh(sp, 1))
					goto err;

				vs_update(sp, msg_cat(sp,
				    "169|Confirm change? [n]", NULL), NULL);

				if (v_event_get(sp, &ev, 0, 0))
					goto err;
				switch (ev.e_event) {
				case E_CHARACTER:
					break;
				case E_EOF:
				case E_ERR:
				case E_INTERRUPT:
					goto lquit;
				default:
					v_event_err(sp, &ev);
					goto lquit;
				}
			} else {
				if (ex_print(sp, cmdp, &from, &to, 0) ||
				    ex_scprint(sp, &from, &to))
					goto lquit;
				if (ex_txt(sp, &tiq, 0, TXT_CR))
					goto err;
				ev.e_c = tiq.cqh_first->lb[0];
d648 2
a649 3

			switch (ev.e_c) {
			case CH_YES:
d651 1
a651 2
			default:
			case CH_NO:
d655 7
a661 4
			case CH_QUIT:
				/* Set the quit/interrupted flags. */
lquit:				quit = 1;
				F_SET(sp->gp, G_INTERRUPTED);
d664 2
a665 2
				 * Resolve any changes, then return to (and
				 * exit from) the main loop.
a670 7
		/*
		 * Set the cursor to the last position changed, converting
		 * from 1-based to 0-based.
		 */
		sp->lno = lno;
		sp->cno = match[0].rm_so;

d676 2
a677 2
		if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
			goto err;
d713 1
a713 1
					if (db_insert(sp, lno,
d715 1
a715 1
						goto err;
d725 6
a730 4
			if (db_set(sp, lno, lb + last, lbclen))
				goto err;
			if (db_get(sp, lno, DBG_FATAL, &s, &llen))
				goto err;
d781 1
a781 1
				if (db_insert(sp,
d783 1
a783 1
					goto err;
d792 2
a793 2
		if (db_set(sp, lno, lb + last, lbclen))
			goto err;
d811 1
a811 1
				(void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
d813 1
a813 1
				(void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
d815 1
a815 1
				(void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
d817 10
d831 1
a831 7
	 * Historically, vi attempted to leave the cursor at the same place if
	 * the substitution was done at the current cursor position.  Otherwise
	 * it moved it to the first non-blank of the last line changed.  There
	 * were some problems: for example, :s/$/foo/ with the cursor on the
	 * last character of the line left the cursor on the last character, or
	 * the & command with multiple occurrences of the matching string in the
	 * line usually left the cursor in a fairly random position.
d833 3
a835 5
	 * We try to do the same thing, with the exception that if the user is
	 * doing substitution with confirmation, we move to the last line about
	 * which the user was consulted, as opposed to the last line that they
	 * actually changed.  This prevents a screen flash if the user doesn't
	 * change many of the possible lines.
d837 1
a837 1
	if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
d839 1
a839 1
		(void)nonblank(sp, sp->lno, &sp->cno);
a845 1
	rval = 0;
d847 2
a848 4
		if (!F_ISSET(sp, SC_EX_GLOBAL)) {
			msgq(sp, M_ERR, "157|No match found");
			goto err;
		}
d850 1
a850 1
		F_SET(cmdp, E_AUTOPRINT);
d852 1
d854 1
a854 1
err:		rval = 1;
d865 1
a865 404
 * re_compile --
 *	Compile the RE.
 *
 * PUBLIC: int re_compile __P((SCR *,
 * PUBLIC:     char *, char **, size_t *, regex_t *, u_int));
 */
int
re_compile(sp, ptrn, ptrnp, lenp, rep, flags)
	SCR *sp;
	char *ptrn, **ptrnp;
	size_t *lenp;
	regex_t *rep;
	u_int flags;
{
	size_t len;
	int reflags, replaced, rval;
	char *p;

	/* Set RE flags. */
	reflags = 0;
	if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) {
		if (O_ISSET(sp, O_EXTENDED))
			reflags |= REG_EXTENDED;
		if (O_ISSET(sp, O_IGNORECASE))
			reflags |= REG_ICASE;
		if (O_ISSET(sp, O_ICLOWER)) {
			for (p = ptrn; *p != '\0'; ++p)
				if (isupper(*p))
					break;
			if (*p == '\0')
				reflags |= REG_ICASE;
		}
	}

	/* If we're replacing a saved value, clear the old one. */
	if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
		regfree(&sp->re_c);
		F_CLR(sp, SC_RE_SEARCH);
	}
	if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
		regfree(&sp->subre_c);
		F_CLR(sp, SC_RE_SUBST);
	}

	/*
	 * If we're saving the string, it's a pattern we haven't seen before,
	 * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
	 * later recompilation.   Free any previously saved value.
	 */
	replaced = 0;
	if (ptrnp != NULL) {
		if (*ptrnp != NULL) {
			free(*ptrnp);
			*ptrnp = NULL;
		}
		if (LF_ISSET(RE_C_CSCOPE)) {
			if (re_cscope_conv(sp, &ptrn, &replaced))
				return (1);
			/*
			 * XXX
			 * Currently, the match-any-<blank> expression used in
			 * re_cscope_conv() requires extended RE's.  This may
			 * not be right or safe.
			 */
			reflags |= REG_EXTENDED;
		} else if (LF_ISSET(RE_C_TAG)) {
			if (re_tag_conv(sp, &ptrn, &replaced))
				return (1);
		} else
			if (re_conv(sp, &ptrn, &replaced))
				return (1);
		len = strlen(ptrn);
		if (lenp != NULL)
			*lenp = len;
		if ((*ptrnp = v_strdup(sp, ptrn, len)) == NULL)
			return (1);
	}

	rval = regcomp(rep, ptrn, reflags);

	/* Free up any allocated memory. */
	if (replaced)
		FREE_SPACE(sp, ptrn, 0);

	if (rval) {
		if (!LF_ISSET(RE_C_SILENT))
			re_error(sp, rval, rep); 
		return (1);
	}

	if (LF_ISSET(RE_C_SEARCH))
		F_SET(sp, SC_RE_SEARCH);
	if (LF_ISSET(RE_C_SUBST))
		F_SET(sp, SC_RE_SUBST);

	return (0);
}

/*
 * re_conv --
 *	Convert vi's regular expressions into something that the
 *	the POSIX 1003.2 RE functions can handle.
 *
 * There are three conversions we make to make vi's RE's (specifically
 * the global, search, and substitute patterns) work with POSIX RE's.
 *
 * 1: If O_MAGIC is not set, strip backslashes from the magic character
 *    set (.[*~) that have them, and add them to the ones that don't.
 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
 *    from the last substitute command's replacement string.  If O_MAGIC
 *    is set, it's the string "~".
 * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
 *    new RE escapes.
 *
 * !!!/XXX
 * This doesn't exactly match the historic behavior of vi because we do
 * the ~ substitution before calling the RE engine, so magic characters
 * in the replacement string will be expanded by the RE engine, and they
 * weren't historically.  It's a bug.
 */
static int
re_conv(sp, ptrnp, replacedp)
	SCR *sp;
	char **ptrnp;
	int *replacedp;
{
	size_t blen, needlen;
	int magic;
	char *bp, *p, *t;

	/*
	 * First pass through, we figure out how much space we'll need.
	 * We do it in two passes, on the grounds that most of the time
	 * the user is doing a search and won't have magic characters.
	 * That way we can skip the malloc and memmove's.
	 */
	for (p = *ptrnp, magic = 0, needlen = 0; *p != '\0'; ++p)
		switch (*p) {
		case '\\':
			switch (*++p) {
			case '<':
				magic = 1;
				needlen += sizeof(RE_WSTART);
				break;
			case '>':
				magic = 1;
				needlen += sizeof(RE_WSTOP);
				break;
			case '~':
				if (!O_ISSET(sp, O_MAGIC)) {
					magic = 1;
					needlen += sp->repl_len;
				}
				break;
			case '.':
			case '[':
			case '*':
				if (!O_ISSET(sp, O_MAGIC)) {
					magic = 1;
					needlen += 1;
				}
				break;
			default:
				needlen += 2;
			}
			break;
		case '~':
			if (O_ISSET(sp, O_MAGIC)) {
				magic = 1;
				needlen += sp->repl_len;
			}
			break;
		case '.':
		case '[':
		case '*':
			if (!O_ISSET(sp, O_MAGIC)) {
				magic = 1;
				needlen += 2;
			}
			break;
		default:
			needlen += 1;
			break;
		}

	if (!magic) {
		*replacedp = 0;
		return (0);
	}

	/*
	 * Get enough memory to hold the final pattern.
	 *
	 * XXX
	 * It's nul-terminated, for now.
	 */
	GET_SPACE_RET(sp, bp, blen, needlen + 1);

	for (p = *ptrnp, t = bp; *p != '\0'; ++p)
		switch (*p) {
		case '\\':
			switch (*++p) {
			case '<':
				memmove(t, RE_WSTART, sizeof(RE_WSTART) - 1);
				t += sizeof(RE_WSTART) - 1;
				break;
			case '>':
				memmove(t, RE_WSTOP, sizeof(RE_WSTOP) - 1);
				t += sizeof(RE_WSTOP) - 1;
				break;
			case '~':
				if (O_ISSET(sp, O_MAGIC))
					*t++ = '~';
				else {
					memmove(t, sp->repl, sp->repl_len);
					t += sp->repl_len;
				}
				break;
			case '.':
			case '[':
			case '*':
				if (O_ISSET(sp, O_MAGIC))
					*t++ = '\\';
				*t++ = *p;
				break;
			default:
				*t++ = '\\';
				*t++ = *p;
			}
			break;
		case '~':
			if (O_ISSET(sp, O_MAGIC)) {
				memmove(t, sp->repl, sp->repl_len);
				t += sp->repl_len;
			} else
				*t++ = '~';
			break;
		case '.':
		case '[':
		case '*':
			if (!O_ISSET(sp, O_MAGIC))
				*t++ = '\\';
			*t++ = *p;
			break;
		default:
			*t++ = *p;
			break;
		}
	*t = '\0';

	*ptrnp = bp;
	*replacedp = 1;
	return (0);
}

/*
 * re_tag_conv --
 *	Convert a tags search path into something that the POSIX
 *	1003.2 RE functions can handle.
 */
static int
re_tag_conv(sp, ptrnp, replacedp)
	SCR *sp;
	char **ptrnp;
	int *replacedp;
{
	size_t blen, len;
	int lastdollar;
	char *bp, *p, *t;

	*replacedp = 0;

	len = strlen(p = *ptrnp);

	/* Max memory usage is 2 times the length of the string. */
	GET_SPACE_RET(sp, bp, blen, len * 2);

	t = bp;

	/* The last character is a '/' or '?', we just strip it. */
	if (p[len - 1] == '/' || p[len - 1] == '?')
		p[len - 1] = '\0';

	/* The next-to-last character is a '$', and it's magic. */
	if (p[len - 2] == '$') {
		lastdollar = 1;
		p[len - 2] = '\0';
	} else
		lastdollar = 0;

	/* The first character is a '/' or '?', we just strip it. */
	if (p[0] == '/' || p[0] == '?')
		++p;

	/* The second character is a '^', and it's magic. */
	if (p[0] == '^')
		*t++ = *p++;

	/*
	 * Escape every other magic character we can find, stripping the
	 * backslashes ctags inserts to escape the search delimiter
	 * characters.
	 */
	while (p[0]) {
		/* Ctags escapes the search delimiter characters. */
		if (p[0] == '\\' && (p[1] == '/' || p[1] == '?'))
			++p;
		else if (strchr("^.[]$*", p[0]))
			*t++ = '\\';
		*t++ = *p++;
	}
	if (lastdollar)
		*t++ = '$';
	*t++ = '\0';

	*ptrnp = bp;
	*replacedp = 1;
	return (0);
}

/*
 * re_cscope_conv --
 *	 Convert a cscope search path into something that the POSIX
 *      1003.2 RE functions can handle.
 */
static int
re_cscope_conv(sp, ptrnp, replacedp)
	SCR *sp;
	char **ptrnp;
	int *replacedp;
{
	size_t blen, len, nspaces;
	char *bp, *p, *re;

	/*
	 * Each space in the source line printed by cscope represents an
	 * arbitrary sequence of spaces, tabs, and comments.
	 */
#define	CSCOPE_RE_SPACE		"([ \t]|/\\*([^*]|\\*/)*\\*/)*"
	for (nspaces = 0, p = *ptrnp; *p != '\0'; ++p)
		if (*p == ' ')
			++nspaces;

	/*
	 * Allocate plenty of space:
	 *	the string, plus potential escaping characters
	 *	nspaces + 2 copies of CSCOPE_RE_SPACE
	 *	^, $, nul terminator characters
	 */
	len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
	GET_SPACE_RET(sp, bp, blen, len);

	p = bp;
	*p++ = '^';
	memcpy(p, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
	p += sizeof(CSCOPE_RE_SPACE) - 1;

	for (re = *ptrnp; *re != '\0'; ++re)
		if (*re == ' ') {
			memcpy(p, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
			p += sizeof(CSCOPE_RE_SPACE) - 1;
		} else {
			if (strchr("\\^.[]$*", *re))
				*p++ = '\\';
			*p++ = *re;
		}

	memcpy(p, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
	p += sizeof(CSCOPE_RE_SPACE) - 1;
	*p++ = '$';
	*p = '\0';

	*ptrnp = bp;
	*replacedp = 1;
	return (0);
}

/*
 * re_error --
 *	Report a regular expression error.
 *
 * PUBLIC: void re_error __P((SCR *, int, regex_t *));
 */
void
re_error(sp, errcode, preg)
	SCR *sp;
	int errcode;
	regex_t *preg;
{
	size_t s;
	char *oe;

	s = regerror(errcode, preg, "", 0);
	if ((oe = malloc(s)) == NULL)
		msgq(sp, M_SYSERR, NULL);
	else {
		(void)regerror(errcode, preg, oe, s);
		msgq(sp, M_ERR, "RE error: %s", oe);
		free(oe);
	}
}

/*
 * re_sub --
d868 2
a869 2
static int
re_sub(sp, ip, lbp, lbclenp, lblenp, match)
d907 1
a907 1
#define	OUTCH(ch) {							\
d966 1
a966 1
					OUTCH(*t);
d994 1
a994 1
		OUTCH(ch);
@


1.1.1.6
log
@import of nvi 1.79
@
text
@d13 1
a13 1
static const char sccsid[] = "@@(#)ex_subst.c	10.37 (Berkeley) 9/15/96";
d35 2
a36 2
static int re_conv __P((SCR *, char **, size_t *, int *));
static int re_cscope_conv __P((SCR *, char **, size_t *, int *));
d39 1
a39 1
static int re_tag_conv __P((SCR *, char **, size_t *, int *));
d94 1
a94 1
	 * Get the pattern string, toss escaping characters.
d106 1
a106 1
	 * Only toss an escaping character if it escapes a delimiter.
d109 1
a109 1
	 * escaping a single escaping character is removed, but that's
d142 3
a144 3
		/* Re-compile the RE if necessary. */
		if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
		    sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
d155 2
a156 2
		if (re_compile(sp, ptrn, t - ptrn,
		    &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH))
d158 2
a159 2
		if (re_compile(sp, ptrn, t - ptrn,
		    &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST))
d182 1
a182 1
	 * Only toss an escaping character if it escapes a delimiter or
d225 1
a225 1
				memcpy(t, sp->repl, sp->repl_len);
d241 1
a241 1
			memcpy(sp->repl, bp, len);
d265 2
a266 2
	if (!F_ISSET(sp, SC_RE_SUBST) && re_compile(sp,
	    sp->subre, sp->subre_len, NULL, NULL, &sp->subre_c, RE_C_SUBST))
d289 2
a290 2
	if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
	    sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
d330 1
a330 1
	memcpy(lb + lbclen, l, len);					\
d519 1
a519 1
			memcpy(bp, s, llen);
d753 1
a753 1
			memcpy(bp, s, llen);
d890 1
a890 1
 * PUBLIC:     char *, size_t, char **, size_t *, regex_t *, u_int));
d893 1
a893 1
re_compile(sp, ptrn, plen, ptrnp, lenp, rep, flags)
d896 1
a896 1
	size_t plen, *lenp;
d912 1
a912 1
			for (p = ptrn, len = plen; len > 0; ++p, --len)
d915 1
a915 1
			if (len == 0)
d935 1
d937 4
d942 1
a942 1
			if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
d952 1
a952 1
			if (re_tag_conv(sp, &ptrn, &plen, &replaced))
d955 1
a955 1
			if (re_conv(sp, &ptrn, &plen, &replaced))
d957 1
a957 6

		/* Discard previous pattern. */
		if (*ptrnp != NULL) {
			free(*ptrnp);
			*ptrnp = NULL;
		}
d959 4
a962 18
			*lenp = plen;

		/*
		 * Copy the string into allocated memory.
		 *
		 * XXX
		 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
		 * for now.  There's just no other solution.  
		 */
		MALLOC(sp, *ptrnp, char *, plen + 1);
		if (*ptrnp != NULL) {
			memcpy(*ptrnp, ptrn, plen);
			(*ptrnp)[plen] = '\0';
		}

		/* Free up conversion-routine-allocated memory. */
		if (replaced)
			FREE_SPACE(sp, ptrn, 0);
d964 1
a964 2
		if (*ptrnp == NULL)
			return (1);
d966 3
a968 2
		ptrn = *ptrnp;
	}
d970 1
a970 6
	/*
	 * XXX
	 * Regcomp isn't 8-bit clean, so we just lost if the pattern
	 * contained a nul.  Bummer!
	 */
	if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
d1007 1
a1007 1
re_conv(sp, ptrnp, plenp, replacedp)
a1009 1
	size_t *plenp;
d1012 1
a1012 1
	size_t blen, len, needlen;
d1020 1
a1020 1
	 * That way we can skip most of the memory allocation and copies.
d1022 1
a1022 2
	magic = 0;
	for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
d1025 11
a1035 4
			if (len > 1) {
				--len;
				switch (*++p) {
				case '<':
d1037 7
a1043 3
					needlen += sizeof(RE_WSTART);
					break;
				case '>':
d1045 1
a1045 18
					needlen += sizeof(RE_WSTOP);
					break;
				case '~':
					if (!O_ISSET(sp, O_MAGIC)) {
						magic = 1;
						needlen += sp->repl_len;
					}
					break;
				case '.':
				case '[':
				case '*':
					if (!O_ISSET(sp, O_MAGIC)) {
						magic = 1;
						needlen += 1;
					}
					break;
				default:
					needlen += 2;
d1047 4
a1050 2
			} else
				needlen += 1;
d1076 7
a1082 3
	/* Get enough memory to hold the final pattern. */
	*replacedp = 1;
	GET_SPACE_RET(sp, bp, blen, needlen);
d1084 1
a1084 1
	for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
d1087 21
a1107 30
			if (len > 1) {
				--len;
				switch (*++p) {
				case '<':
					memcpy(t,
					    RE_WSTART, sizeof(RE_WSTART) - 1);
					t += sizeof(RE_WSTART) - 1;
					break;
				case '>':
					memcpy(t,
					    RE_WSTOP, sizeof(RE_WSTOP) - 1);
					t += sizeof(RE_WSTOP) - 1;
					break;
				case '~':
					if (O_ISSET(sp, O_MAGIC))
						*t++ = '~';
					else {
						memcpy(t,
						    sp->repl, sp->repl_len);
						t += sp->repl_len;
					}
					break;
				case '.':
				case '[':
				case '*':
					if (O_ISSET(sp, O_MAGIC))
						*t++ = '\\';
					*t++ = *p;
					break;
				default:
d1109 3
a1111 3
					*t++ = *p;
				}
			} else
d1113 2
d1118 1
a1118 1
				memcpy(t, sp->repl, sp->repl_len);
d1134 1
d1137 1
a1137 1
	*plenp = t - bp;
d1147 1
a1147 1
re_tag_conv(sp, ptrnp, plenp, replacedp)
a1149 1
	size_t *plenp;
d1156 3
a1158 1
	len = *plenp;
a1160 1
	*replacedp = 1;
a1162 1
	p = *ptrnp;
d1165 6
a1170 7
	/* If the last character is a '/' or '?', we just strip it. */
	if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
		--len;

	/* If the next-to-last or last character is a '$', it's magic. */
	if (len > 0 && p[len - 1] == '$') {
		--len;
d1172 1
d1176 2
a1177 2
	/* If the first character is a '/' or '?', we just strip it. */
	if (len > 0 && (p[0] == '/' || p[0] == '?')) {
a1178 2
		--len;
	}
d1180 2
a1181 2
	/* If the first or second character is a '^', it's magic. */
	if (p[0] == '^') {
a1182 2
		--len;
	}
d1185 2
a1186 2
	 * Escape every other magic character we can find, meanwhile stripping
	 * the backslashes ctags inserts when escaping the search delimiter
d1189 3
a1191 2
	for (; len > 0; --len) {
		if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
d1193 1
a1193 2
			--len;
		} else if (strchr("^.[]$*", p[0]))
d1199 1
d1202 1
a1202 1
	*plenp = t - bp;
d1212 1
a1212 1
re_cscope_conv(sp, ptrnp, plenp, replacedp)
a1214 1
	size_t *plenp;
d1218 1
a1218 1
	char *bp, *p, *t;
d1225 1
a1225 1
	for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
d1231 3
a1233 3
	 *	the string, plus potential escaping characters;
	 *	nspaces + 2 copies of CSCOPE_RE_SPACE;
	 *	^, $, nul terminator characters.
a1234 1
	*replacedp = 1;
d1238 9
a1246 11
	p = *ptrnp;
	t = bp;

	*t++ = '^';
	memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
	t += sizeof(CSCOPE_RE_SPACE) - 1;

	for (len = *plenp; len > 0; ++p, --len)
		if (*p == ' ') {
			memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
			t += sizeof(CSCOPE_RE_SPACE) - 1;
d1248 3
a1250 3
			if (strchr("\\^.[]$*+?()|{}", *p))
				*t++ = '\\';
			*t++ = *p;
d1253 4
a1256 3
	memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
	t += sizeof(CSCOPE_RE_SPACE) - 1;
	*t++ = '$';
d1259 1
a1259 1
	*plenp = t - bp;
d1329 1
a1329 1
	 * all escaping characters.  This (hopefully) matches historic practice.
d1331 1
a1331 1
#define	OUTCH(ch, nltrans) {						\
d1334 1
a1334 1
	if (nltrans && (__value == K_CR || __value == K_NL)) {		\
d1390 1
a1390 1
					OUTCH(*t, 0);
d1418 1
a1418 1
		OUTCH(ch, 1);
@

