head 1.4; access; symbols pkgsrc-2013Q2:1.4.0.54 pkgsrc-2013Q2-base:1.4 pkgsrc-2012Q4:1.4.0.52 pkgsrc-2012Q4-base:1.4 pkgsrc-2011Q4:1.4.0.50 pkgsrc-2011Q4-base:1.4 pkgsrc-2011Q2:1.4.0.48 pkgsrc-2011Q2-base:1.4 pkgsrc-2009Q4:1.4.0.46 pkgsrc-2009Q4-base:1.4 pkgsrc-2008Q4:1.4.0.44 pkgsrc-2008Q4-base:1.4 pkgsrc-2008Q3:1.4.0.42 pkgsrc-2008Q3-base:1.4 cube-native-xorg:1.4.0.40 cube-native-xorg-base:1.4 pkgsrc-2008Q2:1.4.0.38 pkgsrc-2008Q2-base:1.4 pkgsrc-2008Q1:1.4.0.36 pkgsrc-2008Q1-base:1.4 pkgsrc-2007Q4:1.4.0.34 pkgsrc-2007Q4-base:1.4 pkgsrc-2007Q3:1.4.0.32 pkgsrc-2007Q3-base:1.4 pkgsrc-2007Q2:1.4.0.30 pkgsrc-2007Q2-base:1.4 pkgsrc-2007Q1:1.4.0.28 pkgsrc-2007Q1-base:1.4 pkgsrc-2006Q4:1.4.0.26 pkgsrc-2006Q4-base:1.4 pkgsrc-2006Q3:1.4.0.24 pkgsrc-2006Q3-base:1.4 pkgsrc-2006Q2:1.4.0.22 pkgsrc-2006Q2-base:1.4 pkgsrc-2006Q1:1.4.0.20 pkgsrc-2006Q1-base:1.4 pkgsrc-2005Q4:1.4.0.18 pkgsrc-2005Q4-base:1.4 pkgsrc-2005Q3:1.4.0.16 pkgsrc-2005Q3-base:1.4 pkgsrc-2005Q2:1.4.0.14 pkgsrc-2005Q2-base:1.4 pkgsrc-2005Q1:1.4.0.12 pkgsrc-2005Q1-base:1.4 pkgsrc-2004Q4:1.4.0.10 pkgsrc-2004Q4-base:1.4 pkgsrc-2004Q3:1.4.0.8 pkgsrc-2004Q3-base:1.4 pkgsrc-2004Q2:1.4.0.6 pkgsrc-2004Q2-base:1.4 pkgsrc-2004Q1:1.4.0.4 pkgsrc-2004Q1-base:1.4 pkgsrc-2003Q4:1.4.0.2 pkgsrc-2003Q4-base:1.4 buildlink2:1.3.0.2 buildlink2-base:1.4 netbsd-1-5-PATCH003:1.3 netbsd-1-5-PATCH001:1.3 netbsd-1-5-RELEASE:1.3 netbsd-1-4-PATCH003:1.3 netbsd-1-4-PATCH002:1.3 comdex-fall-1999:1.3 netbsd-1-4-PATCH001:1.2 FreeBSD-current-1999-06-07:1.1.1.1 FREEBSD:1.1.1; locks; strict; comment @# @; 1.4 date 2002.05.31.15.36.21; author seb; state dead; branches; next 1.3; 1.3 date 99.08.27.01.34.11; author sakamoto; state Exp; branches 1.3.2.1; next 1.2; 1.2 date 99.06.08.09.44.46; author sakamoto; state Exp; branches; next 1.1; 1.1 date 99.06.08.09.37.37; author sakamoto; state Exp; branches 1.1.1.1; next ; 1.3.2.1 date 2002.06.23.18.49.36; author jlam; state dead; branches; next ; 1.1.1.1 date 99.06.08.09.37.37; author sakamoto; state Exp; branches; next ; desc @@ 1.4 log @Remove all package and category files from the japanese category. This effectively retire the japanese category. @ text @$NetBSD: patch-aa,v 1.3 1999/08/27 01:34:11 sakamoto Exp $ --- doc/Makefile.in.orig Fri Aug 27 09:21:12 1999 +++ doc/Makefile.in Fri Aug 27 09:22:13 1999 @@@@ -186,11 +186,4 @@@@ done @@$(POST_INSTALL) - @@if $(SHELL) -c 'install-info --version | sed 1q | fgrep -s -v -i debian' >/dev/null 2>&1; then \ - list='$(INFO_DEPS)'; \ - for file in $$list; do \ - echo " install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$$file";\ - install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$$file || :;\ - done; \ - else : ; fi uninstall-info: @ 1.3 log @Update multibyte patch sedmb107 to sedmb108. FIX: multibyte handling with direct input from file. @ text @d1 1 a1 1 $NetBSD$ @ 1.3.2.1 log @Merge from pkgsrc-current to buildlink2 branch. @ text @d1 1 a1 1 $NetBSD: patch-aa,v 1.3 1999/08/27 01:34:11 sakamoto Exp $ @ 1.2 log @Adapt this package to NetBSD. Update ja-sed 1.18 to 3.02. @ text @d3 3 a5 4 --- doc/Makefile.in.orig Tue Jun 8 10:52:07 1999 +++ doc/Makefile.in Tue Jun 8 10:53:41 1999 @@@@ -163,12 +163,6 @@@@ done; \ d9 4 a12 3 - for file in $(INFO_DEPS); do \ - echo " install-info --info-dir=$(infodir) $(infodir)/$$file";\ - install-info --info-dir=$(infodir) $(infodir)/$$file || :;\ a16 1 $(PRE_UNINSTALL) @ 1.1 log @Initial revision @ text @d1 1 a1 2 $B!|!|!|!|!|(B GNU sed version 1.18 + multi-byte extension 1.03 $B!|!|!|!|!|(B $B!|!|!|!|!|(B Jun. 2, 1994 by t^2 $B!|!|!|!|!|(B d3 12 a14 2211 $B$3$N%U%!%$%k$O(B GNU sed version 1.18 (sed-1.18) $B$N%=!<%9%3!<%I$+$i(B, $B$=(B $B$N%^%k%A%P%$%HJ8;zBP1~HG(B sed-1.18+mb1.03 $B$N%=!<%9%3!<%I$r@@8@@.$9$k$?$a(B $B$N:9J,$r4^$s$G$$$^$9(B. sed-1.18 $B$N%=!<%9$rE83+$7$F$"$k%G%#%l%/%H%j$G(B % patch -p1 < $B$3$N%U%!%$%k(B $B$J$I$H$7$F%Q%C%A$rEv$F$F$/$@@$5$$(B. $B$=$N8e(B README.MB $B$rFI$s$G$/$@@$5$$(B. $B")(B810 $BJ!2,;TCf1{6hG_8w1`CDCO(B 7-207 TEL/FAX: 092-731-4025 (TEL/FAX $B<+F0@@ZBX$((B) 092-724-6342 (TEL $B$N$_(B) E-mail: NBC02362@@niftyserve.or.jp t^2 ($BC+K\9'9@@(B) diff -ru2N sed-1.18/ChangeLog.MB sed+mb1.03/ChangeLog.MB --- sed-1.18/ChangeLog.MB Thu Jan 1 09:00:00 1970 +++ sed+mb1.03/ChangeLog.MB Thu Jun 2 17:00:34 1994 @@@@ -0,0 +1,206 @@@@ +Thu Jun 2 16:56:47 1994 Takahiro Tanimoto (tt@@isaac) + + * Version 1.18 + multi-byte extension 1.03 released. + +Sat Mar 5 16:31:53 1994 Takahiro Tanimoto (tt@@isaac) + + * README.MSC: PC-9800 $B%7%j!<%:MQ(B MS-C 6.00A $B$N(B, $B%o%$%k%I%+!<%IE8(B + $B3+%k!<%A%s$N%P%0$KBP=h$7$?(B. $B0JA0$N(B stdargv.diff $B$r$3$l$KE}9g$7(B, + $B:o=|$7$?(B. (Thanks to $BJ!9@@K.$5$s(B ) + +Sat Aug 21 18:03:33 1993 Takahiro Tanimoto (tt at pc98) + + * Version 1.18 + multi-byte extension 1.02 released. + +Thu Aug 19 03:48:33 1993 Takahiro Tanimoto (tt@@isaac) + + * regex.c (re_compile_fastmap): charset_not $B$N(B fastmap $B$N:n@@.=h(B + $BM}$,4V0c$C$F$$$F(B, fastmap $B$r;HMQ$9$k>l9g(B (sed $B$b4^$^$l$k(B), $B@@55,(B + $BI=8=$N@@hF,$N(B [^$B#A(B] $B$d(B [^a] $B$KNc$($P(B $B#B(B $B$,%^%C%A$7$J$+$C$?(B. + (Thanks to $B>.20NIM4$5$s(B ) + +Tue Aug 10 00:23:37 1993 Takahiro Tanimoto (tt@@isaac) + + * regex.c (set_list_bits): $BJ8;z%/%i%9Cf$N%^%k%A%P%$%HJ8;z$N:GE,(B + $B2=$G(B, $B6h4V=*E@@$N99?7=hM}ItJ,$K%P%0$,$"$j(B, [$B#A(B-$B#C#E(B-$B#G#B(B-$B#D(B] $B$r:G(B + $BE,2=$9$k$H(B [$B#A(B-$B#G(B] $B$@@$,(B, $B$3$l$,(B [$B#A(B-$B#E(B] $B$H$J$C$F$7$^$C$F$$$?(B. + +Fri Jul 23 15:59:26 1993 Takahiro Tanimoto (tt@@isaac) + + * Version 1.18 + multi-byte extension 1.01 released. + +Fri Jul 23 02:05:23 1993 Takahiro Tanimoto (tt at pc98) + + * DEFS.dos: strcmpi(), strncmpi() $B$r(B stricmp(), strnicmp() $B$KJQ(B + $B99(B. + + * sed.c (main): MS-DOS $B$N>l9g(B, argv[0] $B$+$i%G%#%l%/%H%jL>(B, $B3HD%(B + $B;R$r.J8;z$K$7$F$+$i(B argv[0], myname $B$X%;%C%H$9$k$h$&(B + $B$KJQ99$7$?(B. + + * stdargv.diff: $BDI2C(B. + +Sun Jul 11 02:31:13 1993 Takahiro Tanimoto (tt@@isaac) + + * sed.c (compare_mbcs): $BF1$8%*%V%8%'%/%H$KBP$9$k%]%$%s%?$rEO$5$l(B + $B$?;~$K$O(B 0 $B$rJV$9$h$&$K=$@@5(B. + +Sat Jul 10 17:12:46 1993 Takahiro Tanimoto (tt@@isaac) + + * configure.in: strncasecmp $B$N%A%'%C%/$rDI2C(B. + + * sed.c (main): $B%^%k%A%P%$%HJ8;z$N%?%$%W;XDj%*%W%7%g%s$N7A<0$r(B + gawk-2.15.2+mb1.01, grep-2.0+mb1.01 $B$K9g$o$;$FJQ99(B. + + * grep-2.0+mb1.01 $B$+$i(B mbc.[ch] $B$rM"F~(B. regex.[ch], sed.c $B$r$=(B + $B$l$K=>$C$F=q$-49$($?(B. + + * regex.c (print_compiled_pattern): printf() $B$G(B syntax $B$N=PNO=q(B + $B<0$,4V0c$C$F$$$?(B. + + * regex.h: $BDj?t$N8e$K(B U, UL $B$r$D$1$k$H(B old-C $B$G%3%s%Q%$%k$G$-$J(B + $B$$(B. $B$3$l$i$r%-%c%9%H$KJQ99$7$?(B. + + * regex.h: RE_DUP_MAX $B$NDj5A$r(B 16 $B%S%C%H(B int $B$N%^%7%s$G$b%*!<%P(B + $B%U%m!<$7$J$$=q$-J}$K=$@@5(B. + +Sat Jul 3 06:53:23 1993 Takahiro Tanimoto (tt@@isaac) + + * Version 1.18 + multi-byte extension 1.00 released. + +Fri Jul 2 11:33:41 1993 Takahiro Tanimoto (tt@@isaac) + + * regex.c (regex_compile): set_list_bits $B$+$i$NJV$jCM$NH=Dj$K%P(B + $B%0$,$"$C$?$N$r=$@@5(B. ($BH=Dj$NI,MW$,$J$$ItJ,$@@$C$?$N$GC1$K:o=|$7$?(B.) + + * sed.c (compile_program), regex.c (set_list_bits): $B;HMQ$7$F$$$J(B + $B$$JQ?t$,$"$C$?$N$G:o=|$7$?(B. + +Fri Jul 2 10:34:35 1993 Takahiro Tanimoto (tt at pc98) + + * regex.c (re_match_2): $B%]%$%s%?F1;N$r(B int $B$K%-%c%9%H$7$F$+$iHf(B + $B3S$7$F$$$kItJ,$,$"$C$?$,(B, $B$=$N%-%c%9%H$r:o=|$7$?(B. $B%*%j%8%J%k$N(B + $B:n $B$G@@k8@@$5$l$F$$$FB?=EDj5A%(%i!<(B + $B$K$J$k(B. + + * sed.c: MS-C (6.00A) $B$K$O%W%m%H%?%$%W@@k8@@Cf$G(B + + void foo(int x, int); + + $B$N$h$&$KJQ?tL>$rIU$1$?0z?t$H7?;XDj$@@$1$N0z?t$r:.:_$5$;$k$H%(%i!<(B + $B$K$J$k$H$$$&%P%0$,$"$C$?(B. compile_program $B$N%W%m%H%?%$%W@@k8@@$,(B + $B$3$N%P%0$N$;$$$GDL$i$J$+$C$?$?$aJQ?tL>$rDI2C$7$F2sHr$7$?(B. + + * Makefile.msc DEFS.dos: DOS $B%5%]!<%H$N$?$aDI2C(B. + +Sat Jun 19 09:53:47 1993 Takahiro Tanimoto (tt@@isaac) + + * Version 1.18 + multi-byte extension 0.04 released. + + * sed.c (compile_program): y $B%3%^%s%I$N=hM}$,0lIt(B byte order $B$K(B + $B0MB8$7$F$$$F(B, little endian $B$N%^%7%s$G8mF0:n$7$F$$$?(B. (Thanks + to $BD9ED?N;V$5$s(B ) + +Wed Jun 2 02:10:51 1993 Takahiro Tanimoto (tt@@isaac) + + * sed-1.17-1.18.diff $B$rE,MQ(B. + +Tue Jun 1 20:33:36 1993 Takahiro Tanimoto (tt@@isaac) + + * Version 1.17 + multi-byte extension 0.03 released. + + * Version 1.15 $B"*(B 1.17 $B$NJQ99$r;\$7$?(B. + +Tue May 25 00:12:06 1993 Takahiro Tanimoto (tt@@isaac) + + * Version 1.15 + multi-byte extension 0.02 released. + +Mon May 24 15:37:50 1993 Takahiro Tanimoto (tt@@isaac) + + * sed.c: $BITI,MW$J(B isascii() $B$r:o=|$7$?(B. + + * regex.c (re_search_2): $B8eJ}$X(B advance $B$9$k:]$N%P%0$r=$@@5(B. + + * sed-1.14-1.15.diff $B$rE,MQ(B. + +Sat May 22 02:05:32 1993 Takahiro Tanimoto (tt@@isaac) + + * regex.c (re_match_2): exactn $B$N=hM}ItJ,$G(B, #if 0 $B$r(B #if 1 $B$K$7(B + $B$?>l9g(B, $B@@5$7$$=hM}$r9T$C$F$$$J$+$C$?$N$r=$@@5(B. + +Fri May 21 21:49:36 1993 Takahiro Tanimoto (tt@@isaac) + + * Version 1.14b + multi-byte extension 0.01. + + * regex.[ch]: mbcharset, mbcharset_not $B$rGQ;_(B. $BBe$o$j$K(B + charset, charset_not $B$,%^%k%A%P%$%HJ8;z$r$bJ];}$9$k(B. + + * regex.c (set_list_bits), regex.h (RE_TRANSLATED_RANGE): $BGQ;_(B. + +Wed May 19 05:27:18 1993 Takahiro Tanimoto (tt@@isaac) + + * sed.c (execute_program): l $B%3%^%s%I$G%^%k%A%P%$%HJ8;z$r=PNO$9(B + $B$k$h$&$KJQ99$7$?(B. (Shift-JIS $B$N:]$NH>3Q%+%JJ8;z$b(B.) + +Tue May 18 01:36:54 1993 Takahiro Tanimoto (tt@@albert) + + * configure.in: memmove() $B$N%A%'%C%/$rDI2C(B. + + * regex.h: !__STDC__ $B$N$H$-$K(B const $B$N(B #define $B$rDI2C(B. + + * sed.c (compile_regex): [^]...] $B$N07$$$K%*%j%8%J%k$N%P%0$,$"$C(B + $B$?$,(B, $B%^%k%A%P%$%HBP1~$KJQ99$9$k:](B, $B<+F0E*$K(B fix $B$5$l$?(B. + +Mon May 17 03:37:51 1993 Takahiro Tanimoto (tt@@isaac) + + * regex.h (RE_TRANSLATED_RANGE): $B2<5-$N5!G=$NA*Br$N$?$a$KDI2C(B. + + * regex.c (set_list_bits, regex_compile): translate([a-z]) $B$N2r(B + $Bl9g$K(B, translate([a-z]) $B$r(B, $B0JA0$O(B [ABC] $B$H$7$F2r .fname + echo -n sed-`sed -e '/version_string/!d' -e 's/[^0-9.]*\([0-9.]*\).*/\1/' -e q sed.c` > .fname + echo +mb`sed -e '/^ + multi-byte extension/!d' -e 's/[^0-9.]*\([0-9.]*\).*/\1/' -e q sed.c` >> .fname rm -rf `cat .fname` mkdir `cat .fname` @@@@ -99,5 +113,6 @@@@ dist.afs: $(distfiles) - echo sed-`sed -e '/version_string/!d' -e 's/[^0-9.]*\([0-9.]*\).*/\1/' -e q sed.c` > .fname + echo -n sed-`sed -e '/version_string/!d' -e 's/[^0-9.]*\([0-9.]*\).*/\1/' -e q sed.c` > .fname + echo +mb`sed -e '/^ + multi-byte extension/!d' -e 's/[^0-9.]*\([0-9.]*\).*/\1/' -e q sed.c` >> .fname rm -rf `cat .fname` mkdir `cat .fname` diff -ru2N sed-1.18/Makefile.msc sed+mb1.03/Makefile.msc --- sed-1.18/Makefile.msc Thu Jan 1 09:00:00 1970 +++ sed+mb1.03/Makefile.msc Fri Jul 23 02:49:14 1993 @@@@ -0,0 +1,106 @@@@ +# Generated automatically from Makefile.in by configure. +# Makefile for GNU SED, a batch editor. +# Copyright (C) 1987, 1991 Free Software Foundation, Inc. +# +# This file is part of GNU SED. +# +# GNU SED is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# GNU SED is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU SED; see the file COPYING. If not, write to +# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + +# Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) +# Last change: Jul. 23, 1993 by t^2 + +#### Start of system configuration section. #### + +srcdir = . +VPATH = . + +CC = cl -nologo -AL +INSTALL = cp +INSTALL_PROGRAM = $(INSTALL) + +# Things you might add to DEFS: +# -DSTDC_HEADERS If you have ANSI C headers and libraries. +# -DUSG If you have System V/ANSI C string +# and memory functions and headers. +# -D__CHAR_UNSIGNED__ If type `char' is unsigned. +# gcc defines this automatically. +# -DNO_VFPRINTF If you lack vprintf function (but have _doprnt). +# +# For DOS, you must add them in DEFS.dos + +# Things you might set to MBCTYPE_DEF to spec. default multi-byte char type. +# -DEUC will make default multi-byte char type EUC and +# -DSJIS SJIS. +# If you do not set EUC/SJIS, sed assumes no multi-byte +# char as default. +MBCTYPE_DEF = -DSJIS + +LIBS = setargv /link /noe /st:10000 + +CFLAGS = -Ox -DREGEX_MALLOC=1 +LDFLAGS = +extra_objs = + +prefix = +exec_prefix = $(prefix) + +# Prefix for each installed program, normally empty or `g'. +binprefix = + +# Where to install the executable. +bindir = $(exec_prefix)/bin + +#### End of system configuration section. #### + +objs = sed.obj utils.obj regex.obj getopt.obj getopt1.obj mbc.obj +srcs = sed.c utils.c regex.c getopt.c getopt1.c mbc.c alloca.c + +distfiles = COPYING COPYING.LIB ChangeLo README INSTALL Makefile.in \ + configur configur.in regex.h getopt.h $(srcs) \ + ChangeLo.MB README.MB Makefile.msc DEFS.dos mbc.h stdargv.dif + +all_objs= $(objs) $(extra_objs) +all: sed.exe + +.c.obj: + cat defs.dos $*.c > $*_.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $(MBCTYPE_DEF) -I$(srcdir) -Fo$*.obj $*_.c + rm $*_.c + +sed.exe: $(all_objs) + $(CC) -o $@@ $(LDFLAGS) $(all_objs) $(LIBS) + +sed.obj regex.obj: regex.h +sed.obj getopt1.obj: getopt.h +sed.obj regex.obj: mbc.h + +install: all + $(INSTALL_PROGRAM) sed.exe $(bindir)/$(binprefix)sed.exe + +TAGS: $(srcs) + etags $(srcs) + +clean: + rm -f sed.exe *.obj + +mostlyclean: clean + +distclean: clean + rm -f Makefile config.sta + +realclean: distclean + rm -f TAGS + +alloca.obj : alloca.c diff -ru2N sed-1.18/README.MB sed+mb1.03/README.MB --- sed-1.18/README.MB Thu Jan 1 09:00:00 1970 +++ sed+mb1.03/README.MB Thu Jun 2 16:27:07 1994 @@@@ -0,0 +1,228 @@@@ +$B!|!|!|!|!|(B GNU sed version 1.18 + multi-byte extension 1.03 $B!|!|!|!|!|(B +$B!|!|!|!|!|(B Jun. 2, 1994 by t^2 $B!|!|!|!|!|(B + + sed-1.18+mb1.03 -- $B%^%k%A%P%$%HJ8;zBP1~HG(B GNU sed + +$B!|35MW(B + + GNU $B%W%m%8%'%/%H$K$h$k(B sed $B$r%^%k%A%P%$%HJ8;zBP1~2=$7$?$b$N$G$9(B. + +$B!|;HMQK!(B + + GNU sed $B$+$i$N3HD%ItJ,$@@$1$r@@bL@@$7$^$9(B. + + $BA}$($?%*%W%7%g%s$O0J2<$NDL$j$G$9(B. + + -Wctype=ASCII $B$^$?$O(B --ctype=ASCII + $B%^%k%A%P%$%HJ8;z$r9MN8$7$^$;$s(B. $B$3$N%*%W%7%g%s$r;HMQ$7$?>l(B + $B9g(B, GNU sed $B$N%*%j%8%J%k$HF1$8F0:n$K$J$k$O$:$G$9(B. + + -Wctype=EUC $B$^$?$O(B --ctype=EUC + $B%^%k%A%P%$%HJ8;z$H$7$F(B EUC $B$rG'<1$7$^$9(B. + + -Wctype=SJIS $B$^$?$O(B --ctype=SJIS + $B%^%k%A%P%$%HJ8;z$H$7$F(B Shift-JIS $B$rG'<1$7$^$9(B. + + MS-DOS $B0J30$N%7%9%F%`$G(B, Makefile(.in)? $B$r=q$-49$($:$K%$%s%9%H!<(B + $B%k$7$?>l9g(B, $B%G%U%)%k%H$G$O(B EUC $B$rG'<1$7$^$9(B. MS-DOS $B$G$O%G%U%)(B + $B%k%H$G(B Shift-JIS $B$rG'<1$7$^$9(B. + + $B0J2<(B, $B%3%^%s%I$J$I$NJQ99E@@$G$9(B. + + y $B%3%^%s%I(B $B%P%$%HKh$G$O$J$/J8;zKh$KJQ49$7$^$9(B. + + l $B%3%^%s%I(B $B%^%k%A%P%$%HJ8;z$r9MN8$7$?=PNO$r9T$$$^$9(B. $BFC$K(B, + EUC $B$NH>3Q%+%J$NJ8;zI}$r9MN8$7$F$$$^$9(B. + + $B$=$NB>(B $B%i%Y%k$K%^%k%A%P%$%HJ8;z$,;HMQ$G$-$^$9(B. + +$B!|(B SEDM103.LZH (MS-DOS $BHG$o$J$1$l$P$J$j$^$;$s(B. + + $B#1$D#1$D$N0z?t$O6uGr$G6h@@Z$j$^$9(B. $B0z?t$K6uGr(B, ", \, <, >, | $B$r(B + $B4^$`$H$-$O%/%)!<%F%#%s%0$,I,MW$G$9(B. $B$=$NJ}K!$O(B COMMAND.COM $B$N%P(B + $B%0=-$$;EMM$H(B, $B$5$i$K(B setargv.obj $B$K$bLdBj$,$"$j(B, $B$+$J$jFq$7$$$N(B + $B$G$3$3$G$O@@bL@@$r>J$-$^$9(B. $B3F<+8&5f$7$F$/$@@$5$$(B. $B0lHV4JC1$J$N$O(B, + sed $B$N%W%m%0%i%`$r%U%!%$%k$K$7$F(B + + sed -f $B%U%!%$%kL>(B + + $B$H$9$k$3$H$G$9(B. + +$B!|%$%s%9%H!<%k(B (MS-DOS $B0J30(B) + + $B%G%U%)%k%H$N%^%k%A%P%$%HJ8;z$N@@_Dj$O(B, Makefile.in $B$NCf$G;XDj$7$^$9(B. + $B%G%U%)%k%H$r(B Shift-JIS $B$H$9$k>l9g$H(B, $B%G%U%)%k%H$G%^%k%A%P%$%HJ8;z$r(B + $B;HMQ$7$J$$>l9g$O(B Makefile.in $B$N(B MBCTYPE_DEF $B%^%/%m$NDj5A$r$=$l$>$l0J(B + $B2<$N$h$&$KJQ$($F$/$@@$5$$(B. + + MBCTYPE_DEF = -DSJIS ($B%G%U%)%k%H$G(B Shift-JIS $B$N>l9g(B) + MBCTYPE_DEF = ($B%G%U%)%k%H$G;HMQ$7$J$$>l9g(B) + + $B$$$:$l$N>l9g$G$b5/F0;~$N%*%W%7%g%s$K$h$j%^%k%A%P%$%HJ8;z%3!<%I$NA*Br(B + $B$,2DG=$G$9(B. + + $B$=$NB>$N:n6H$O(B, $B%*%j%8%J%k$N(B sed $B$HF1MM$G$9$N$G(B INSTALL $B$r$*FI$_$/$@@(B + $B$5$$(B. + +$B!|%$%s%9%H!<%k(B (MS-DOS $BHG(B) + + MS-C 6.00A $B$r;HMQ$7$F(B, $B%G%U%)%k%H$G(B Shift-JIS $B$rG'<1$9$k(B sed $B$r:n@@.(B + $B$9$k>l9g$O(B, README.MSC $B$KL\$rDL$7$F(B, $BI,MW$J$i%i%$%V%i%j$K%Q%C%A$rEv(B + $B$F$?8e(B, + + A>nmake -f Makefile.msc + + $B$@@$1$G#O#K$G$9(B. $B$=$NB>$N=hM}7O$r;HMQ$9$k>l9g$d(B, $B%G%U%)%k%H$r(B + Shift-JIS $B0J30$K$7$?$$>l9g$O(B Makefile.msc $B$r;29M$K(B Makefile $B$r=q$$$F(B + $B$/$@@$5$$(B. + +$B!|%P%0(B + + 1. $B$$$o$f$k(B JIS $B$K$OBP1~$7$F$$$^$;$s(B. $B>-MhBP1~$9$kM=Dj$b$"$j$^$;$s(B. + + 2. $B%^%k%A%P%$%HJ8;z%3!<%I$O$"$^$j873J$K$O9M$($F$$$^$;$s(B. + + EUC $B#1%P%$%HL\(B ... 0x80 - 0xff + EUC $B#2%P%$%HL\(B ... 0x01 - 0xff (0x0a $B$r=|$/(B) + + Shift-JIS $B#1%P%$%HL\(B ... 0x80 - 0x9f, 0xe0 - 0xff + Shift-JIS $B#2%P%$%HL\(B ... 0x01 - 0xff (0x0a $B$r=|$/(B) + + $B$H$7$F=hM}$7$F$$$^$9(B. $BH>3Q%+%J$b;H$($^$9(B. EUC $B$N(B SS3 (0x8f) $B$K(B + $B;O$^$k#3%P%$%H%3!<%I$O;H$($^$;$s(B. ($B;d$O$3$l$r%5%]!<%H$7$F$$$k%7(B + $B%9%F%`$r8+$?$3$H$,$J$$(B...) + +$B!|(B regex.[ch] $B$N3HD%;EMM(B + + regex.[ch] $B%b%8%e!<%k$O(B mbc.[ch] $B%b%8%e!<%k$K0MB8$7$F$$$^$9$N$G(B, $B;H(B + $BMQ$9$k>l9g$K$O(B, mbc.[ch] $B%b%8%e!<%k$rF1;~$K;HMQ$9$kI,MW$,$"$j$^$9(B. + + $B%^%k%A%P%$%HJ8;z$N%?%$%W$O(B, mbc.[ch] $B$N(B mbcinit() $B$G@@_Dj$7$^$9(B. + mbc.h $B$KDj5A$5$l$F$$$k%^%/%m(B MBCTYPE_ASCII, MBCTYPE_EUC, + MBCTYPE_SJIS $B$N$$$:$l$+$r(B mbcinit() $B$KEO$7$F$/$@@$5$$(B. + + regex.[ch] $B$O(B, $B%Q%?!<%s%3%s%Q%$%k;~(B, $B%^%C%A%s%0;~$NN>J}$G(B mbc.[ch] + $B$N@@_Dj$r;2>H$7$^$9(B. $B$,(B, $B$3$NN>e0L%P%$%H(B, $B#2%P%$%HL\$r(B + $B2<0L%P%$%H$H$7$?#1#6%S%C%HId9f$J$7@@0?t$G$9(B. Shift-JIS $B$G$b(B EUC $B$G$b(B + + $B#1%P%$%H(B ASCII $BJ8;z(B < $BH>3Q%+%JJ8;z(B < $BA43QJ8;z(B + + $B$H$$$&Bg>.4X78$,@@.$jN)$C$F$$$^$9(B. + +$B!|>r7o$J$I(B + + 1. $B%*%j%8%J%k$N(B GNU sed $B$NCx:n8"$O(B Free Software Foundation, Inc. $B$,(B + $BM-$7$F$$$^$9(B. $B%Q%C%AItJ,(B (sed-mb.diff) $B$NCx:n8"$O;d(B (t^2) $B$,M-$7(B + $B$F$$$^$9(B. + + 2. GNU sed $B$N%=!<%9%3!<%I$O3F=j$N(B ftp $B%5%$%H(B, $B$b$7$/$O(B Nifty-serve + $B$N(B FUNIX $B$N%G!<%?%i%$%V%i%j$+$iF~$&I,MW$b$"$j$^$;$s(B. $B$7$+$7:9J,$rE,MQ$7$?7k2L$N%=!<%9%3!<%I(B, $B$*(B + $B$h$SH(B) $B$K=>$C$F$/$@@$5$$(B. + + sed+mb $B$K2?$i$+$N2~JQ$r2C$($?$b$N$r:FG[I[$9$k:]$b(B, GNU GENERAL + PUBLIC LICENSE $B$K=>$&$h$&$KCm0U$7$F$/$@@$5$$(B. $B$^$?(B sed+mb $B$K4^$^$l(B + $B$k%3!<%I(B (regex.[ch] $B$J$I(B) $B$rMxMQ$7$?%W%m%0%i%`$rG[I[$9$k:]$b(B GNU + GENERAL PUBLIC LICENSE $B$^$?$O(B GNU LIBRARY GENERAL PUBLIC LICENSE + (COPYING.LIB $B;2>H(B) $B$N3:EvItJ,$K=>$C$F$/$@@$5$$(B. + + $B$^$?5AL3$G$O$"$j$^$;$s$,:FG[I[$5$l$kJ}$O;v8e$K$G$bO"Mm$r$/$@@$5$$(B. + $B$=$7$F2DG=$J8B$j(B, $B?7$7$$%P!<%8%g%s$X$N%"%C%W%G!<%H$KEX$a(B, $BMxMQZ$G$9(B. + + 5. sed+mb $B$K2?$i$+$NIT6q9g$,H/@@8$7$?>l9g(B, (FSF $B$d(B, $B%*%j%8%J%k$N:n$7$F$$$k>l9g$O(B, + $B$=$N?M$KO"Mm$7$F$/$@@$5$$(B. + + 6. $B$4(B/$B$*<8$j(B, $B$=$NB>$bBg4?7^$G$9(B. $B$G$-$k$+$.$j%5%]!<%H$7(B + $B$^$9(B. + +$B!|:\(B/$B%P%0Js9p$r$/$@@$5$C$?J}!9$K46$r5s$2$5$;$F(B + $BD:$-$?$+$C$?$N$G$9$,%O!<%I%G%#%9%/$N%H%i%V%k$G$[$H$s$I$N%a!<%k$r>C<:(B + $B$5$;$F$7$^$$$^$7$?(B. + + $B:G8e$K(B, $B5.=E$J%G%#%9%/%9%Z!<%9$r(B sed+mb $B$N$?$a$K3d$$$F$4;HMQD:$$$F$$(B + $B$k$9$Y$F$NMxMQ5o$7$^$7$?(B) + TEL/FAX: 092-731-4025 (TEL/FAX $B<+F0@@ZBX$((B) + 092-724-6342 (TEL $B$N$_(B) + E-mail: NBC02362@@niftyserve.or.jp $BC+K\9'9@@(B + +# Local variables: +# mode: indented-text +# indent-tabs-mode: nil +# tab-stop-list: (4 8 16 24 32 40 48 56 64 72 80) +# left-margin: 4 +# fill-column: 72 +# fill-prefix: " " +# version-control: never +# End: diff -ru2N sed-1.18/README.MSC sed+mb1.03/README.MSC --- sed-1.18/README.MSC Thu Jan 1 09:00:00 1970 +++ sed+mb1.03/README.MSC Sat Mar 5 16:14:14 1994 @@@@ -0,0 +1,99 @@@@ +PC-9801 $BMQ(B MS-C version 6.00A $B$N0z?t$N%;%C%H%"%C%W%k!<%A%s$K$O%P%0$,$"$j(B +$B$^$9(B. + +#include + +int +main(int argc, char **argv) +{ + int i; + + for (i = 0; i <= argc; i++) + printf("argv[%d] == %s\n", i, argv[i]); + return 0; +} + +$B$r%3%s%Q%$%k(B, $B%j%s%/$7$?(B FOO.EXE $B$K(B + + A>foo "\\" abc + +$B$J$I$N0z?t$rEO$7$Fe5-$N%W%m%0%i%`$r(B SETARGV.OBJ $B$H$H$b$K%j%s(B +$B%/$7$F(B + + A>foo \DOS\*.com + +$B$J$I$N0z?t$Gl9g(B, +L/DOS/STDARGV.OBJ, L/DOS/_SETARGV.OBJ, L/WILD.OBJ $B$r$=$l$>$l(B +KSTDARGV.OBJ, _KSTARGV.OBJ, KWILD.OBJ $B$H%j%M!<%`$7(B, + + lib \msc6\lib\llibce.lib-+dos\kstdargv.obj-+dos\_kstargv.obj-+kwild.obj; + +$B$J$I$H$7$F%b%8%e!<%k$r99?7$7$F$/$@@$5$$(B. $BG0$N$?$a$3$N:n6H$r9T$&A0$K(B, + + lib \msc6\lib\llibce.lib*kstdargv.obj*_kstargv.obj*kwild.obj; + +$B$J$I$G(B, kstdargv.obj, _kstargv.obj, kwild.obj $B$N%P%C%/%"%C%W$r$H$C$FCV$/(B +$B$H$$$$$G$7$g$&(B. + +$B$J$*(B, $B$3$N%Q%C%A$OEvA3$N$3$H$J$,$iL5J]>Z$G$9(B. + +Mar. 5, 1994 t^2 + +*** stdargv.org Mon Oct 8 19:50:46 1990 +--- stdargv.asm Thu Jul 22 17:50:44 1993 +*************** +*** 409,415 **** + shr cx,1 + adc dx,cx ; add 1 for every pair of backslashes + test al,1 ; plus 1 for the " if odd number of \ +! jz arg310 ; [J1] + jmp arg210 ; [J1] + ; + ; Command line is fully parsed - compute number of bytes needed +--- 409,415 ---- + shr cx,1 + adc dx,cx ; add 1 for every pair of backslashes + test al,1 ; plus 1 for the " if odd number of \ +! jnz arg310 ; ! Jul.21.93 t^2 + jmp arg210 ; [J1] + ; + ; Command line is fully parsed - compute number of bytes needed + +*** wild.org Mon Oct 8 19:49:48 1990 +--- wild.c Sat Mar 5 00:42:12 1994 +*************** +*** 186,197 **** + char *ptr2 = arg; // [J1] + + if(ptr != arg) { // [J1] +! while(ptr2 + 1 != ptr && *ptr2 != SLASHCHAR && *ptr2 != FWDSLASHCHAR +! && *ptr2 != ':') { // [J1] + if(iskanji(*ptr2)) ptr2++; // [J1] + ptr2++; // [J1] + } // [J1] +! ptr = ptr2; // [J1] + } // [J1] + + if (*ptr == ':' && ptr != arg+1) /* weird name, just add it as is */ +--- 186,201 ---- + char *ptr2 = arg; // [J1] + + if(ptr != arg) { // [J1] +! char *ptr3 = arg; +! +! while (ptr2 < ptr) { +! if (*ptr2 == SLASHCHAR || *ptr2 == FWDSLASHCHAR +! || *ptr2 == ':') +! ptr3 = ptr2; + if(iskanji(*ptr2)) ptr2++; // [J1] + ptr2++; // [J1] + } // [J1] +! ptr = ptr3; + } // [J1] + + if (*ptr == ':' && ptr != arg+1) /* weird name, just add it as is */ diff -ru2N sed-1.18/configure sed+mb1.03/configure --- sed-1.18/configure Fri May 21 15:09:06 1993 +++ sed+mb1.03/configure Sat Jul 10 18:18:47 1993 @@@@ -294,5 +294,5 @@@@ fi -for func in bcopy memcpy +for func in bcopy memcpy memmove strncasecmp do trfunc=HAVE_`echo $func | tr '[a-z]' '[A-Z]'` diff -ru2N sed-1.18/configure.in sed+mb1.03/configure.in --- sed-1.18/configure.in Fri May 21 13:22:42 1993 +++ sed+mb1.03/configure.in Sat Jul 10 17:54:03 1993 @@@@ -9,5 +9,5 @@@@ AC_HAVE_HEADERS(string.h) AC_VPRINTF -AC_HAVE_FUNCS(bcopy memcpy) +AC_HAVE_FUNCS(bcopy memcpy memmove strcasecmp strncasecmp) AC_CONST AC_ALLOCA diff -ru2N sed-1.18/mbc.c sed+mb1.03/mbc.c --- sed-1.18/mbc.c Thu Jan 1 09:00:00 1970 +++ sed+mb1.03/mbc.c Fri Jul 9 14:38:28 1993 @@@@ -0,0 +1,98 @@@@ +/* Functions for multi-byte support. + Created for grep multi-byte extension Jul., 1993 by t^2 (Takahiro Tanimoto) + Last change: Jul. 9, 1993 by t^2 */ +#include "mbc.h" + +static const unsigned char mbctab_ascii[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static const unsigned char mbctab_euc[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +static const unsigned char mbctab_sjis[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +#ifdef EUC +const unsigned char *mbctab = mbctab_euc; +int current_mbctype = MBCTYPE_EUC; +#else +#ifdef SJIS +const unsigned char *mbctab = mbctab_sjis; +int current_mbctype = MBCTYPE_SJIS; +#else +const unsigned char *mbctab = mbctab_ascii; +int current_mbctype = MBCTYPE_ASCII; +#endif +#endif + +void +#ifdef __STDC__ +mbcinit(int mbctype) +#else +mbcinit(mbctype) + int mbctype; +#endif +{ + switch (mbctype) { + case MBCTYPE_ASCII: + mbctab = mbctab_ascii; + current_mbctype = MBCTYPE_ASCII; + break; + case MBCTYPE_EUC: + mbctab = mbctab_euc; + current_mbctype = MBCTYPE_EUC; + break; + case MBCTYPE_SJIS: + mbctab = mbctab_sjis; + current_mbctype = MBCTYPE_SJIS; + break; + } +} diff -ru2N sed-1.18/mbc.h sed+mb1.03/mbc.h --- sed-1.18/mbc.h Thu Jan 1 09:00:00 1970 +++ sed+mb1.03/mbc.h Fri Jul 9 14:40:03 1993 @@@@ -0,0 +1,38 @@@@ +#ifndef MBC_H +#define MBC_H 1 +/* Definitions for multi-byte support. + Created for grep multi-byte extension Jul., 1993 by t^2 (Takahiro Tanimoto) + Last change: Jul. 9, 1993 by t^2 */ + +#ifndef const +#ifndef __STDC__ +#ifdef __GNUC__ +#define const __const__ +#define volatile __volatile__ +#else +#define const +#define volatile +#endif +#endif +#endif + +#ifndef _ +#ifdef __STDC__ +#define _(x) x +#else +#define _(x) () +#endif +#endif + +#define MBCTYPE_ASCII 0 +#define MBCTYPE_EUC 1 +#define MBCTYPE_SJIS 2 + +extern const unsigned char *mbctab; +extern int current_mbctype; + +void mbcinit _((int)); + +#define ismbchar(c) mbctab[(unsigned char)c] + +#endif /* !MBC_H */ diff -ru2N sed-1.18/regex.c sed+mb1.03/regex.c --- sed-1.18/regex.c Mon May 24 01:10:46 1993 +++ sed+mb1.03/regex.c Thu Aug 19 04:16:32 1993 @@@@ -19,4 +19,6 @@@@ along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: Aug. 19, 1993 by t^2 */ /* AIX requires this to be the first thing in the file. */ @@@@ -54,6 +56,33 @@@@ #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) #endif +#ifdef HAVE_MEMMOVE #ifndef bcopy -#define bcopy(s, d, n) memcpy ((d), (s), (n)) +#define bcopy(s, d, n) memmove ((d), (s), (n)) +#endif +#else +#ifndef HAVE_BCOPY +static void +#ifdef __STDC__ +bcopy (const void *s0, void *d0, size_t n) +#else +bcopy (s, d, n) + const void *s0; + void *d; + size_t n; +#endif +{ + const char *s = s0; + char *d = d0; + + if (s < d) { + s += n, d += n; + while (n--) + *--d = *--s; + } + else + while (n--) + *d++ = *s++; +} +#endif #endif #ifndef bzero @@@@ -123,4 +152,5 @@@@ /* Get the interface, including the syntax bits. */ #include "regex.h" +#include "mbc.h" /* isalpha etc. are used for the character classes. */ @@@@ -450,4 +480,19 @@@@ #endif /* DEBUG */ + +#define STORE_MBC(p, c) \ + ((p)[0] = (unsigned char) ((c) >> 8), (p)[1] = (unsigned char) (c)) +#define STORE_MBC_AND_INCR(p, c) \ + (*(p)++ = (unsigned char) ((c) >> 8), *(p)++ = (unsigned char) (c)) + +#define EXTRACT_MBC(p) \ + ((unsigned char) (p)[0] << 8 | (unsigned char) (p)[1]) +#define EXTRACT_MBC_AND_INCR(p) \ + ((p) += 2, (unsigned char) (p)[-2] << 8 | (unsigned char) (p)[-1]) + +#define EXTRACT_UNSIGNED(p) \ + ((unsigned char) (p)[0] | (unsigned char) (p)[1] << 8) +#define EXTRACT_UNSIGNED_AND_INCR(p) \ + ((p) += 2, (unsigned char) (p)[-2] | (unsigned char) (p)[-1] << 8) /* If DEBUG is defined, Regex prints many voluminous messages about what @@@@ -544,4 +589,8 @@@@ { putchar ('/'); + if (ismbchar (*p) && 2 <= mcnt) { + printf ("/%.2s", (char *) p), p += 2, --mcnt; + continue; + } printchar (*p++); } @@@@ -574,4 +623,5 @@@@ printf ("/charset%s", (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); + printf ("/%d", *p); assert (p + *p < pend); @@@@ -589,4 +639,13 @@@@ } p += 1 + *p; + { + unsigned short i, size; + + size = EXTRACT_UNSIGNED_AND_INCR (p); + printf ("/0/%d", size); + for (i = 0; i < size; i++) + printf ("//%.2s-//%.2s", (char *) p, (char *) p + 2), + p += 4; + } break; } @@@@ -744,5 +803,5 @@@@ printf ("not_bol: %d\t", bufp->not_bol); printf ("not_eol: %d\t", bufp->not_eol); - printf ("syntax: %d\n", bufp->syntax); + printf ("syntax: %lu\n", bufp->syntax); /* Perhaps we should print the translate table? */ } @@@@ -843,5 +902,7 @@@@ static boolean at_begline_loc_p (), at_endline_loc_p (); static boolean group_in_compile_stack (); +#if 0 static reg_errcode_t compile_range (); +#endif /* Fetch the next character in the uncompiled pattern---translating it @@@@ -852,5 +913,6 @@@@ do {if (p == pend) return REG_EEND; \ c = (unsigned char) *p++; \ - if (translate) c = translate[c]; \ + if (translate && !ismbchar (c)) \ + c = (unsigned char) translate[(unsigned char) c]; \ } while (0) @@@@ -870,5 +932,7 @@@@ `char *', to avoid warnings when a string constant is passed. But when we use a character as a subscript we must make it unsigned. */ -#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d)) +#define TRANSLATE(d) (translate \ + ? (unsigned char) translate[(unsigned char) (d)] \ + : (d)) @@@@ -1040,4 +1104,159 @@@@ || STREQ (string, "cntrl") || STREQ (string, "blank")) +/* Handle charset(_not)?. + + Structure of charset(_not)? in compiled pattern. + + struct { + unsinged char id; charset(_not)? + unsigned char sbc_size; + unsigned char sbc_map[sbc_size]; same as original up to here. + unsigned short mbc_size; number of intervals. + struct { + unsigned short beg; beginning of interval. + unsigned short end; end of interval. + } intervals[mbc_size]; + }; */ + +static reg_errcode_t +#ifdef __STDC__ +set_list_bits (unsigned short c1, unsigned short c2, + reg_syntax_t syntax, unsigned char *b, const char *translate) +#else +set_list_bits (c1, c2, syntax, b, translate) + unsigned short c1, c2; + reg_syntax_t syntax; + unsigned char *b; + const char *translate; +#endif +{ + unsigned char sbc_size = b[-1]; + unsigned short mbc_size = EXTRACT_UNSIGNED (&b[sbc_size]); + unsigned short beg, end, upb; + + if (c1 > c2) + return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; + if (c1 < 1 << BYTEWIDTH) { + upb = c2; + if (1 << BYTEWIDTH <= upb) + upb = (1 << BYTEWIDTH) - 1; /* The last single-byte char */ + if (sbc_size <= upb / BYTEWIDTH) { + /* Allocate maximum size so it never happens again. */ + /* NOTE: memcpy() would not work here. */ + bcopy (&b[sbc_size], &b[(1 << BYTEWIDTH) / BYTEWIDTH], 2 + mbc_size*4); + bzero (&b[sbc_size], (1 << BYTEWIDTH) / BYTEWIDTH - sbc_size); + b[-1] = sbc_size = (1 << BYTEWIDTH) / BYTEWIDTH; + } + if (!translate) { + for (; c1 <= upb; c1++) + if (!ismbchar (c1)) + SET_LIST_BIT (c1); + } + else + for (; c1 <= upb; c1++) + if (!ismbchar (c1)) + SET_LIST_BIT (TRANSLATE (c1)); + if (c2 < 1 << BYTEWIDTH) + return REG_NOERROR; + c1 = 0x8000; /* The first wide char */ + } + b = &b[sbc_size + 2]; + + /* intervals[beg] + $B!|(B----------$B!|(B $B!|(B----------$B!|(B + c1 + $B!{(B----------------------$B!|(B + + $B>e?^$N$h$&$J6h4V$N%$%s%G%C%/%9(B beg $B$r7hDj$9$k(B. */ + for (beg = 0, upb = mbc_size; beg < upb; ) { + unsigned short mid = (beg + upb) >> 1; + + if (c1 - 1 > EXTRACT_MBC (&b[mid*4 + 2])) + beg = mid + 1; + else + upb = mid; + } + + /* intervals[end] + $B!|(B-------$B!|(B $B!|(B----------$B!|(B + c2 + $B!|(B---------------$B!{(B + + $B>e?^$N$h$&$J6h4V$N%$%s%G%C%/%9(B end $B$r7hDj$9$k(B. */ + for (end = beg, upb = mbc_size; end < upb; ) { + unsigned short mid = (end + upb) >> 1; + + if (c2 >= EXTRACT_MBC (&b[mid*4]) - 1) + end = mid + 1; + else + upb = mid; + } + + if (beg != end) { + /* $B4{B8$N6h4V$r>/$J$/$H$b(B1$B$DE}9g$9$k>l9g(B, + $B6h4V$N;OE@@(B, $B=*E@@$r=$@@5$9$k(B. */ + if (c1 > EXTRACT_MBC (&b[beg*4])) + c1 = EXTRACT_MBC (&b[beg*4]); + if (c2 < EXTRACT_MBC (&b[end*4 - 2])) + c2 = EXTRACT_MBC (&b[end*4 - 2]); + } + if (end < mbc_size && end != beg + 1) + /* $BDI2C$5$l$k6h4V$N8e$m$K4{B8$N6h4V$r0\F0$9$k(B. */ + /* NOTE: memcpy() would not work here. */ + bcopy (&b[end*4], &b[(beg + 1)*4], (mbc_size - end)*4); + STORE_MBC (&b[beg*4 + 0], c1); + STORE_MBC (&b[beg*4 + 2], c2); + mbc_size += beg + 1 - end; + STORE_NUMBER (&b[-2], mbc_size); + return REG_NOERROR; +} + +static int +#ifdef __STDC__ +is_in_list (unsigned short c, const unsigned char *b) +#else +is_in_list (c, b) + unsigned short c; + const unsigned char *b; +#endif +{ + unsigned short size; + int in = (re_opcode_t) b[-1] == charset_not; + + size = *b++; + if (c < 1 << BYTEWIDTH) { + if (c / BYTEWIDTH < size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH) + in = !in; + } + else { + unsigned short i, j; + + b += size + 2; + size = EXTRACT_UNSIGNED (&b[-2]); + + /* intervals[i] + $B!|(B-------$B!|(B $B!|(B--------$B!|(B + c + $B!{(B----------------$B!|(B + + $B>e?^$N$h$&$J6h4V$N%$%s%G%C%/%9(B i $B$r7hDj$9$k(B. */ + for (i = 0, j = size; i < j; ) { + unsigned short k = (i + j) >> 1; + + if (c > EXTRACT_MBC (&b[k*4 + 2])) + i = k + 1; + else + j = k; + } + if (i < size && EXTRACT_MBC (&b[i*4]) <= c + /* [...] $B$+$i(B, $BL58z$J%^%k%A%P%$%HJ8;z$r=|30$9$k(B. $B$3$3$G$O4JC1$N(B + $B$?$a#2%P%$%HL\$,(B '\n' $B$^$?$O(B '\0' $B$@@$1$rL58z$H$7$?(B. [^...] + $B$N>l9g$O(B, $B5U$KL58z$J%^%k%A%P%$%HJ8;z$r%^%C%A$5$;$k(B. */ + && ((unsigned char) c != '\n' && (unsigned char) c != '\0')) + in = !in; + } + return in; +} + /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. Returns one of error codes defined in `regex.h', or zero for success. @@@@ -1350,4 +1569,6 @@@@ { boolean had_char_class = false; + unsigned short c, c1; + int last_char = -1; if (p == pend) return REG_EBRACK; @@@@ -1355,5 +1576,6 @@@@ /* Ensure that we have enough space to push a charset: the opcode, the length count, and the bitset; 34 bytes in all. */ - GET_BUFFER_SPACE (34); + /* + 2 + 4 for mbcharset(_not)? with just one interval. */ + GET_BUFFER_SPACE (34 + 2 + 4); laststart = b; @@@@ -1372,5 +1594,5 @@@@ /* Clear the whole map. */ - bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); + bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH + 2); /* charset_not matches newline according to a syntax bit. */ @@@@ -1382,7 +1604,14 @@@@ for (;;) { + int size; + if (p == pend) return REG_EBRACK; - PATFETCH (c); + if ((size = EXTRACT_UNSIGNED (&b[(1 << BYTEWIDTH) / BYTEWIDTH]))) + /* Ensure the space is enough to hold another interval + of multi-byte chars in charset(_not)?. */ + GET_BUFFER_SPACE (32 + 2 + size*4 + 4); + + PATFETCH_RAW (c); /* \ might escape characters inside [...] and [^...]. */ @@@@ -1391,6 +1620,16 @@@@ if (p == pend) return REG_EESCAPE; - PATFETCH (c1); - SET_LIST_BIT (c1); + PATFETCH_RAW (c1); + if (ismbchar (c1)) { + unsigned char c2; + + PATFETCH_RAW (c2); + c1 = c1 << 8 | c2; + (void) set_list_bits (c1, c1, syntax, b, translate); + last_char = c1; + continue; + } + SET_LIST_BIT (TRANSLATE (c1)); + last_char = c1; continue; } @@@@ -1407,4 +1646,11 @@@@ return REG_ERANGE; + if (ismbchar (c)) { + unsigned char c2; + + PATFETCH_RAW (c2); + c = c << 8 | c2; + } + /* Look ahead to see if it's a range when the last thing was a character: if this is a hyphen not at the @@@@ -1412,10 +1658,25 @@@@ operator. */ if (c == '-' +#if 0 /* The original was: */ && !(p - 2 >= pattern && p[-2] == '[') && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') +#else /* I wonder why he did not write like this. + Have we got any problems? */ + && p != p1 + 1 +#endif && *p != ']') { - reg_errcode_t ret - = compile_range (&p, pend, translate, syntax, b); + reg_errcode_t ret; + + assert (last_char >= 0); + PATFETCH_RAW (c1); + if (ismbchar (c1)) { + unsigned char c2; + + PATFETCH_RAW (c2); + c1 = c1 << 8 | c2; + } + ret = set_list_bits (last_char, c1, syntax, b, translate); + last_char = c1; if (ret != REG_NOERROR) return ret; } @@@@ -1426,7 +1687,15 @@@@ /* Move past the `-'. */ - PATFETCH (c1); - - ret = compile_range (&p, pend, translate, syntax, b); + PATFETCH_RAW (c1); + + PATFETCH_RAW (c1); + if (ismbchar (c1)) { + unsigned char c2; + + PATFETCH_RAW (c2); + c1 = c1 << 8 | c2; + } + ret = set_list_bits (c, c1, syntax, b, translate); + last_char = c1; if (ret != REG_NOERROR) return ret; } @@@@ -1439,5 +1708,5 @@@@ char str[CHAR_CLASS_MAX_LENGTH + 1]; - PATFETCH (c); + PATFETCH_RAW (c); c1 = 0; @@@@ -1499,4 +1768,7 @@@@ } had_char_class = true; +#ifdef DEBUG + last_char = -1; +#endif } else @@@@ -1505,7 +1777,13 @@@@ while (c1--) PATUNFETCH; +#if 0 /* The original was: */ SET_LIST_BIT ('['); SET_LIST_BIT (':'); +#else /* I think this is the right way. */ + SET_LIST_BIT (TRANSLATE ('[')); + SET_LIST_BIT (TRANSLATE (':')); +#endif had_char_class = false; + last_char = ':'; } } @@@@ -1513,5 +1791,6 @@@@ { had_char_class = false; - SET_LIST_BIT (c); + (void) set_list_bits (c, c, syntax, b, translate); + last_char = c; } } @@@@ -1521,5 +1800,9 @@@@ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) b[-1]--; - b += b[-1]; + if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH) + bcopy (&b[(1 << BYTEWIDTH) / BYTEWIDTH], &b[b[-1]], + 2 + EXTRACT_UNSIGNED (&b[(1 << BYTEWIDTH) / BYTEWIDTH])*4); + b += b[-1] + 2 + EXTRACT_UNSIGNED (&b[b[-1]])*4; + break; } break; @@@@ -1988,5 +2271,6 @@@@ not to translate; but if we don't translate it it will never match anything. */ - c = TRANSLATE (c); + if (!ismbchar (c)) + c = TRANSLATE (c); goto normal_char; } @@@@ -1997,4 +2281,11 @@@@ /* Expects the character in `c'. */ normal_char: + + c1 = 0; + if (ismbchar (c)) { + c1 = c; + PATFETCH_RAW (c); + } + /* If no exactn currently being built. */ if (!pending_exact @@@@ -2004,5 +2295,6 @@@@ /* We have only one byte following the exactn for the count. */ - || *pending_exact == (1 << BYTEWIDTH) - 1 + || *pending_exact >= (c1 ? (1 << BYTEWIDTH) - 2 + : (1 << BYTEWIDTH) - 1) /* If followed by a repetition operator. */ @@@@ -2024,4 +2316,8 @@@@ } + if (c1) { + BUF_PUSH (c1); + (*pending_exact)++; + } BUF_PUSH (c); (*pending_exact)++; @@@@ -2149,5 +2445,5 @@@@ at_endline_loc_p (p, pend, syntax) const char *p, *pend; - int syntax; + reg_syntax_t syntax; { const char *next = p; @@@@ -2185,4 +2481,5 @@@@ +#if 0 /* We use set_list_bits() now. */ /* Read the ending character of a range (in a bracket expression) from the uncompiled pattern *P_PTR (which ends at PEND). We assume the @@@@ -2240,4 +2537,5 @@@@ return REG_NOERROR; } +#endif /* Failure stack declarations and macros; both re_compile_fastmap and @@@@ -2603,18 +2901,65 @@@@ case charset: + /* NOTE: Charset for single-byte chars never contain + multi-byte char. See set_list_bits(). */ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) fastmap[j] = 1; + { + unsigned short size; + unsigned char c, end; + + p += p[-1] + 2; + size = EXTRACT_UNSIGNED (&p[-2]); + for (j = 0; j < size; j++) + /* set bits for 1st bytes of multi-byte chars. */ + for (c = (unsigned char) p[j*4], + end = (unsigned char) p[j*4 + 2]; + c <= end; c++) + /* NOTE: Charset for multi-byte chars might contain + single-byte chars. We must reject them. */ + if (ismbchar (c)) + fastmap[c] = 1; + } break; case charset_not: + /* S: set of all single-byte chars. + M: set of all first bytes that can start multi-byte chars. + s: any set of single-byte chars. + m: any set of first bytes that can start multi-byte chars. + + We assume S+M = U. + ___ _ _ + s+m = (S*s+M*m). */ /* Chars beyond end of map must be allowed. */ + /* NOTE: Charset_not for single-byte chars might contain + multi-byte chars. See set_list_bits(). */ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; + if (!ismbchar (j)) + fastmap[j] = 1; for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) - fastmap[j] = 1; + if (!ismbchar (j)) + fastmap[j] = 1; + { + unsigned short size; + unsigned short c, beg; + + p += p[-1] + 2; + size = EXTRACT_UNSIGNED (&p[-2]); + c = 0x00; + for (j = 0; j < size; j++) { + for (beg = (unsigned char) p[j*4 + 0]; c <= beg; c++) + if (ismbchar (c)) + fastmap[c] = 1; + c = (unsigned char) p[j*4 + 2]; + } + for (beg = 0xff; c <= beg; c++) + if (ismbchar (c)) + fastmap[c] = 1; + } break; @@@@ -2929,4 +3274,5 @@@@ register int lim = 0; int irange = range; + unsigned char c; if (startpos < size1 && startpos + range >= size1) @@@@ -2938,11 +3284,23 @@@@ inside the loop. */ if (translate) - while (range > lim - && !fastmap[(unsigned char) - translate[(unsigned char) *d++]]) + while (range > lim) { + c = *d++; + if (ismbchar (c)) { + if (fastmap[c]) + break; + d++; + range -= 2; + continue; + } + if (fastmap[(unsigned char) translate[c]]) + break; range--; + } else - while (range > lim && !fastmap[(unsigned char) *d++]) + while (range > lim && (c = *d++, !fastmap[c])) { + if (ismbchar (c)) + d++, range--; range--; + } startpos += irange - range; @@@@ -2977,11 +3335,34 @@@@ else if (range > 0) { - range--; - startpos++; + const char *d = ((startpos >= size1 ? string2 - size1 : string1) + + startpos); + + if (ismbchar (*d)) { + range--, startpos++; + if (!range) + break; + } + range--, startpos++; } else { - range++; - startpos--; + range++, startpos--; + { + const char *s, *d, *p; + + if (startpos < size1) + s = string1, d = string1 + startpos; + else + s = string2, d = string2 + startpos - size1; + for (p = d; p-- > s && ismbchar(*p); ) + /* --p >= s $B$@@$H(B 80[12]?86 $B$GF0$+$J$$2DG=@@-$,$"$k(B. (huge + model $B0J30$G(B, s $B$N%*%U%;%C%H$,(B 0 $B$@@$C$?>l9g(B.) */ + ; + if (!((d - p) & 1)) { + if (!range) + break; + range++, startpos--; + } + } } } @@@@ -3539,6 +3920,19 @@@@ do { + unsigned char c; + PREFETCH (); - if (translate[(unsigned char) *d++] != (char) *p++) + c = *d++; + if (ismbchar (c)) { + if (c != (unsigned char) *p++ + || !--mcnt /* $B%Q%?!<%s$,@@5$7$/%3%s%Q%$%k$5(B + $B$l$F$$$k8B$j(B, $B$3$N%A%'%C%/$O(B + $B>iD9$@@$,G0$N$?$a(B. */ + || d == dend + || (unsigned char) *d++ != (unsigned char) *p++) + goto fail; + continue; + } + if ((unsigned char) translate[c] != (unsigned char) *p++) goto fail; } @@@@ -3549,6 +3943,26 @@@@ do { +#if 0 + /* $BB>$NItJ,$G$O(B, string1 $B$H(B string2 $B$K%^%k%A%P%$%HJ8;z(B + $B$,8Y$k$N$r5v$7$F$$$J$$(B. $B$3$N$3$H$rB.EY$r5>@@7$K$7$F(B + $B$b%A%'%C%/$9$k>l9g$O(B, $B$3$3$Hsyntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') @@@@ -3577,19 +4001,23 @@@@ case charset_not: { - register unsigned char c; - boolean not = (re_opcode_t) *(p - 1) == charset_not; + register unsigned short c; + boolean not; - DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); + DEBUG_PRINT2 ("EXECUTING charset%s.\n", + (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); PREFETCH (); - c = TRANSLATE (*d); /* The character to match. */ + c = (unsigned char) *d; + if (ismbchar (c)) { + c <<= 8; + if (d + 1 != dend) + c |= (unsigned char) d[1]; + } + else + c = TRANSLATE (c); /* The character to match. */ - /* Cast to `unsigned' instead of `unsigned char' in case the - bit list is a full 32 bytes long. */ - if (c < (unsigned) (*p * BYTEWIDTH) - && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; + not = is_in_list (c, p); - p += 1 + *p; + p += 1 + *p + 2 + EXTRACT_UNSIGNED (&p[1 + *p])*4; if (!not) goto fail; @@@@ -3597,4 +4025,6 @@@@ SET_REGS_MATCHED (); d++; + if (d != dend && c >= 1 << BYTEWIDTH) + d++; break; } @@@@ -3762,5 +4192,5 @@@@ /* xx why this test? */ - if ((int) old_regend[r] >= (int) regstart[r]) + if (old_regend[r] >= regstart[r]) regend[r] = old_regend[r]; } @@@@ -4013,5 +4443,5 @@@@ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) { - register unsigned char c + register unsigned short c = *p2 == (unsigned char) endline ? '\n' : p2[2]; p1 = p + mcnt; @@@@ -4030,13 +4460,10 @@@@ || (re_opcode_t) p1[3] == charset_not) { - int not = (re_opcode_t) p1[3] == charset_not; - - if (c < (unsigned char) (p1[4] * BYTEWIDTH) - && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; + if (ismbchar (c)) + c = c << 8 | p2[3]; - /* `not' is equal to 1 if c would match, which means + /* `is_in_list()' is TRUE if c would match, which means that we can't change to pop_failure_jump. */ - if (!not) + if (!is_in_list (c, p1 + 4)) { p[-3] = (unsigned char) pop_failure_jump; @@@@ -4593,8 +5020,15 @@@@ char *translate; { - register unsigned char *p1 = s1, *p2 = s2; + register unsigned char *p1 = s1, *p2 = s2, c; while (len) { - if (translate[*p1++] != translate[*p2++]) return 1; + c = *p1++; + if (ismbchar(c)) { + if (c != *p2++ || !--len || *p1++ != *p2++) + return 1; + } + else + if (translate[c] != translate[*p2++]) + return 1; len--; } @@@@ -4739,5 +5173,5 @@@@ { reg_errcode_t ret; - unsigned syntax + reg_syntax_t syntax = (cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; diff -ru2N sed-1.18/regex.h sed+mb1.03/regex.h --- sed-1.18/regex.h Sat Apr 3 08:17:33 1993 +++ sed+mb1.03/regex.h Sat Jul 10 17:23:29 1993 @@@@ -17,4 +17,6 @@@@ along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: Jul. 10, 1993 by t^2 */ #ifndef __REGEXP_LIBRARY_H__ @@@@ -36,9 +38,9 @@@@ the definitions shifted by one from the previous bit; thus, when we add or remove a bit, only one other definition need change. */ -typedef unsigned reg_syntax_t; +typedef unsigned long reg_syntax_t; /* If this bit is not set, then \ inside a bracket expression is literal. If set, then such a \ quotes the following character. */ -#define RE_BACKSLASH_ESCAPE_IN_LISTS (1) +#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long)1) /* If this bit is not set, then + and ? are operators, and \+ and \? are @@@@ -206,5 +208,5 @@@@ #undef RE_DUP_MAX #endif -#define RE_DUP_MAX ((1 << 15) - 1) +#define RE_DUP_MAX ((int)(((unsigned)1 << 15) - 1)) @@@@ -397,4 +399,10 @@@@ #define _RE_ARGS(args) () + +#ifdef __GNUC__ +#define const __const__ +#else +#define const +#endif #endif /* not __STDC__ */ diff -ru2N sed-1.18/sed.c sed+mb1.03/sed.c --- sed-1.18/sed.c Wed Jun 2 02:10:33 1993 +++ sed+mb1.03/sed.c Thu Jun 2 16:57:49 1994 @@@@ -15,4 +15,6 @@@@ along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: Jun. 2, 1994 by t^2 */ #ifdef __STDC__ @@@@ -43,4 +45,5 @@@@ #include #endif +#include "mbc.h" #ifndef HAVE_BCOPY @@@@ -69,5 +72,73 @@@@ #endif -char *version_string = "GNU sed version 1.18"; +#ifndef HAVE_STRNCASECMP +static const unsigned char +lowertab[] = { + 0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007, + 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017, + 0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027, + 0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037, + 0040, '!', '"', '#', '$', '%', '&', '\'', + '(', ')', '*', '+', ',', '-', '.', '/', + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', ':', ';', '<', '=', '>', '?', + '@@', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', '[', '\\', ']', '^', '_', + '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', '{', '|', '}', '~', 0177, + 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, + 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, + 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, + 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, + 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, + 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, + 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, + 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, + 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, + 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, + 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327, + 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, + 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, + 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, + 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367, + 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377 +}; + +static int +strcasecmp(s1, s2) + char *s1, *s2; +{ + int c1; + + do { + if (!(c1 = (unsigned char)lowertab[(unsigned char)*s1++])) + return !*s2 ? 0 : -1; + } while (!(c1 -= (unsigned char)lowertab[(unsigned char)*s2++])); + return c1; +} + +static int +strncasecmp(s1, s2, n) + char *s1, *s2; + int n; +{ + int c1; + + while (n--) { + if (!(c1 = (unsigned char)lowertab[(unsigned char)*s1++])) + return !*s2 ? 0 : -1; + if ((c1 -= (unsigned char)lowertab[(unsigned char)*s2++])) + return c1; + } + return 0; +} +#endif + +char *version_string = "GNU sed version 1.18\ + + multi-byte extension 1.03"; /* Struct vector is used to describe a chunk of a compiled sed program. @@@@ -185,5 +256,10 @@@@ /* This for the y command */ - unsigned char *translate; + struct + { + unsigned short *translate; + unsigned short mbcsize; + } + cmd_y; /* For { */ @@@@ -239,9 +315,10 @@@@ void add_buffer P_ ((VOID * bb, char *p, int n)); void add1_buffer P_ ((VOID * bb, int ch)); +void add2_buffer P_ ((VOID * bb, int ch)); char *get_buffer P_ ((VOID * bb)); void compile_string P_ ((char *str)); void compile_file P_ ((char *str)); -struct vector *compile_program P_ ((struct vector * vector, int)); +struct vector *compile_program P_ ((struct vector * vector, int open_line)); void bad_prog P_ ((char *why)); int inchar P_ ((void)); @@@@ -349,4 +426,5 @@@@ static char NO_REGEX[] = "No previous regular expression"; static char NO_COMMAND[] = "Missing command"; +static char BAD_MBC[] = "Multi-byte char incomplete"; static struct option longopts[] = @@@@ -358,4 +436,5 @@@@ {"version", 0, NULL, 'V'}, {"help", 0, NULL, 'h'}, + {"ctype", 1, NULL, 'T'}, {NULL, 0, NULL, 0} }; @@@@ -374,6 +453,19 @@@@ re_set_syntax (RE_SYNTAX_POSIX_BASIC); +#ifdef MSDOS + { + char *p, *q, c; + static char argv0[8 + 1]; + + for (q = p = argv[0]; (c = *p++); ) + if (c == '/' || c == '\\' || c == ':') + q = p; + for (p = argv0; p < &argv0[8] && (c = *q++) && c != '.'; ) + *p++ = tolower((unsigned char)c); + argv[0] = argv0; + } +#endif myname = argv[0]; - while ((opt = getopt_long (argc, argv, "hne:f:V", longopts, (int *) 0)) + while ((opt = getopt_long (argc, argv, "hne:f:VW:", longopts, (int *) 0)) != EOF) { @@@@ -408,4 +500,24 @@@@ usage (0); break; + case 'W': + if (strncasecmp(optarg, "ctype=", 6) != 0) + usage (4); + optarg += 6; + /*FALLDOWN*/ + case 'T': + if (strcasecmp(optarg, "ASCII") == 0) { + mbcinit(MBCTYPE_ASCII); + break; + } + if (strcasecmp(optarg, "EUC") == 0) { + mbcinit(MBCTYPE_EUC); + break; + } + if (strcasecmp(optarg, "SJIS") == 0) { + mbcinit(MBCTYPE_SJIS); + break; + } + fprintf (stderr, "%s: unknown ctype\n", optarg); + /*FALLDOWN*/ default: usage (4); @@@@ -526,4 +638,23 @@@@ #define MORE_CMDS 40 +static int +#ifdef __STDC__ +compare_mbcs(const void *p1, const void *p2) +#else +compare_mbcs(p1, p2) + /* The `const' keyword is #defined in regex.h. */ + const void *p1, *p2; +#endif +{ + unsigned short c1 = *(const unsigned short *)p1; + unsigned short c2 = *(const unsigned short *)p2; + + if (c1 == c2) + return (p1 == p2 ? 0 + /* qsort() $B$,0BDj$K$J$k$h$&$K(B, $BBg>.4X78$r6/@@)$9$k(B. */ + : p1 < p2 ? -1 : 1); + return c1 < c2 ? -1 : 1; +} + /* Read a program (or a subprogram within '{' '}' pairs) in and store the compiled form in *'vector' Return a pointer to the new vector. */ @@@@ -828,22 +959,42 @@@@ case 'y': cur_cmd->cmd = ch; - string = (unsigned char *) ck_malloc (256); - for (num = 0; num < 256; num++) - string[num] = num; - b = init_buffer (); - slash = inchar (); - while ((ch = inchar ()) != EOF && ch != slash) - add1_buffer (b, ch); - cur_cmd->x.translate = string; - string = (unsigned char *) get_buffer (b); - for (num = size_buffer (b); num; --num) - { - ch = inchar (); - if (ch == EOF) - bad_prog (BAD_EOF); - if (ch == slash) - bad_prog ("strings for y command are different lengths"); - cur_cmd->x.translate[*string++] = ch; + { + unsigned short *q; + unsigned char *p; + int i, n; + + b = init_buffer (); + n = 0; /* count multi-byte chars. */ + slash = inchar (); + while ((ch = inchar ()) != EOF && ch != slash) { + if ((unsigned short)ch >= 256) + n++; + add2_buffer (b, ch); } + q = (unsigned short *) ck_malloc (512 + n*2*sizeof (unsigned short)); + cur_cmd->x.cmd_y.translate = q; + cur_cmd->x.cmd_y.mbcsize = n; + for (num = 0; num < 256; num++) + q[num] = num; + p = (unsigned char *) get_buffer (b); + for (i = 256, num = size_buffer (b) >> 1; num; --num) + { + unsigned char c; + + ch = inchar (); + if (ch == EOF) + bad_prog (BAD_EOF); + if (ch == slash) + bad_prog ("strings for y command are different lengths"); + c = *p++; + if (!c) + q[*p++] = ch; + else + q[i++] = c << 8 | *p++, + q[i++] = ch; + } + if (n) + qsort(&q[256], n, 2*sizeof (unsigned short), compare_mbcs); + } flush_buffer (b); if (inchar () != slash || ((ch = inchar ()) != EOF && ch != '\n' && ch != ';')) @@@@ -880,5 +1031,5 @@@@ be meaningful. */ int -inchar () +inchar_raw () { int ch; @@@@ -907,4 +1058,25 @@@@ } +int saved_char = EOF; + +int +inchar () +{ + int c, c2; + + if (saved_char != EOF) { + c = saved_char; + saved_char = EOF; + return c; + } + c = inchar_raw (); + if (c == EOF || !ismbchar (c)) + return c; + c2 = inchar_raw (); + if (c2 == EOF || c2 == '\n' || c2 == '\0') + bad_prog (BAD_MBC); + return c << 8 | (unsigned char) c2; +} + /* unget 'ch' so the next call to inchar will return it. 'ch' must not be EOF or anything nasty like that. */ @@@@ -913,4 +1085,8 @@@@ int ch; { + if (ch >= 256) { + saved_char = ch; + return; + } if (ch == EOF) return; @@@@ -1178,7 +1354,9 @@@@ if (input_file == 0) { +#ifndef MSDOS extern int errno; extern char *sys_errlist[]; extern int sys_nerr; +#endif char *ptr; @@@@ -1405,9 +1583,21 @@@@ width += 2; } - else if (isprint (*tmp)) + else if (isprint ((unsigned char)*tmp) + || (current_mbctype == MBCTYPE_SJIS + && '\240' <= *tmp && *tmp <= '\337')) { putchar (*tmp); width++; } + else if (ismbchar (tmp[0]) && n >= 2 + && tmp[1] != '\n' && tmp[1] != '\0') + { + putchar (tmp[0]); + putchar (tmp[1]); + if (!(current_mbctype == MBCTYPE_EUC && tmp[0] == '\216')) + width++; + width++; + --n; + } else switch (*tmp) @@@@ -1663,9 +1853,61 @@@@ case 'y': - { + if (current_mbctype == MBCTYPE_ASCII) { unsigned char *p, *e; for (p = (unsigned char *) (line.text), e = p + line.length; p < e; p++) - *p = cur_cmd->x.translate[*p]; + *p = cur_cmd->x.cmd_y.translate[*p]; + } + else { + unsigned char *p, *e; + int i = 0, j, k; + unsigned short c; + + if (!tmp.alloc) { + tmp.alloc = line.length + 1; + if (tmp.alloc < 50) + tmp.alloc = 50; + tmp.text = ck_malloc (tmp.alloc); + } + for (p = (unsigned char *) (line.text), e = p + line.length; p < e; ) { + if (i + 2 > tmp.alloc) { + tmp.alloc *= 2; + tmp.text = ck_realloc (tmp.text, tmp.alloc); + } + c = *p++; + if (!ismbchar (c)) + c = cur_cmd->x.cmd_y.translate[c]; + else { + c = c << 8 | *p++; + /* $BF1$8%-!<$K$D$$$F$O:G8e$N$b$N$,M-8z$K$J$k$h$&$K(B, $B?5=E(B + $B$KC5:w$9$k(B. */ + /* $B$^$:(B, c < cur_cmd->x.cmd_y.translate[256 + j*2] $B$rK~(B + $B$?$9(B j $B$N:G>.CM$r5a$a$k(B. */ + for (j = 0, k = cur_cmd->x.cmd_y.mbcsize; j < k; ) { + int m = (j + k) >> 1; + + if (c >= cur_cmd->x.cmd_y.translate[256 + m*2]) + j = m + 1; + else + k = m; + } + if (0 < j + && c == cur_cmd->x.cmd_y.translate[256 + (j - 1)*2]) + /* $BD>A0$N%-!<$,0lCW$9$l$P(B, $BJQ49$9$k(B. */ + c = cur_cmd->x.cmd_y.translate[256 + (j - 1)*2 + 1]; + } + if (c >= 256) + tmp.text[i++] = c >> 8; + tmp.text[i++] = c; + } + t.text = line.text; + t.length = line.length; + t.alloc = line.alloc; + line.text = tmp.text; + line.length = i; + line.alloc = tmp.alloc; + tmp.text = t.text; + tmp.length = t.length; + tmp.alloc = t.alloc; } break; @@@@ -1871,5 +2113,6 @@@@ { fprintf (status ? stderr : stdout, "\ -Usage: %s [-nV] [--quiet] [--silent] [--version] [-e script]\n\ +Usage: %s [-nV] [--quiet] [--silent] [--version]\n\ + [-Wctype=...] [--ctype=...] [-e script]\n\ [-f script-file] [--expression=script] [--file=script-file] [file...]\n", myname); diff -ru2N sed-1.18/utils.c sed+mb1.03/utils.c --- sed-1.18/utils.c Tue May 18 06:28:49 1993 +++ sed+mb1.03/utils.c Fri Jul 2 11:19:13 1993 @@@@ -15,4 +15,6 @@@@ along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: Jul. 2, 1993 by t^2 */ /* These routines were written as part of a library (by hack), but since most @@@@ -298,10 +300,29 @@@@ b=(struct buffer *)bb; - if(b->length+1>b->allocated) { + if(b->length+2>b->allocated) { b->allocated*=2; b->b=(char *)ck_realloc(b->b,b->allocated); } + if ((unsigned short)ch >= 256) + b->b[b->length++] = ch >> 8; b->b[b->length]=ch; b->length++; +} + +void +add2_buffer(bb,ch) +VOID *bb; +int ch; +{ + struct buffer *b; + + b=(struct buffer *)bb; + if(b->length+2>b->allocated) { + b->allocated*=2; + b->b=(char *)ck_realloc(b->b,b->allocated); + } + b->b[b->length]=ch >> 8; + b->b[b->length + 1] = ch; + b->length += 2; } d16 2 @ 1.1.1.1 log @Import FreeBSD's "ja-sed" port: GNU sed + multi-byte extension. @ text @@