head 1.6; access; symbols pkgsrc-2013Q2:1.6.0.10 pkgsrc-2013Q2-base:1.6 pkgsrc-2012Q4:1.6.0.8 pkgsrc-2012Q4-base:1.6 pkgsrc-2011Q4:1.6.0.6 pkgsrc-2011Q4-base:1.6 pkgsrc-2011Q2:1.6.0.4 pkgsrc-2011Q2-base:1.6 pkgsrc-2009Q4:1.6.0.2 pkgsrc-2009Q4-base:1.6 pkgsrc-2009Q3:1.5.0.6 pkgsrc-2009Q3-base:1.5 pkgsrc-2009Q2:1.5.0.4 pkgsrc-2009Q2-base:1.5 pkgsrc-2009Q1:1.5.0.2 pkgsrc-2009Q1-base:1.5 pkgsrc-2008Q4:1.4.0.2 pkgsrc-2008Q4-base:1.4 pkgsrc-2008Q3:1.2.0.8 pkgsrc-2008Q3-base:1.2 cube-native-xorg:1.2.0.6 cube-native-xorg-base:1.2 pkgsrc-2008Q2:1.2.0.4 pkgsrc-2008Q2-base:1.2 cwrapper:1.2.0.2 pkgsrc-2008Q1:1.1.0.6 pkgsrc-2008Q1-base:1.1 pkgsrc-2007Q4:1.1.0.4 pkgsrc-2007Q4-base:1.1 pkgsrc-2007Q3:1.1.0.2; locks; strict; comment @# @; 1.6 date 2009.12.14.06.36.57; author seb; state dead; branches; next 1.5; 1.5 date 2009.01.29.09.41.00; author joerg; state Exp; branches; next 1.4; 1.4 date 2008.12.08.13.34.47; author tron; state Exp; branches 1.4.2.1; next 1.3; 1.3 date 2008.10.10.21.58.44; author he; state dead; branches; next 1.2; 1.2 date 2008.06.01.22.04.07; author he; state Exp; branches; next 1.1; 1.1 date 2007.11.06.19.54.53; author drochner; state Exp; branches 1.1.2.1 1.1.6.1; next ; 1.4.2.1 date 2009.01.30.13.58.54; author tron; state Exp; branches; next ; 1.1.2.1 date 2007.11.06.19.54.53; author ghen; state dead; branches; next 1.1.2.2; 1.1.2.2 date 2007.11.07.12.44.34; author ghen; state Exp; branches; next ; 1.1.6.1 date 2008.06.02.09.15.44; author tron; state Exp; branches; next ; desc @@ 1.6 log @Update perl from version 5.10.0nb6 to version 5.10.1. Pkgsrc changes: - Adjust some patches - Remove patches integrated upstream Upstream changes: "5.10.1 is a maintenance release for perl 5.10, incorporating various minor bugfixes and optimisations." Nonetheless some incompatibles changes are noteworthy and are related to: - flip-flop and defined-or operators behavior in switch statement - type-based dispatch and overloading of the smart match operator - 'use feature :5.10*' semantics - Perl development switched to git - internal structure of the ext/ perl source directory changed - removal of the modules Test::Harness::Straps, ExtUtils::MakeMaker::bytes and ExtUtils::MakeMaker::vmsish - the Module::CoreList module no longer contains the %:patchlevel hash - a bugfix related to the handling of the /m modifier and qr resulted in a change of behaviour between 5.8.x and 5.10.0 (this was missing from perl 5.10.0 perldelta). For a complete list of changes see included perl5101delta(1) or http://perldoc.perl.org/perl5101delta.html @ text @$NetBSD: patch-da,v 1.5 2009/01/29 09:41:00 joerg Exp $ Fixes for CVE-2008-2827 and CVE-2008-5302, taken from: http://rt.cpan.org/Public/Bug/Display.html?id=36982 http://bugs.debian.org/cgi-bin/bugreport.cgi?msg=85;filename=sid_fix_file_path;att=2;bug=286905 --- lib/File/Path.pm.orig 2007-12-18 10:47:07.000000000 +0000 +++ lib/File/Path.pm 2008-12-08 12:54:44.000000000 +0000 @@@@ -316,10 +316,8 @@@@ print "skipped $root\n" if $arg->{verbose}; next ROOT_DIR; } - if (!chmod $perm | 0700, $root) { - if ($Force_Writeable) { - _error($arg, "cannot make directory writeable", $canon); - } + if ($Force_Writeable && !chmod $perm | 0700, $root) { + _error($arg, "cannot make directory writeable", $canon); } print "rmdir $root\n" if $arg->{verbose}; if (rmdir $root) { @@@@ -328,7 +326,7 @@@@ } else { _error($arg, "cannot remove directory", $canon); - if (!chmod($perm, ($Is_VMS ? VMS::Filespec::fileify($root) : $root)) + if ($Force_Writeable && !chmod($perm, ($Is_VMS ? VMS::Filespec::fileify($root) : $root)) ) { _error($arg, sprintf("cannot restore permissions to 0%o",$perm), $canon); } @@@@ -350,9 +348,9 @@@@ next ROOT_DIR; } - my $nperm = $perm & 07777 | 0600; - if ($nperm != $perm and not chmod $nperm, $root) { - if ($Force_Writeable) { + if ($Force_Writeable) { + my $nperm = $perm & 07777 | 0600; + if ($nperm != $perm and not chmod $nperm, $root) { _error($arg, "cannot make file writeable", $canon); } } @ 1.5 log @Fix patch. @ text @d1 1 a1 1 $NetBSD$ @ 1.4 log @Add fixes for CVE-2008-2827 and CVE-2008-5302 from CPAN respectively Debian. While there also fix two check interpreter warnings. @ text @d1 1 a1 1 $NetBSD @ 1.4.2.1 log @Pullup ticket #2668 - joerg perl5: fix bulk build problem Revisions pulled up: - lang/perl5/patches/patch-da 1.5 --- Module Name: pkgsrc Committed By: joerg Date: Thu Jan 29 09:41:00 UTC 2009 Modified Files: pkgsrc/lang/perl5/patches: patch-da Log Message: Fix patch. @ text @d1 1 a1 1 $NetBSD$ @ 1.3 log @Update perl5 from version 5.8.8nb8 to 5.10.0. A large number of packages have had their internal regression tests run successfully with this update, including mod_perl for Apache. Pkgsrc changes: a number of our local patches are no longer needed. Upstream changes from version 5.8.8: # Core Enhancements * The feature pragma * New -E command-line switch * Defined-or operator * Switch and Smart Match operator * Regular expressions * say() * Lexical $_ * The _ prototype * UNITCHECK blocks * New Pragma, mro * readdir() may return a "short filename" on Windows * readpipe() is now overridable * Default argument for readline() * state() variables * Stacked filetest operators * UNIVERSAL::DOES() * Formats * Byte-order modifiers for pack() and unpack() * no VERSION * chdir, chmod and chown on filehandles * OS groups * Recursive sort subs * Exceptions in constant folding * Source filters in @@INC * New internal variables * Miscellaneous * UCD 5.0.0 * MAD * kill() on Windows # Incompatible Changes * Packing and UTF-8 strings * Byte/character count feature in unpack() * The $* and $# variables have been removed * substr() lvalues are no longer fixed-length * Parsing of -f _ * :unique * Effect of pragmas in eval * chdir FOO * Handling of .pmc files * $^V is now a version object instead of a v-string * @@- and @@+ in patterns * $AUTOLOAD can now be tainted * Tainting and printf * undef and signal handlers * strictures and dereferencing in defined() * (?p{}) has been removed * Pseudo-hashes have been removed * Removal of the bytecode compiler and of perlcc * Removal of the JPL * Recursive inheritance detected earlier # Modules and Pragmata * Upgrading individual core modules * Pragmata Changes * New modules * Selected Changes to Core Modules # Utility Changes # New Documentation # Performance Enhancements * In-place sorting * Lexical array access * XS-assisted SWASHGET * Constant subroutines * PERL_DONT_CREATE_GVSV * Weak references are cheaper * sort() enhancements * Memory optimisations * UTF-8 cache optimisation * Sloppy stat on Windows * Regular expressions optimisations # Installation and Configuration Improvements * Configuration improvements * Compilation improvements * Installation improvements * New Or Improved Platforms # Selected Bug Fixes # New or Changed Diagnostics # Changed Internals * Reordering of SVt_* constants * Elimination of SVt_PVBM * New type SVt_BIND * Removal of CPP symbols * Less space is used by ops * New parser * Use of const * Mathoms * AvFLAGS has been removed * av_* changes * $^H and %^H * B:: modules inheritance changed * Anonymous hash and array constructors ... See 'perldoc perldelta' or http://perldoc.perl.org/perldelta.html for explanation of each of these points. @ text @d1 1 a1 1 $NetBSD: patch-da,v 1.2 2008/06/01 22:04:07 he Exp $ d3 1 a3 4 Fix for http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2008-1927 from http://bugs.debian.org/cgi-bin/bugreport.cgi?msg=26;filename=27_fix_regcomp_utf8;att=1;bug=454792 d5 30 a34 101 --- regcomp.c.orig 2008-06-01 22:04:17.000000000 +0200 +++ regcomp.c @@@@ -2790,6 +2790,39 @@@@ S_regpiece(pTHX_ RExC_state_t *pRExC_sta } /* + * reg_recode + * + * It returns the code point in utf8 for the value in *encp. + * value: a code value in the source encoding + * encp: a pointer to an Encode object + * + * If the result from Encode is not a single character, + * it returns U+FFFD (Replacement character) and sets *encp to NULL. + */ +STATIC UV +S_reg_recode(pTHX_ const char value, SV **encp) +{ + STRLEN numlen = 1; + SV * const sv = sv_2mortal(newSVpvn(&value, numlen)); + const char * const s = encp && *encp ? sv_recode_to_utf8(sv, *encp) + : SvPVX(sv); + const STRLEN newlen = SvCUR(sv); + UV uv = UNICODE_REPLACEMENT; + + if (newlen) + uv = SvUTF8(sv) + ? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT) + : *(U8*)s; + + if (!newlen || numlen != newlen) { + uv = UNICODE_REPLACEMENT; + if (encp) + *encp = NULL; + } + return uv; +} + +/* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that @@@@ -3181,6 +3214,8 @@@@ tryagain: ender = grok_hex(p, &numlen, &flags, NULL); p += numlen; } + if (PL_encoding && ender < 0x100) + goto recode_encoding; break; case 'c': p++; @@@@ -3200,6 +3235,17 @@@@ tryagain: --p; goto loopdone; } + if (PL_encoding && ender < 0x100) + goto recode_encoding; + break; + recode_encoding: + { + SV* enc = PL_encoding; + ender = reg_recode((const char)(U8)ender, &enc); + if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP)) + vWARN(p, "Invalid escape in the specified encoding"); + RExC_utf8 = 1; + } break; case '\0': if (p >= RExC_end) @@@@ -3330,32 +3376,6 @@@@ tryagain: break; } - /* If the encoding pragma is in effect recode the text of - * any EXACT-kind nodes. */ - if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) { - STRLEN oldlen = STR_LEN(ret); - SV *sv = sv_2mortal(newSVpvn(STRING(ret), oldlen)); - - if (RExC_utf8) - SvUTF8_on(sv); - if (sv_utf8_downgrade(sv, TRUE)) { - const char * const s = sv_recode_to_utf8(sv, PL_encoding); - const STRLEN newlen = SvCUR(sv); - - if (SvUTF8(sv)) - RExC_utf8 = 1; - if (!SIZE_ONLY) { - DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n", - (int)oldlen, STRING(ret), - (int)newlen, s)); - Copy(s, STRING(ret), newlen, char); - STR_LEN(ret) += newlen - oldlen; - RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen); - } else - RExC_size += STR_SZ(newlen) - STR_SZ(oldlen); - } - } - return(ret); } d36 9 a44 41 @@@@ -3733,6 +3753,8 @@@@ S_regclass(pTHX_ RExC_state_t *pRExC_sta value = grok_hex(RExC_parse, &numlen, &flags, NULL); RExC_parse += numlen; } + if (PL_encoding && value < 0x100) + goto recode_encoding; break; case 'c': value = UCHARAT(RExC_parse++); @@@@ -3740,13 +3762,24 @@@@ S_regclass(pTHX_ RExC_state_t *pRExC_sta break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - { - I32 flags = 0; - numlen = 3; - value = grok_oct(--RExC_parse, &numlen, &flags, NULL); - RExC_parse += numlen; - break; - } + { + I32 flags = 0; + numlen = 3; + value = grok_oct(--RExC_parse, &numlen, &flags, NULL); + RExC_parse += numlen; + if (PL_encoding && value < 0x100) + goto recode_encoding; + break; + } + recode_encoding: + { + SV* enc = PL_encoding; + value = reg_recode((const char)(U8)value, &enc); + if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP)) + vWARN(RExC_parse, + "Invalid escape in the specified encoding"); + break; + } default: if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP)) vWARN2(RExC_parse, @ 1.2 log @Apply a patch from Debian to fix the security vulnerability identified by http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2008-1927. Patch fetched from http://bugs.debian.org/cgi-bin/bugreport.cgi?msg=26;filename=27_fix_regcomp_utf8;att=1;bug=454792 which, according to comments, is from upstream change 27688. Revision bumped to nb8. @ text @d1 1 a1 1 $NetBSD: patch-da,v 1.1 2007/11/06 19:54:53 drochner Exp $ @ 1.1 log @add a patch from Redhat bugzilla #323571 to fix CVE-2007-5116: A flaw was found in Perl's regular expression engine. Specially crafted input to a regular expression can cause Perl to improperly allocate memory, possibly resulting in arbitrary code running with the permissions of the user running Perl. @ text @d1 1 a1 1 $NetBSD$ d3 6 a8 1 --- regcomp.c.orig 2006-01-08 21:59:27.000000000 +0100 d10 2 a11 15 @@@@ -135,7 +135,8 @@@@ typedef struct RExC_state_t { I32 extralen; I32 seen_zerolen; I32 seen_evals; - I32 utf8; + I32 utf8; /* pattern is utf8 or not */ + I32 orig_utf8; /* pattern was originally utf8 */ #if ADD_TO_REGEXEC char *starttry; /* -Dr: where regtry was called. */ #define RExC_starttry (pRExC_state->starttry) @@@@ -161,6 +162,7 @@@@ typedef struct RExC_state_t { #define RExC_seen_zerolen (pRExC_state->seen_zerolen) #define RExC_seen_evals (pRExC_state->seen_evals) #define RExC_utf8 (pRExC_state->utf8) +#define RExC_orig_utf8 (pRExC_state->orig_utf8) d13 24 a36 17 #define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?') #define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \ @@@@ -1749,15 +1751,17 @@@@ Perl_pregcomp(pTHX_ char *exp, char *xen if (exp == NULL) FAIL("NULL regexp argument"); - RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8; + RExC_orig_utf8 = RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8; - RExC_precomp = exp; DEBUG_r({ if (!PL_colorset) reginitcolors(); PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n", PL_colors[4],PL_colors[5],PL_colors[0], - (int)(xend - exp), RExC_precomp, PL_colors[1]); + (int)(xend - exp), exp, PL_colors[1]); }); d38 4 a41 18 +redo_first_pass: + RExC_precomp = exp; RExC_flags = pm->op_pmflags; RExC_sawback = 0; @@@@ -1783,6 +1787,17 @@@@ Perl_pregcomp(pTHX_ char *exp, char *xen RExC_precomp = Nullch; return(NULL); } + if (RExC_utf8 && !RExC_orig_utf8) { + STRLEN len = xend-exp; + DEBUG_r(PerlIO_printf(Perl_debug_log, + "UTF8 mismatch! Converting to utf8 for resizing and compile\n")); + exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len); + xend = exp + len; + RExC_orig_utf8 = RExC_utf8; + SAVEFREEPV(exp); + goto redo_first_pass; d43 2 d46 63 a108 1 DEBUG_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)RExC_size)); d110 41 a150 1 /* Small enough for pointer-storage convention? @ 1.1.6.1 log @Pullup ticket #2408 - requested by he Security patch for perl Revisions pulled up: - lang/perl5/Makefile 1.137 - lang/perl5/distinfo 1.48 - lang/perl5/patches/patch-ad 1.11 - lang/perl5/patches/patch-af 1.13 - lang/perl5/patches/patch-ag 1.11 - lang/perl5/patches/patch-ai 1.5 - lang/perl5/patches/patch-aj 1.9 - lang/perl5/patches/patch-ak 1.3 - lang/perl5/patches/patch-da 1.2 --- Module Name: pkgsrc Committed By: he Date: Sun Jun 1 22:04:07 UTC 2008 Modified Files: pkgsrc/lang/perl5: Makefile distinfo pkgsrc/lang/perl5/patches: patch-da Added Files: pkgsrc/lang/perl5/patches: patch-ad patch-af patch-ag patch-ai patch-aj patch-ak Log Message: Apply a patch from Debian to fix the security vulnerability identified by http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2008-1927. Patch fetched from http://bugs.debian.org/cgi-bin/bugreport.cgi?msg=26;filename=27_fix_regcomp_utf8;att=1;bug=454792 which, according to comments, is from upstream change 27688. Revision bumped to nb8. @ text @d3 1 a3 6 Fix for http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2008-1927 from http://bugs.debian.org/cgi-bin/bugreport.cgi?msg=26;filename=27_fix_regcomp_utf8;att=1;bug=454792 --- regcomp.c.orig 2008-06-01 22:04:17.000000000 +0200 d5 15 a19 2 @@@@ -2790,6 +2790,39 @@@@ S_regpiece(pTHX_ RExC_state_t *pRExC_sta } d21 17 a37 19 /* + * reg_recode + * + * It returns the code point in utf8 for the value in *encp. + * value: a code value in the source encoding + * encp: a pointer to an Encode object + * + * If the result from Encode is not a single character, + * it returns U+FFFD (Replacement character) and sets *encp to NULL. + */ +STATIC UV +S_reg_recode(pTHX_ const char value, SV **encp) +{ + STRLEN numlen = 1; + SV * const sv = sv_2mortal(newSVpvn(&value, numlen)); + const char * const s = encp && *encp ? sv_recode_to_utf8(sv, *encp) + : SvPVX(sv); + const STRLEN newlen = SvCUR(sv); + UV uv = UNICODE_REPLACEMENT; d39 18 a56 9 + if (newlen) + uv = SvUTF8(sv) + ? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT) + : *(U8*)s; + + if (!newlen || numlen != newlen) { + uv = UNICODE_REPLACEMENT; + if (encp) + *encp = NULL; a57 2 + return uv; +} d59 1 a59 63 +/* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that @@@@ -3181,6 +3214,8 @@@@ tryagain: ender = grok_hex(p, &numlen, &flags, NULL); p += numlen; } + if (PL_encoding && ender < 0x100) + goto recode_encoding; break; case 'c': p++; @@@@ -3200,6 +3235,17 @@@@ tryagain: --p; goto loopdone; } + if (PL_encoding && ender < 0x100) + goto recode_encoding; + break; + recode_encoding: + { + SV* enc = PL_encoding; + ender = reg_recode((const char)(U8)ender, &enc); + if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP)) + vWARN(p, "Invalid escape in the specified encoding"); + RExC_utf8 = 1; + } break; case '\0': if (p >= RExC_end) @@@@ -3330,32 +3376,6 @@@@ tryagain: break; } - /* If the encoding pragma is in effect recode the text of - * any EXACT-kind nodes. */ - if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) { - STRLEN oldlen = STR_LEN(ret); - SV *sv = sv_2mortal(newSVpvn(STRING(ret), oldlen)); - - if (RExC_utf8) - SvUTF8_on(sv); - if (sv_utf8_downgrade(sv, TRUE)) { - const char * const s = sv_recode_to_utf8(sv, PL_encoding); - const STRLEN newlen = SvCUR(sv); - - if (SvUTF8(sv)) - RExC_utf8 = 1; - if (!SIZE_ONLY) { - DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n", - (int)oldlen, STRING(ret), - (int)newlen, s)); - Copy(s, STRING(ret), newlen, char); - STR_LEN(ret) += newlen - oldlen; - RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen); - } else - RExC_size += STR_SZ(newlen) - STR_SZ(oldlen); - } - } - return(ret); } d61 1 a61 41 @@@@ -3733,6 +3753,8 @@@@ S_regclass(pTHX_ RExC_state_t *pRExC_sta value = grok_hex(RExC_parse, &numlen, &flags, NULL); RExC_parse += numlen; } + if (PL_encoding && value < 0x100) + goto recode_encoding; break; case 'c': value = UCHARAT(RExC_parse++); @@@@ -3740,13 +3762,24 @@@@ S_regclass(pTHX_ RExC_state_t *pRExC_sta break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - { - I32 flags = 0; - numlen = 3; - value = grok_oct(--RExC_parse, &numlen, &flags, NULL); - RExC_parse += numlen; - break; - } + { + I32 flags = 0; + numlen = 3; + value = grok_oct(--RExC_parse, &numlen, &flags, NULL); + RExC_parse += numlen; + if (PL_encoding && value < 0x100) + goto recode_encoding; + break; + } + recode_encoding: + { + SV* enc = PL_encoding; + value = reg_recode((const char)(U8)value, &enc); + if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP)) + vWARN(RExC_parse, + "Invalid escape in the specified encoding"); + break; + } default: if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP)) vWARN2(RExC_parse, @ 1.1.2.1 log @file patch-da was added on branch pkgsrc-2007Q3 on 2007-11-07 12:44:34 +0000 @ text @d1 61 @ 1.1.2.2 log @Pullup ticket 2222 - requested by drochner security fix for perl - pkgsrc/lang/perl5/Makefile 1.129 - pkgsrc/lang/perl5/distinfo 1.43 - pkgsrc/lang/perl5/patches/patch-da 1.1 Module Name: pkgsrc Committed By: drochner Date: Tue Nov 6 19:54:53 UTC 2007 Modified Files: pkgsrc/lang/perl5: Makefile distinfo Added Files: pkgsrc/lang/perl5/patches: patch-da Log Message: add a patch from Redhat bugzilla #323571 to fix CVE-2007-5116: A flaw was found in Perl's regular expression engine. Specially crafted input to a regular expression can cause Perl to improperly allocate memory, possibly resulting in arbitrary code running with the permissions of the user running Perl. @ text @a0 61 $NetBSD$ --- regcomp.c.orig 2006-01-08 21:59:27.000000000 +0100 +++ regcomp.c @@@@ -135,7 +135,8 @@@@ typedef struct RExC_state_t { I32 extralen; I32 seen_zerolen; I32 seen_evals; - I32 utf8; + I32 utf8; /* pattern is utf8 or not */ + I32 orig_utf8; /* pattern was originally utf8 */ #if ADD_TO_REGEXEC char *starttry; /* -Dr: where regtry was called. */ #define RExC_starttry (pRExC_state->starttry) @@@@ -161,6 +162,7 @@@@ typedef struct RExC_state_t { #define RExC_seen_zerolen (pRExC_state->seen_zerolen) #define RExC_seen_evals (pRExC_state->seen_evals) #define RExC_utf8 (pRExC_state->utf8) +#define RExC_orig_utf8 (pRExC_state->orig_utf8) #define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?') #define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \ @@@@ -1749,15 +1751,17 @@@@ Perl_pregcomp(pTHX_ char *exp, char *xen if (exp == NULL) FAIL("NULL regexp argument"); - RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8; + RExC_orig_utf8 = RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8; - RExC_precomp = exp; DEBUG_r({ if (!PL_colorset) reginitcolors(); PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n", PL_colors[4],PL_colors[5],PL_colors[0], - (int)(xend - exp), RExC_precomp, PL_colors[1]); + (int)(xend - exp), exp, PL_colors[1]); }); + +redo_first_pass: + RExC_precomp = exp; RExC_flags = pm->op_pmflags; RExC_sawback = 0; @@@@ -1783,6 +1787,17 @@@@ Perl_pregcomp(pTHX_ char *exp, char *xen RExC_precomp = Nullch; return(NULL); } + if (RExC_utf8 && !RExC_orig_utf8) { + STRLEN len = xend-exp; + DEBUG_r(PerlIO_printf(Perl_debug_log, + "UTF8 mismatch! Converting to utf8 for resizing and compile\n")); + exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len); + xend = exp + len; + RExC_orig_utf8 = RExC_utf8; + SAVEFREEPV(exp); + goto redo_first_pass; + } + DEBUG_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)RExC_size)); /* Small enough for pointer-storage convention? @