鬼車の最新版である5.9.2にはいくつかのバグが存在する。以下に述べる修正は、Ruby 1.9に対して行われたパッチを他の実装のために公開するものであり、ライセンスはRuby'sまたは鬼車のライセンスとする。
r29102
commit b3545895d1bb5a72e1311022c69b882d6ce90033
Author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>
Date: Thu Aug 26 01:50:07 2010 +0000
* regint.h (OnigStackIndex): the type should be intptr_t.
Original Oniguruma assumes the size of long and that of void *
are equal, but it's not true on LLP64 platform: mswin64.
originally patched by shintaro kuwamoto [ruby-dev:42133]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29102 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
diff --git a/regint.h b/regint.h
index 633cd88..ca9f2b1 100644
--- a/regint.h
+++ b/regint.h
@@ -720,7 +720,7 @@ typedef struct {
BBuf* mbuf; /* multi-byte info or NULL */
} CClassNode;
-typedef long OnigStackIndex;
+typedef intptr_t OnigStackIndex;
typedef struct _OnigStackType {
unsigned int type;
このパッチの原案はkuwamotoさんによるものだが、[ruby-dev:42141]にて、Ruby'sまたは鬼車のライセンスとすることについて了解を得ている。
r28648
commit b4608406f17097508d4666ceacf8764a746efa3e
Author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>
Date: Thu Jul 15 06:55:42 2010 +0000
* regexec.c (onig_search): don't skip non-ANYCHARs when
.* fails to match. This causes to fail matching
ANYCHAR_STAR with LOOK_BEHIND. This fix is workaround
and disable the optimization. [ruby-dev:41851]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@28648 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
diff --git a/regexec.c b/regexec.c
index 24422b1..c78d8ea 100644
--- a/regexec.c
+++ b/regexec.c
@@ -3636,11 +3636,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
MATCH_AND_RETURN_CHECK(orig_range);
prev = s;
s += enclen(reg->enc, s, end);
-
- while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
- prev = s;
- s += enclen(reg->enc, s, end);
- }
} while (s < range);
goto mismatch;
}
このパッチを適用すると、報告されているバグは直るが、副作用として .* に対する最適化が行われなくなる。
r26796
commit 76a5c00be6ee10310571f6dac5147c212f5be5d9
Author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>
Date: Tue Mar 2 09:40:27 2010 +0000
* regcomp.c (noname_disable_map): add NT_ANCHOR case.
Without this change, captured groups in anchors (look-ahead,
look-behind, and so on) are not removed and
unintended invalid backref error occur. [ruby-core:28235]
* regcomp.c (renumber_by_map): ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@26796 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
diff --git a/regcomp.c b/regcomp.c
index 57c0262..b681e66 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1794,6 +1794,20 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
}
break;
+ case NT_ANCHOR:
+ {
+ AnchorNode* an = NANCHOR(node);
+ switch (an->type) {
+ case ANCHOR_PREC_READ:
+ case ANCHOR_PREC_READ_NOT:
+ case ANCHOR_LOOK_BEHIND:
+ case ANCHOR_LOOK_BEHIND_NOT:
+ r = noname_disable_map(&(an->target), map, counter);
+ break;
+ }
+ }
+ break;
+
default:
break;
}
@@ -1852,6 +1866,20 @@ renumber_by_map(Node* node, GroupNumRemap* map)
r = renumber_node_backref(node, map);
break;
+ case NT_ANCHOR:
+ {
+ AnchorNode* an = NANCHOR(node);
+ switch (an->type) {
+ case ANCHOR_PREC_READ:
+ case ANCHOR_PREC_READ_NOT:
+ case ANCHOR_LOOK_BEHIND:
+ case ANCHOR_LOOK_BEHIND_NOT:
+ r = renumber_by_map(an->target, map);
+ break;
+ }
+ }
+ break;
+
default:
break;
}
r29939
diff --git a/regcomp.c b/regcomp.c
index 4508bcf..aaa5698 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3692,6 +3692,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
int type;
int r = 0;
+restart:
type = NTYPE(node);
switch (type) {
case NT_LIST:
@@ -3906,6 +3907,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
r = setup_look_behind(node, reg, env);
if (r != 0) return r;
+ if (NTYPE(node) != NT_ANCHOR) goto restart;
r = setup_tree(an->target, reg, state, env);
}
break;
@@ -3918,6 +3920,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
r = setup_look_behind(node, reg, env);
if (r != 0) return r;
+ if (NTYPE(node) != NT_ANCHOR) goto restart;
r = setup_tree(an->target, reg, (state | IN_NOT), env);
}
break;
@@ -5351,6 +5354,15 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
if (r != 0) goto err;
+#ifdef ONIG_DEBUG_PARSE_TREE
+# if 0
+ fprintf(stderr, "ORIGINAL PARSE TREE:\n");
+ if (!onig_is_prelude()) {
+ print_tree(stderr, root);
+ }
+# endif
+#endif
+
#ifdef USE_NAMED_GROUP
/* mixed use named group and no-named group */
if (scan_env.num_named > 0 &&