Bug Summary

File:compat/regex/regcomp.c
Location:line 1003, column 11
Description:Access to field 'node_idx' results in a dereference of a null pointer (loaded from field 'first')

Annotated Source Code

1/* Extended regular expression matching and search library.
2 Copyright (C) 2002-2007,2009,2010 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA. */
20
21static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
22 size_t length, reg_syntax_t syntax);
23static void re_compile_fastmap_iter (regex_t *bufp,
24 const re_dfastate_t *init_state,
25 char *fastmap);
26static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
27#ifdef RE_ENABLE_I18N
28static void free_charset (re_charset_t *cset);
29#endif /* RE_ENABLE_I18N */
30static void free_workarea_compile (regex_t *preg);
31static reg_errcode_t create_initial_state (re_dfa_t *dfa);
32#ifdef RE_ENABLE_I18N
33static void optimize_utf8 (re_dfa_t *dfa);
34#endif
35static reg_errcode_t analyze (regex_t *preg);
36static reg_errcode_t preorder (bin_tree_t *root,
37 reg_errcode_t (fn (void *, bin_tree_t *)),
38 void *extra);
39static reg_errcode_t postorder (bin_tree_t *root,
40 reg_errcode_t (fn (void *, bin_tree_t *)),
41 void *extra);
42static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
43static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
44static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
45 bin_tree_t *node);
46static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
47static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
48static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
49static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
50static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
51 unsigned int constraint);
52static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
53static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
54 int node, int root);
55static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
56static int fetch_number (re_string_t *input, re_token_t *token,
57 reg_syntax_t syntax);
58static int peek_token (re_token_t *token, re_string_t *input,
59 reg_syntax_t syntax) internal_function;
60static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
61 reg_syntax_t syntax, reg_errcode_t *err);
62static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
63 re_token_t *token, reg_syntax_t syntax,
64 int nest, reg_errcode_t *err);
65static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
66 re_token_t *token, reg_syntax_t syntax,
67 int nest, reg_errcode_t *err);
68static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
69 re_token_t *token, reg_syntax_t syntax,
70 int nest, reg_errcode_t *err);
71static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
72 re_token_t *token, reg_syntax_t syntax,
73 int nest, reg_errcode_t *err);
74static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
75 re_dfa_t *dfa, re_token_t *token,
76 reg_syntax_t syntax, reg_errcode_t *err);
77static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
78 re_token_t *token, reg_syntax_t syntax,
79 reg_errcode_t *err);
80static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
81 re_string_t *regexp,
82 re_token_t *token, int token_len,
83 re_dfa_t *dfa,
84 reg_syntax_t syntax,
85 int accept_hyphen);
86static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
87 re_string_t *regexp,
88 re_token_t *token);
89#ifdef RE_ENABLE_I18N
90static reg_errcode_t build_equiv_class (bitset_t sbcset,
91 re_charset_t *mbcset,
92 int *equiv_class_alloc,
93 const unsigned char *name);
94static reg_errcode_t build_charclass (RE_TRANSLATE_TYPEunsigned char * trans,
95 bitset_t sbcset,
96 re_charset_t *mbcset,
97 int *char_class_alloc,
98 const char *class_name,
99 reg_syntax_t syntax);
100#else /* not RE_ENABLE_I18N */
101static reg_errcode_t build_equiv_class (bitset_t sbcset,
102 const unsigned char *name);
103static reg_errcode_t build_charclass (RE_TRANSLATE_TYPEunsigned char * trans,
104 bitset_t sbcset,
105 const char *class_name,
106 reg_syntax_t syntax);
107#endif /* not RE_ENABLE_I18N */
108static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
109 RE_TRANSLATE_TYPEunsigned char * trans,
110 const char *class_name,
111 const char *extra,
112 int non_match, reg_errcode_t *err);
113static bin_tree_t *create_tree (re_dfa_t *dfa,
114 bin_tree_t *left, bin_tree_t *right,
115 re_token_type_t type);
116static bin_tree_t *create_token_tree (re_dfa_t *dfa,
117 bin_tree_t *left, bin_tree_t *right,
118 const re_token_t *token);
119static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
120static void free_token (re_token_t *node);
121static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
122static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
123
124/* This table gives an error message for each of the error codes listed
125 in regex.h. Obviously the order here has to be same as there.
126 POSIX doesn't require that we do anything for REG_NOERROR,
127 but why not be nice? */
128
129const char __re_error_msgid[] attribute_hidden =
130 {
131#define REG_NOERROR_IDX0 0
132 gettext_noop ("Success")"Success" /* REG_NOERROR */
133 "\0"
134#define REG_NOMATCH_IDX(0 + sizeof "Success") (REG_NOERROR_IDX0 + sizeof "Success")
135 gettext_noop ("No match")"No match" /* REG_NOMATCH */
136 "\0"
137#define REG_BADPAT_IDX((0 + sizeof "Success") + sizeof "No match") (REG_NOMATCH_IDX(0 + sizeof "Success") + sizeof "No match")
138 gettext_noop ("Invalid regular expression")"Invalid regular expression" /* REG_BADPAT */
139 "\0"
140#define REG_ECOLLATE_IDX(((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
)
(REG_BADPAT_IDX((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression")
141 gettext_noop ("Invalid collation character")"Invalid collation character" /* REG_ECOLLATE */
142 "\0"
143#define REG_ECTYPE_IDX((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character")
(REG_ECOLLATE_IDX(((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
)
+ sizeof "Invalid collation character")
144 gettext_noop ("Invalid character class name")"Invalid character class name" /* REG_ECTYPE */
145 "\0"
146#define REG_EESCAPE_IDX(((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
)
(REG_ECTYPE_IDX((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character")
+ sizeof "Invalid character class name")
147 gettext_noop ("Trailing backslash")"Trailing backslash" /* REG_EESCAPE */
148 "\0"
149#define REG_ESUBREG_IDX((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash")
(REG_EESCAPE_IDX(((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
)
+ sizeof "Trailing backslash")
150 gettext_noop ("Invalid back reference")"Invalid back reference" /* REG_ESUBREG */
151 "\0"
152#define REG_EBRACK_IDX(((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
)
(REG_ESUBREG_IDX((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash")
+ sizeof "Invalid back reference")
153 gettext_noop ("Unmatched [ or [^")"Unmatched [ or [^" /* REG_EBRACK */
154 "\0"
155#define REG_EPAREN_IDX((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
) + sizeof "Unmatched [ or [^")
(REG_EBRACK_IDX(((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
)
+ sizeof "Unmatched [ or [^")
156 gettext_noop ("Unmatched ( or \\(")"Unmatched ( or \\(" /* REG_EPAREN */
157 "\0"
158#define REG_EBRACE_IDX(((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
) + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\("
)
(REG_EPAREN_IDX((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
) + sizeof "Unmatched [ or [^")
+ sizeof "Unmatched ( or \\(")
159 gettext_noop ("Unmatched \\{")"Unmatched \\{" /* REG_EBRACE */
160 "\0"
161#define REG_BADBR_IDX((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{")
(REG_EBRACE_IDX(((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
) + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\("
)
+ sizeof "Unmatched \\{")
162 gettext_noop ("Invalid content of \\{\\}")"Invalid content of \\{\\}" /* REG_BADBR */
163 "\0"
164#define REG_ERANGE_IDX(((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}")
(REG_BADBR_IDX((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{")
+ sizeof "Invalid content of \\{\\}")
165 gettext_noop ("Invalid range end")"Invalid range end" /* REG_ERANGE */
166 "\0"
167#define REG_ESPACE_IDX((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end")
(REG_ERANGE_IDX(((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}")
+ sizeof "Invalid range end")
168 gettext_noop ("Memory exhausted")"Memory exhausted" /* REG_ESPACE */
169 "\0"
170#define REG_BADRPT_IDX(((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted")
(REG_ESPACE_IDX((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end")
+ sizeof "Memory exhausted")
171 gettext_noop ("Invalid preceding regular expression")"Invalid preceding regular expression" /* REG_BADRPT */
172 "\0"
173#define REG_EEND_IDX((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
)
(REG_BADRPT_IDX(((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted")
+ sizeof "Invalid preceding regular expression")
174 gettext_noop ("Premature end of regular expression")"Premature end of regular expression" /* REG_EEND */
175 "\0"
176#define REG_ESIZE_IDX(((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
) + sizeof "Premature end of regular expression")
(REG_EEND_IDX((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
)
+ sizeof "Premature end of regular expression")
177 gettext_noop ("Regular expression too big")"Regular expression too big" /* REG_ESIZE */
178 "\0"
179#define REG_ERPAREN_IDX((((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
) + sizeof "Premature end of regular expression") + sizeof "Regular expression too big"
)
(REG_ESIZE_IDX(((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
) + sizeof "Premature end of regular expression")
+ sizeof "Regular expression too big")
180 gettext_noop ("Unmatched ) or \\)")"Unmatched ) or \\)" /* REG_ERPAREN */
181 };
182
183const size_t __re_error_msgid_idx[] attribute_hidden =
184 {
185 REG_NOERROR_IDX0,
186 REG_NOMATCH_IDX(0 + sizeof "Success"),
187 REG_BADPAT_IDX((0 + sizeof "Success") + sizeof "No match"),
188 REG_ECOLLATE_IDX(((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
)
,
189 REG_ECTYPE_IDX((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character")
,
190 REG_EESCAPE_IDX(((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
)
,
191 REG_ESUBREG_IDX((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash")
,
192 REG_EBRACK_IDX(((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
)
,
193 REG_EPAREN_IDX((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
) + sizeof "Unmatched [ or [^")
,
194 REG_EBRACE_IDX(((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
) + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\("
)
,
195 REG_BADBR_IDX((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{")
,
196 REG_ERANGE_IDX(((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}")
,
197 REG_ESPACE_IDX((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end")
,
198 REG_BADRPT_IDX(((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted")
,
199 REG_EEND_IDX((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
)
,
200 REG_ESIZE_IDX(((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
) + sizeof "Premature end of regular expression")
,
201 REG_ERPAREN_IDX((((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
) + sizeof "Premature end of regular expression") + sizeof "Regular expression too big"
)
202 };
203
204/* Entry points for GNU code. */
205
206
207#ifdef ZOS_USS
208
209/* For ZOS USS we must define btowc */
210
211wchar_t
212btowc (int c)
213{
214 wchar_t wtmp[2];
215 char tmp[2];
216
217 tmp[0] = c;
218 tmp[1] = 0;
219
220 mbtowc (wtmp, tmp, 1);
221 return wtmp[0];
222}
223#endif
224
225/* re_compile_pattern is the GNU regular expression compiler: it
226 compiles PATTERN (of length LENGTH) and puts the result in BUFP.
227 Returns 0 if the pattern was valid, otherwise an error string.
228
229 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
230 are set in BUFP on entry. */
231
232const char *
233re_compile_pattern (const char *pattern,
234 size_t length,
235 struct re_pattern_buffer *bufp)
236{
237 reg_errcode_t ret;
238
239 /* And GNU code determines whether or not to get register information
240 by passing null for the REGS argument to re_match, etc., not by
241 setting no_sub, unless RE_NO_SUB is set. */
242 bufp->no_sub = !!(re_syntax_options & RE_NO_SUB((((((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1)
);
243
244 /* Match anchors at newline. */
245 bufp->newline_anchor = 1;
246
247 ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
248
249 if (!ret)
250 return NULL((void*)0);
251 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret])(__re_error_msgid + __re_error_msgid_idx[(int) ret]);
252}
253#ifdef _LIBC
254weak_alias (__re_compile_pattern, re_compile_pattern)
255#endif
256
257/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
258 also be assigned to arbitrarily: each pattern buffer stores its own
259 syntax, so it can be changed between regex compilations. */
260/* This has no initializer because initialized variables in Emacs
261 become read-only after dumping. */
262reg_syntax_t re_syntax_options;
263
264
265/* Specify the precise syntax of regexps for compilation. This provides
266 for compatibility for various utilities which historically have
267 different, incompatible syntaxes.
268
269 The argument SYNTAX is a bit mask comprised of the various bits
270 defined in regex.h. We return the old syntax. */
271
272reg_syntax_t
273re_set_syntax (reg_syntax_t syntax)
274{
275 reg_syntax_t ret = re_syntax_options;
276
277 re_syntax_options = syntax;
278 return ret;
279}
280#ifdef _LIBC
281weak_alias (__re_set_syntax, re_set_syntax)
282#endif
283
284int
285re_compile_fastmap (struct re_pattern_buffer *bufp)
286{
287 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
288 char *fastmap = bufp->fastmap;
289
290 memset (fastmap, '\0', sizeof (char) * SBC_MAX)__builtin___memset_chk (fastmap, '\0', sizeof (char) * 256, __builtin_object_size
(fastmap, 0))
;
291 re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
292 if (dfa->init_state != dfa->init_state_word)
293 re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
294 if (dfa->init_state != dfa->init_state_nl)
295 re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
296 if (dfa->init_state != dfa->init_state_begbuf)
297 re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
298 bufp->fastmap_accurate = 1;
299 return 0;
300}
301#ifdef _LIBC
302weak_alias (__re_compile_fastmap, re_compile_fastmap)
303#endif
304
305static inline void
306__attribute ((always_inline))__attribute__ ((always_inline))
307re_set_fastmap (char *fastmap, int icase, int ch)
308{
309 fastmap[ch] = 1;
310 if (icase)
311 fastmap[tolower (ch)] = 1;
312}
313
314/* Helper function for re_compile_fastmap.
315 Compile fastmap for the initial_state INIT_STATE. */
316
317static void
318re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
319 char *fastmap)
320{
321 volatile re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
322 int node_cnt;
323 int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE(((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1)
));
324 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
325 {
326 int node = init_state->nodes.elems[node_cnt];
327 re_token_type_t type = dfa->nodes[node].type;
328
329 if (type == CHARACTER)
330 {
331 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
332#ifdef RE_ENABLE_I18N
333 if ((bufp->syntax & RE_ICASE(((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1)
) && dfa->mb_cur_max > 1)
334 {
335 unsigned char *buf = re_malloc (unsigned char, dfa->mb_cur_max)((unsigned char *) malloc ((dfa->mb_cur_max) * sizeof (unsigned
char)))
, *p;
336 wchar_t wc;
337 mbstate_t state;
338
339 p = buf;
340 *p++ = dfa->nodes[node].opr.c;
341 while (++node < dfa->nodes_len
342 && dfa->nodes[node].type == CHARACTER
343 && dfa->nodes[node].mb_partial)
344 *p++ = dfa->nodes[node].opr.c;
345 memset (&state, '\0', sizeof (state))__builtin___memset_chk (&state, '\0', sizeof (state), __builtin_object_size
(&state, 0))
;
346 if (__mbrtowcmbrtowc (&wc, (const char *) buf, p - buf,
347 &state) == p - buf
348 && (__wcrtombwcrtomb ((char *) buf, towlower (wc), &state)
349 != (size_t) -1))
350 re_set_fastmap (fastmap, 0, buf[0]);
351 re_free (buf)free (buf);
352 }
353#endif
354 }
355 else if (type == SIMPLE_BRACKET)
356 {
357 int i, ch;
358 for (i = 0, ch = 0; i < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++i)
359 {
360 int j;
361 bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
362 for (j = 0; j < BITSET_WORD_BITS(sizeof (bitset_word_t) * 8); ++j, ++ch)
363 if (w & ((bitset_word_t) 1 << j))
364 re_set_fastmap (fastmap, icase, ch);
365 }
366 }
367#ifdef RE_ENABLE_I18N
368 else if (type == COMPLEX_BRACKET)
369 {
370 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
371 int i;
372
373# ifdef _LIBC
374 /* See if we have to try all bytes which start multiple collation
375 elements.
376 e.g. In da_DK, we want to catch 'a' since "aa" is a valid
377 collation element, and don't catch 'b' since 'b' is
378 the only collation element which starts from 'b' (and
379 it is caught by SIMPLE_BRACKET). */
380 if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
381 && (cset->ncoll_syms || cset->nranges))
382 {
383 const int32_t *table = (const int32_t *)
384 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
385 for (i = 0; i < SBC_MAX256; ++i)
386 if (table[i] < 0)
387 re_set_fastmap (fastmap, icase, i);
388 }
389# endif /* _LIBC */
390
391 /* See if we have to start the match at all multibyte characters,
392 i.e. where we would not find an invalid sequence. This only
393 applies to multibyte character sets; for single byte character
394 sets, the SIMPLE_BRACKET again suffices. */
395 if (dfa->mb_cur_max > 1
396 && (cset->nchar_classes || cset->non_match || cset->nranges
397# ifdef _LIBC
398 || cset->nequiv_classes
399# endif /* _LIBC */
400 ))
401 {
402 unsigned char c = 0;
403 do
404 {
405 mbstate_t mbs;
406 memset (&mbs, 0, sizeof (mbs))__builtin___memset_chk (&mbs, 0, sizeof (mbs), __builtin_object_size
(&mbs, 0))
;
407 if (__mbrtowcmbrtowc (NULL((void*)0), (char *) &c, 1, &mbs) == (size_t) -2)
408 re_set_fastmap (fastmap, false(0), (int) c);
409 }
410 while (++c != 0);
411 }
412
413 else
414 {
415 /* ... Else catch all bytes which can start the mbchars. */
416 for (i = 0; i < cset->nmbchars; ++i)
417 {
418 char buf[256];
419 mbstate_t state;
420 memset (&state, '\0', sizeof (state))__builtin___memset_chk (&state, '\0', sizeof (state), __builtin_object_size
(&state, 0))
;
421 if (__wcrtombwcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
422 re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
423 if ((bufp->syntax & RE_ICASE(((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1)
) && dfa->mb_cur_max > 1)
424 {
425 if (__wcrtombwcrtomb (buf, towlower (cset->mbchars[i]), &state)
426 != (size_t) -1)
427 re_set_fastmap (fastmap, false(0), *(unsigned char *) buf);
428 }
429 }
430 }
431 }
432#endif /* RE_ENABLE_I18N */
433 else if (type == OP_PERIOD
434#ifdef RE_ENABLE_I18N
435 || type == OP_UTF8_PERIOD
436#endif /* RE_ENABLE_I18N */
437 || type == END_OF_RE)
438 {
439 memset (fastmap, '\1', sizeof (char) * SBC_MAX)__builtin___memset_chk (fastmap, '\1', sizeof (char) * 256, __builtin_object_size
(fastmap, 0))
;
440 if (type == END_OF_RE)
441 bufp->can_be_null = 1;
442 return;
443 }
444 }
445}
446
447/* Entry point for POSIX code. */
448/* regcomp takes a regular expression as a string and compiles it.
449
450 PREG is a regex_t *. We do not expect any fields to be initialized,
451 since POSIX says we shouldn't. Thus, we set
452
453 `buffer' to the compiled pattern;
454 `used' to the length of the compiled pattern;
455 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
456 REG_EXTENDED bit in CFLAGS is set; otherwise, to
457 RE_SYNTAX_POSIX_BASIC;
458 `newline_anchor' to REG_NEWLINE being set in CFLAGS;
459 `fastmap' to an allocated space for the fastmap;
460 `fastmap_accurate' to zero;
461 `re_nsub' to the number of subexpressions in PATTERN.
462
463 PATTERN is the address of the pattern string.
464
465 CFLAGS is a series of bits which affect compilation.
466
467 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
468 use POSIX basic syntax.
469
470 If REG_NEWLINE is set, then . and [^...] don't match newline.
471 Also, regexec will try a match beginning after every newline.
472
473 If REG_ICASE is set, then we considers upper- and lowercase
474 versions of letters to be equivalent when matching.
475
476 If REG_NOSUB is set, then when PREG is passed to regexec, that
477 routine will report only success or failure, and nothing about the
478 registers.
479
480 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
481 the return codes and their meanings.) */
482
483int
484regcomp (regex_t *__restrictrestrict preg,
485 const char *__restrictrestrict pattern,
486 int cflags)
487{
488 reg_errcode_t ret;
489 reg_syntax_t syntax = ((cflags & REG_EXTENDED1) ? RE_SYNTAX_POSIX_EXTENDED((((((unsigned long int) 1) << 1) << 1) | (((((((
(unsigned long int) 1) << 1) << 1) << 1) <<
1) << 1) << 1) | (((((((((unsigned long int) 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) | (((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) | ((((((((((((((((((unsigned long int
) 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1)) | (((((unsigned long int) 1) << 1) <<
1) << 1) | ((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) | ((((((((((((((unsigned long int
) 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) | (((((((((((((((unsigned long int)
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) | (((((((((((((((((unsigned
long int) 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) | (((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) | (((((((((((((((((((unsigned long
int) 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1))
1
'?' condition is false
490 : RE_SYNTAX_POSIX_BASIC((((((unsigned long int) 1) << 1) << 1) | (((((((
(unsigned long int) 1) << 1) << 1) << 1) <<
1) << 1) << 1) | (((((((((unsigned long int) 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) | (((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) | ((((((((((((((((((unsigned long int
) 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1)) | (((unsigned long int) 1) << 1) | ((((((
(((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
))
);
491
492 preg->buffer = NULL((void*)0);
493 preg->allocated = 0;
494 preg->used = 0;
495
496 /* Try to allocate space for the fastmap. */
497 preg->fastmap = re_malloc (char, SBC_MAX)((char *) malloc ((256) * sizeof (char)));
498 if (BE (preg->fastmap == NULL, 0)__builtin_expect (preg->fastmap == ((void*)0), 0))
2
Taking false branch
499 return REG_ESPACE;
500
501 syntax |= (cflags & REG_ICASE(1 << 1)) ? RE_ICASE(((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1)
: 0;
3
'?' condition is false
502
503 /* If REG_NEWLINE is set, newlines are treated differently. */
504 if (cflags & REG_NEWLINE((1 << 1) << 1))
4
Taking false branch
505 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
506 syntax &= ~RE_DOT_NEWLINE((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1)
;
507 syntax |= RE_HAT_LISTS_NOT_NEWLINE((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
)
;
508 /* It also changes the matching behavior. */
509 preg->newline_anchor = 1;
510 }
511 else
512 preg->newline_anchor = 0;
513 preg->no_sub = !!(cflags & REG_NOSUB(((1 << 1) << 1) << 1));
514 preg->translate = NULL((void*)0);
515
516 ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
5
Calling 're_compile_internal'
517
518 /* POSIX doesn't distinguish between an unmatched open-group and an
519 unmatched close-group: both are REG_EPAREN. */
520 if (ret == REG_ERPAREN)
521 ret = REG_EPAREN;
522
523 /* We have already checked preg->fastmap != NULL. */
524 if (BE (ret == REG_NOERROR, 1)__builtin_expect (ret == REG_NOERROR, 1))
525 /* Compute the fastmap now, since regexec cannot modify the pattern
526 buffer. This function never fails in this implementation. */
527 (void) re_compile_fastmap (preg);
528 else
529 {
530 /* Some error occurred while compiling the expression. */
531 re_free (preg->fastmap)free (preg->fastmap);
532 preg->fastmap = NULL((void*)0);
533 }
534
535 return (int) ret;
536}
537#ifdef _LIBC
538weak_alias (__regcomp, regcomp)
539#endif
540
541/* Returns a message corresponding to an error code, ERRCODE, returned
542 from either regcomp or regexec. We don't use PREG here. */
543
544size_t
545regerror(int errcode, const regex_t *__restrictrestrict preg,
546 char *__restrictrestrict errbuf, size_t errbuf_size)
547{
548 const char *msg;
549 size_t msg_size;
550
551 if (BE (errcode < 0__builtin_expect (errcode < 0 || errcode >= (int) (sizeof
(__re_error_msgid_idx) / sizeof (__re_error_msgid_idx[0])), 0
)
552 || errcode >= (int) (sizeof (__re_error_msgid_idx)__builtin_expect (errcode < 0 || errcode >= (int) (sizeof
(__re_error_msgid_idx) / sizeof (__re_error_msgid_idx[0])), 0
)
553 / sizeof (__re_error_msgid_idx[0])), 0)__builtin_expect (errcode < 0 || errcode >= (int) (sizeof
(__re_error_msgid_idx) / sizeof (__re_error_msgid_idx[0])), 0
)
)
554 /* Only error codes returned by the rest of the code should be passed
555 to this routine. If we are given anything else, or if other regex
556 code generates an invalid error code, then the program has a bug.
557 Dump core so we can fix it. */
558 abort ();
559
560 msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode])(__re_error_msgid + __re_error_msgid_idx[errcode]);
561
562 msg_size = strlen (msg) + 1; /* Includes the null. */
563
564 if (BE (errbuf_size != 0, 1)__builtin_expect (errbuf_size != 0, 1))
565 {
566 if (BE (msg_size > errbuf_size, 0)__builtin_expect (msg_size > errbuf_size, 0))
567 {
568 memcpy (errbuf, msg, errbuf_size - 1)__builtin___memcpy_chk (errbuf, msg, errbuf_size - 1, __builtin_object_size
(errbuf, 0))
;
569 errbuf[errbuf_size - 1] = 0;
570 }
571 else
572 memcpy (errbuf, msg, msg_size)__builtin___memcpy_chk (errbuf, msg, msg_size, __builtin_object_size
(errbuf, 0))
;
573 }
574
575 return msg_size;
576}
577#ifdef _LIBC
578weak_alias (__regerror, regerror)
579#endif
580
581
582#ifdef RE_ENABLE_I18N
583/* This static array is used for the map to single-byte characters when
584 UTF-8 is used. Otherwise we would allocate memory just to initialize
585 it the same all the time. UTF-8 is the preferred encoding so this is
586 a worthwhile optimization. */
587#if __GNUC__4 >= 3
588static const bitset_t utf8_sb_map = {
589 /* Set the first 128 bits. */
590 [0 ... 0x80 / BITSET_WORD_BITS(sizeof (bitset_word_t) * 8) - 1] = BITSET_WORD_MAX(9223372036854775807L *2UL+1UL)
591};
592#else /* ! (__GNUC__ >= 3) */
593static bitset_t utf8_sb_map;
594#endif /* __GNUC__ >= 3 */
595#endif /* RE_ENABLE_I18N */
596
597
598static void
599free_dfa_content (re_dfa_t *dfa)
600{
601 int i, j;
602
603 if (dfa->nodes)
604 for (i = 0; i < dfa->nodes_len; ++i)
605 free_token (dfa->nodes + i);
606 re_free (dfa->nexts)free (dfa->nexts);
607 for (i = 0; i < dfa->nodes_len; ++i)
608 {
609 if (dfa->eclosures != NULL((void*)0))
610 re_node_set_free (dfa->eclosures + i)free ((dfa->eclosures + i)->elems);
611 if (dfa->inveclosures != NULL((void*)0))
612 re_node_set_free (dfa->inveclosures + i)free ((dfa->inveclosures + i)->elems);
613 if (dfa->edests != NULL((void*)0))
614 re_node_set_free (dfa->edests + i)free ((dfa->edests + i)->elems);
615 }
616 re_free (dfa->edests)free (dfa->edests);
617 re_free (dfa->eclosures)free (dfa->eclosures);
618 re_free (dfa->inveclosures)free (dfa->inveclosures);
619 re_free (dfa->nodes)free (dfa->nodes);
620
621 if (dfa->state_table)
622 for (i = 0; i <= dfa->state_hash_mask; ++i)
623 {
624 struct re_state_table_entry *entry = dfa->state_table + i;
625 for (j = 0; j < entry->num; ++j)
626 {
627 re_dfastate_t *state = entry->array[j];
628 free_state (state);
629 }
630 re_free (entry->array)free (entry->array);
631 }
632 re_free (dfa->state_table)free (dfa->state_table);
633#ifdef RE_ENABLE_I18N
634 if (dfa->sb_char != utf8_sb_map)
635 re_free (dfa->sb_char)free (dfa->sb_char);
636#endif
637 re_free (dfa->subexp_map)free (dfa->subexp_map);
638#ifdef DEBUG
639 re_free (dfa->re_str)free (dfa->re_str);
640#endif
641
642 re_free (dfa)free (dfa);
643}
644
645
646/* Free dynamically allocated space used by PREG. */
647
648void
649regfree (regex_t *preg)
650{
651 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
652 if (BE (dfa != NULL, 1)__builtin_expect (dfa != ((void*)0), 1))
653 free_dfa_content (dfa);
654 preg->buffer = NULL((void*)0);
655 preg->allocated = 0;
656
657 re_free (preg->fastmap)free (preg->fastmap);
658 preg->fastmap = NULL((void*)0);
659
660 re_free (preg->translate)free (preg->translate);
661 preg->translate = NULL((void*)0);
662}
663#ifdef _LIBC
664weak_alias (__regfreeregfree, regfree)
665#endif
666
667/* Entry points compatible with 4.2 BSD regex library. We don't define
668 them unless specifically requested. */
669
670#if defined _REGEX_RE_COMP || defined _LIBC
671
672/* BSD has one and only one pattern buffer. */
673static struct re_pattern_buffer re_comp_buf;
674
675char *
676# ifdef _LIBC
677/* Make these definitions weak in libc, so POSIX programs can redefine
678 these names if they don't use our functions, and still use
679 regcomp/regexec above without link errors. */
680weak_function
681# endif
682re_comp (s)
683 const char *s;
684{
685 reg_errcode_t ret;
686 char *fastmap;
687
688 if (!s)
689 {
690 if (!re_comp_buf.buffer)
691 return gettext ("No previous regular expression")("No previous regular expression");
692 return 0;
693 }
694
695 if (re_comp_buf.buffer)
696 {
697 fastmap = re_comp_buf.fastmap;
698 re_comp_buf.fastmap = NULL((void*)0);
699 __regfreeregfree (&re_comp_buf);
700 memset (&re_comp_buf, '\0', sizeof (re_comp_buf))__builtin___memset_chk (&re_comp_buf, '\0', sizeof (re_comp_buf
), __builtin_object_size (&re_comp_buf, 0))
;
701 re_comp_buf.fastmap = fastmap;
702 }
703
704 if (re_comp_buf.fastmap == NULL((void*)0))
705 {
706 re_comp_buf.fastmap = (char *) malloc (SBC_MAX256);
707 if (re_comp_buf.fastmap == NULL((void*)0))
708 return (char *) gettext (__re_error_msgid(__re_error_msgid + __re_error_msgid_idx[(int) REG_ESPACE])
709 + __re_error_msgid_idx[(int) REG_ESPACE])(__re_error_msgid + __re_error_msgid_idx[(int) REG_ESPACE]);
710 }
711
712 /* Since `re_exec' always passes NULL for the `regs' argument, we
713 don't need to initialize the pattern buffer fields which affect it. */
714
715 /* Match anchors at newlines. */
716 re_comp_buf.newline_anchor = 1;
717
718 ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
719
720 if (!ret)
721 return NULL((void*)0);
722
723 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
724 return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret])(__re_error_msgid + __re_error_msgid_idx[(int) ret]);
725}
726
727#ifdef _LIBC
728libc_freeres_fn (free_mem)
729{
730 __regfreeregfree (&re_comp_buf);
731}
732#endif
733
734#endif /* _REGEX_RE_COMP */
735
736/* Internal entry point.
737 Compile the regular expression PATTERN, whose length is LENGTH.
738 SYNTAX indicate regular expression's syntax. */
739
740static reg_errcode_t
741re_compile_internal (regex_t *preg, const char * pattern, size_t length,
742 reg_syntax_t syntax)
743{
744 reg_errcode_t err = REG_NOERROR;
745 re_dfa_t *dfa;
746 re_string_t regexp;
747
748 /* Initialize the pattern buffer. */
749 preg->fastmap_accurate = 0;
750 preg->syntax = syntax;
751 preg->not_bol = preg->not_eol = 0;
752 preg->used = 0;
753 preg->re_nsub = 0;
754 preg->can_be_null = 0;
755 preg->regs_allocated = REGS_UNALLOCATED0;
756
757 /* Initialize the dfa. */
758 dfa = (re_dfa_t *) preg->buffer;
759 if (BE (preg->allocated < sizeof (re_dfa_t), 0)__builtin_expect (preg->allocated < sizeof (re_dfa_t), 0
)
)
6
Taking true branch
760 {
761 /* If zero allocated, but buffer is non-null, try to realloc
762 enough space. This loses if buffer's address is bogus, but
763 that is the user's responsibility. If ->buffer is NULL this
764 is a simple allocation. */
765 dfa = re_realloc (preg->buffer, re_dfa_t, 1)((preg->buffer != ((void*)0)) ? (re_dfa_t *) realloc (preg
->buffer,(1)*sizeof(re_dfa_t)) : (re_dfa_t *) calloc(1,sizeof
(re_dfa_t)))
;
766 if (dfa == NULL((void*)0))
7
Assuming 'dfa' is not equal to null
8
Taking false branch
767 return REG_ESPACE;
768 preg->allocated = sizeof (re_dfa_t);
769 preg->buffer = (unsigned char *) dfa;
770 }
771 preg->used = sizeof (re_dfa_t);
772
773 err = init_dfa (dfa, length);
774 if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))
9
Taking false branch
775 {
776 free_dfa_content (dfa);
777 preg->buffer = NULL((void*)0);
778 preg->allocated = 0;
779 return err;
780 }
781#ifdef DEBUG
782 /* Note: length+1 will not overflow since it is checked in init_dfa. */
783 dfa->re_str = re_malloc (char, length + 1)((char *) malloc ((length + 1) * sizeof (char)));
784 strncpy (dfa->re_str, pattern, length + 1)__builtin___strncpy_chk (dfa->re_str, pattern, length + 1,
__builtin_object_size (dfa->re_str, 2 > 1 ? 1 : 0))
;
785#endif
786
787 __libc_lock_init (dfa->lock)do { } while (0);
788
789 err = re_string_construct (&regexp, pattern, length, preg->translate,
790 syntax & RE_ICASE(((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1)
, dfa);
791 if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))
10
Taking false branch
792 {
793 re_compile_internal_free_return:
794 free_workarea_compile (preg);
795 re_string_destruct (&regexp);
796 free_dfa_content (dfa);
797 preg->buffer = NULL((void*)0);
798 preg->allocated = 0;
799 return err;
800 }
801
802 /* Parse the regular expression, and build a structure tree. */
803 preg->re_nsub = 0;
804 dfa->str_tree = parse (&regexp, preg, syntax, &err);
11
Calling 'parse'
24
Returning from 'parse'
805 if (BE (dfa->str_tree == NULL, 0)__builtin_expect (dfa->str_tree == ((void*)0), 0))
25
Taking false branch
806 goto re_compile_internal_free_return;
807
808 /* Analyze the tree and create the nfa. */
809 err = analyze (preg);
810 if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))
26
Taking false branch
811 goto re_compile_internal_free_return;
812
813#ifdef RE_ENABLE_I18N
814 /* If possible, do searching in single byte encoding to speed things up. */
815 if (dfa->is_utf8 && !(syntax & RE_ICASE(((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1)
) && preg->translate == NULL((void*)0))
816 optimize_utf8 (dfa);
817#endif
818
819 /* Then create the initial state of the dfa. */
820 err = create_initial_state (dfa);
27
Calling 'create_initial_state'
821
822 /* Release work areas. */
823 free_workarea_compile (preg);
824 re_string_destruct (&regexp);
825
826 if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))
827 {
828 free_dfa_content (dfa);
829 preg->buffer = NULL((void*)0);
830 preg->allocated = 0;
831 }
832
833 return err;
834}
835
836/* Initialize DFA. We use the length of the regular expression PAT_LEN
837 as the initial length of some arrays. */
838
839static reg_errcode_t
840init_dfa (re_dfa_t *dfa, size_t pat_len)
841{
842 unsigned int table_size;
843#ifndef _LIBC
844 char *codeset_name;
845#endif
846
847 memset (dfa, '\0', sizeof (re_dfa_t))__builtin___memset_chk (dfa, '\0', sizeof (re_dfa_t), __builtin_object_size
(dfa, 0))
;
848
849 /* Force allocation of str_tree_storage the first time. */
850 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE((1024 - sizeof (void *)) / sizeof (bin_tree_t));
851
852 /* Avoid overflows. */
853 if (pat_len == SIZE_MAX18446744073709551615ULL)
854 return REG_ESPACE;
855
856 dfa->nodes_alloc = pat_len + 1;
857 dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc)((re_token_t *) malloc ((dfa->nodes_alloc) * sizeof (re_token_t
)))
;
858
859 /* table_size = 2 ^ ceil(log pat_len) */
860 for (table_size = 1; ; table_size <<= 1)
861 if (table_size > pat_len)
862 break;
863
864 dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
865 dfa->state_hash_mask = table_size - 1;
866
867 dfa->mb_cur_max = MB_CUR_MAX__mb_cur_max;
868#ifdef _LIBC
869 if (dfa->mb_cur_max == 6
870 && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
871 dfa->is_utf8 = 1;
872 dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
873 != 0);
874#else
875# ifdef HAVE_LANGINFO_CODESET
876 codeset_name = nl_langinfo (CODESET);
877# else
878 codeset_name = getenv ("LC_ALL");
879 if (codeset_name == NULL((void*)0) || codeset_name[0] == '\0')
880 codeset_name = getenv ("LC_CTYPE");
881 if (codeset_name == NULL((void*)0) || codeset_name[0] == '\0')
882 codeset_name = getenv ("LANG");
883 if (codeset_name == NULL((void*)0))
884 codeset_name = "";
885 else if (strchr (codeset_name, '.') != NULL((void*)0))
886 codeset_name = strchr (codeset_name, '.') + 1;
887# endif
888
889 /* strcasecmp isn't a standard interface. brute force check */
890#if 0
891 if (strcasecmp (codeset_name, "UTF-8") == 0
892 || strcasecmp (codeset_name, "UTF8") == 0)
893 dfa->is_utf8 = 1;
894#else
895 if ( (codeset_name[0] == 'U' || codeset_name[0] == 'u')
896 && (codeset_name[1] == 'T' || codeset_name[1] == 't')
897 && (codeset_name[2] == 'F' || codeset_name[2] == 'f')
898 && (codeset_name[3] == '-'
899 ? codeset_name[4] == '8' && codeset_name[5] == '\0'
900 : codeset_name[3] == '8' && codeset_name[4] == '\0'))
901 dfa->is_utf8 = 1;
902#endif
903
904 /* We check exhaustively in the loop below if this charset is a
905 superset of ASCII. */
906 dfa->map_notascii = 0;
907#endif
908
909#ifdef RE_ENABLE_I18N
910 if (dfa->mb_cur_max > 1)
911 {
912 if (dfa->is_utf8)
913 {
914#if !defined(__GNUC__4) || __GNUC__4 < 3
915 static short utf8_sb_map_inited = 0;
916
917 if (! utf8_sb_map_inited)
918 {
919 int i;
920
921 utf8_sb_map_inited = 0;
922 for (i = 0; i <= 0x80 / BITSET_WORD_BITS(sizeof (bitset_word_t) * 8) - 1; i++)
923 utf8_sb_map[i] = BITSET_WORD_MAX(9223372036854775807L *2UL+1UL);
924 }
925#endif
926 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
927 }
928 else
929 {
930 int i, j, ch;
931
932 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
933 if (BE (dfa->sb_char == NULL, 0)__builtin_expect (dfa->sb_char == ((void*)0), 0))
934 return REG_ESPACE;
935
936 /* Set the bits corresponding to single byte chars. */
937 for (i = 0, ch = 0; i < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++i)
938 for (j = 0; j < BITSET_WORD_BITS(sizeof (bitset_word_t) * 8); ++j, ++ch)
939 {
940 wint_t wch = __btowcbtowc (ch);
941 if (wch != WEOF)
942 dfa->sb_char[i] |= (bitset_word_t) 1 << j;
943# ifndef _LIBC
944 if (isascii (ch) && wch != ch)
945 dfa->map_notascii = 1;
946# endif
947 }
948 }
949 }
950#endif
951
952 if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)__builtin_expect (dfa->nodes == ((void*)0) || dfa->state_table
== ((void*)0), 0)
)
953 return REG_ESPACE;
954 return REG_NOERROR;
955}
956
957/* Initialize WORD_CHAR table, which indicate which character is
958 "word". In this case "word" means that it is the word construction
959 character used by some operators like "\<", "\>", etc. */
960
961static void
962internal_function
963init_word_char (re_dfa_t *dfa)
964{
965 int i, j, ch;
966 dfa->word_ops_used = 1;
967 for (i = 0, ch = 0; i < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++i)
968 for (j = 0; j < BITSET_WORD_BITS(sizeof (bitset_word_t) * 8); ++j, ++ch)
969 if (isalnum (ch) || ch == '_')
970 dfa->word_char[i] |= (bitset_word_t) 1 << j;
971}
972
973/* Free the work area which are only used while compiling. */
974
975static void
976free_workarea_compile (regex_t *preg)
977{
978 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
979 bin_tree_storage_t *storage, *next;
980 for (storage = dfa->str_tree_storage; storage; storage = next)
981 {
982 next = storage->next;
983 re_free (storage)free (storage);
984 }
985 dfa->str_tree_storage = NULL((void*)0);
986 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE((1024 - sizeof (void *)) / sizeof (bin_tree_t));
987 dfa->str_tree = NULL((void*)0);
988 re_free (dfa->org_indices)free (dfa->org_indices);
989 dfa->org_indices = NULL((void*)0);
990}
991
992/* Create initial states for all contexts. */
993
994static reg_errcode_t
995create_initial_state (re_dfa_t *dfa)
996{
997 int first, i;
998 reg_errcode_t err;
999 re_node_set init_nodes;
1000
1001 /* Initial states have the epsilon closure of the node which is
1002 the first node of the regular expression. */
1003 first = dfa->str_tree->first->node_idx;
28
Access to field 'node_idx' results in a dereference of a null pointer (loaded from field 'first')
1004 dfa->init_node = first;
1005 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
1006 if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))
1007 return err;
1008
1009 /* The back-references which are in initial states can epsilon transit,
1010 since in this case all of the subexpressions can be null.
1011 Then we add epsilon closures of the nodes which are the next nodes of
1012 the back-references. */
1013 if (dfa->nbackref > 0)
1014 for (i = 0; i < init_nodes.nelem; ++i)
1015 {
1016 int node_idx = init_nodes.elems[i];
1017 re_token_type_t type = dfa->nodes[node_idx].type;
1018
1019 int clexp_idx;
1020 if (type != OP_BACK_REF)
1021 continue;
1022 for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
1023 {
1024 re_token_t *clexp_node;
1025 clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
1026 if (clexp_node->type == OP_CLOSE_SUBEXP
1027 && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
1028 break;
1029 }
1030 if (clexp_idx == init_nodes.nelem)
1031 continue;
1032
1033 if (type == OP_BACK_REF)
1034 {
1035 int dest_idx = dfa->edests[node_idx].elems[0];
1036 if (!re_node_set_contains (&init_nodes, dest_idx))
1037 {
1038 reg_errcode_t err = re_node_set_merge (&init_nodes,
1039 dfa->eclosures
1040 + dest_idx);
1041 if (err != REG_NOERROR)
1042 return err;
1043 i = 0;
1044 }
1045 }
1046 }
1047
1048 /* It must be the first time to invoke acquire_state. */
1049 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
1050 /* We don't check ERR here, since the initial state must not be NULL. */
1051 if (BE (dfa->init_state == NULL, 0)__builtin_expect (dfa->init_state == ((void*)0), 0))
1052 return err;
1053 if (dfa->init_state->has_constraint)
1054 {
1055 dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
1056 CONTEXT_WORD1);
1057 dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
1058 CONTEXT_NEWLINE(1 << 1));
1059 dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
1060 &init_nodes,
1061 CONTEXT_NEWLINE(1 << 1)
1062 | CONTEXT_BEGBUF((1 << 1) << 1));
1063 if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL__builtin_expect (dfa->init_state_word == ((void*)0) || dfa
->init_state_nl == ((void*)0) || dfa->init_state_begbuf
== ((void*)0), 0)
1064 || dfa->init_state_begbuf == NULL, 0)__builtin_expect (dfa->init_state_word == ((void*)0) || dfa
->init_state_nl == ((void*)0) || dfa->init_state_begbuf
== ((void*)0), 0)
)
1065 return err;
1066 }
1067 else
1068 dfa->init_state_word = dfa->init_state_nl
1069 = dfa->init_state_begbuf = dfa->init_state;
1070
1071 re_node_set_free (&init_nodes)free ((&init_nodes)->elems);
1072 return REG_NOERROR;
1073}
1074
1075#ifdef RE_ENABLE_I18N
1076/* If it is possible to do searching in single byte encoding instead of UTF-8
1077 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
1078 DFA nodes where needed. */
1079
1080static void
1081optimize_utf8 (re_dfa_t *dfa)
1082{
1083 int node, i, mb_chars = 0, has_period = 0;
1084
1085 for (node = 0; node < dfa->nodes_len; ++node)
1086 switch (dfa->nodes[node].type)
1087 {
1088 case CHARACTER:
1089 if (dfa->nodes[node].opr.c >= 0x80)
1090 mb_chars = 1;
1091 break;
1092 case ANCHOR:
1093 switch (dfa->nodes[node].opr.ctx_type)
1094 {
1095 case LINE_FIRST:
1096 case LINE_LAST:
1097 case BUF_FIRST:
1098 case BUF_LAST:
1099 break;
1100 default:
1101 /* Word anchors etc. cannot be handled. It's okay to test
1102 opr.ctx_type since constraints (for all DFA nodes) are
1103 created by ORing one or more opr.ctx_type values. */
1104 return;
1105 }
1106 break;
1107 case OP_PERIOD:
1108 has_period = 1;
1109 break;
1110 case OP_BACK_REF:
1111 case OP_ALT:
1112 case END_OF_RE:
1113 case OP_DUP_ASTERISK:
1114 case OP_OPEN_SUBEXP:
1115 case OP_CLOSE_SUBEXP:
1116 break;
1117 case COMPLEX_BRACKET:
1118 return;
1119 case SIMPLE_BRACKET:
1120 /* Just double check. The non-ASCII range starts at 0x80. */
1121 assert (0x80 % BITSET_WORD_BITS == 0)(__builtin_expect(!(0x80 % (sizeof (bitset_word_t) * 8) == 0)
, 0) ? __assert_rtn(__func__, "compat/regex/regcomp.c", 1121,
"0x80 % BITSET_WORD_BITS == 0") : (void)0)
;
1122 for (i = 0x80 / BITSET_WORD_BITS(sizeof (bitset_word_t) * 8); i < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++i)
1123 if (dfa->nodes[node].opr.sbcset[i])
1124 return;
1125 break;
1126 default:
1127 abort ();
1128 }
1129
1130 if (mb_chars || has_period)
1131 for (node = 0; node < dfa->nodes_len; ++node)
1132 {
1133 if (dfa->nodes[node].type == CHARACTER
1134 && dfa->nodes[node].opr.c >= 0x80)
1135 dfa->nodes[node].mb_partial = 0;
1136 else if (dfa->nodes[node].type == OP_PERIOD)
1137 dfa->nodes[node].type = OP_UTF8_PERIOD;
1138 }
1139
1140 /* The search can be in single byte locale. */
1141 dfa->mb_cur_max = 1;
1142 dfa->is_utf8 = 0;
1143 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
1144}
1145#endif
1146
1147/* Analyze the structure tree, and calculate "first", "next", "edest",
1148 "eclosure", and "inveclosure". */
1149
1150static reg_errcode_t
1151analyze (regex_t *preg)
1152{
1153 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1154 reg_errcode_t ret;
1155
1156 /* Allocate arrays. */
1157 dfa->nexts = re_malloc (int, dfa->nodes_alloc)((int *) malloc ((dfa->nodes_alloc) * sizeof (int)));
1158 dfa->org_indices = re_malloc (int, dfa->nodes_alloc)((int *) malloc ((dfa->nodes_alloc) * sizeof (int)));
1159 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc)((re_node_set *) malloc ((dfa->nodes_alloc) * sizeof (re_node_set
)))
;
1160 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc)((re_node_set *) malloc ((dfa->nodes_alloc) * sizeof (re_node_set
)))
;
1161 if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL__builtin_expect (dfa->nexts == ((void*)0) || dfa->org_indices
== ((void*)0) || dfa->edests == ((void*)0) || dfa->eclosures
== ((void*)0), 0)
1162 || dfa->eclosures == NULL, 0)__builtin_expect (dfa->nexts == ((void*)0) || dfa->org_indices
== ((void*)0) || dfa->edests == ((void*)0) || dfa->eclosures
== ((void*)0), 0)
)
1163 return REG_ESPACE;
1164
1165 dfa->subexp_map = re_malloc (int, preg->re_nsub)((int *) malloc ((preg->re_nsub) * sizeof (int)));
1166 if (dfa->subexp_map != NULL((void*)0))
1167 {
1168 int i;
1169 for (i = 0; i < preg->re_nsub; i++)
1170 dfa->subexp_map[i] = i;
1171 preorder (dfa->str_tree, optimize_subexps, dfa);
1172 for (i = 0; i < preg->re_nsub; i++)
1173 if (dfa->subexp_map[i] != i)
1174 break;
1175 if (i == preg->re_nsub)
1176 {
1177 free (dfa->subexp_map);
1178 dfa->subexp_map = NULL((void*)0);
1179 }
1180 }
1181
1182 ret = postorder (dfa->str_tree, lower_subexps, preg);
1183 if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))
1184 return ret;
1185 ret = postorder (dfa->str_tree, calc_first, dfa);
1186 if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))
1187 return ret;
1188 preorder (dfa->str_tree, calc_next, dfa);
1189 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
1190 if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))
1191 return ret;
1192 ret = calc_eclosure (dfa);
1193 if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))
1194 return ret;
1195
1196 /* We only need this during the prune_impossible_nodes pass in regexec.c;
1197 skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
1198 if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
1199 || dfa->nbackref)
1200 {
1201 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len)((re_node_set *) malloc ((dfa->nodes_len) * sizeof (re_node_set
)))
;
1202 if (BE (dfa->inveclosures == NULL, 0)__builtin_expect (dfa->inveclosures == ((void*)0), 0))
1203 return REG_ESPACE;
1204 ret = calc_inveclosure (dfa);
1205 }
1206
1207 return ret;
1208}
1209
1210/* Our parse trees are very unbalanced, so we cannot use a stack to
1211 implement parse tree visits. Instead, we use parent pointers and
1212 some hairy code in these two functions. */
1213static reg_errcode_t
1214postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1215 void *extra)
1216{
1217 bin_tree_t *node, *prev;
1218
1219 for (node = root; ; )
1220 {
1221 /* Descend down the tree, preferably to the left (or to the right
1222 if that's the only child). */
1223 while (node->left || node->right)
1224 if (node->left)
1225 node = node->left;
1226 else
1227 node = node->right;
1228
1229 do
1230 {
1231 reg_errcode_t err = fn (extra, node);
1232 if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))
1233 return err;
1234 if (node->parent == NULL((void*)0))
1235 return REG_NOERROR;
1236 prev = node;
1237 node = node->parent;
1238 }
1239 /* Go up while we have a node that is reached from the right. */
1240 while (node->right == prev || node->right == NULL((void*)0));
1241 node = node->right;
1242 }
1243}
1244
1245static reg_errcode_t
1246preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1247 void *extra)
1248{
1249 bin_tree_t *node;
1250
1251 for (node = root; ; )
1252 {
1253 reg_errcode_t err = fn (extra, node);
1254 if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))
1255 return err;
1256
1257 /* Go to the left node, or up and to the right. */
1258 if (node->left)
1259 node = node->left;
1260 else
1261 {
1262 bin_tree_t *prev = NULL((void*)0);
1263 while (node->right == prev || node->right == NULL((void*)0))
1264 {
1265 prev = node;
1266 node = node->parent;
1267 if (!node)
1268 return REG_NOERROR;
1269 }
1270 node = node->right;
1271 }
1272 }
1273}
1274
1275/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
1276 re_search_internal to map the inner one's opr.idx to this one's. Adjust
1277 backreferences as well. Requires a preorder visit. */
1278static reg_errcode_t
1279optimize_subexps (void *extra, bin_tree_t *node)
1280{
1281 re_dfa_t *dfa = (re_dfa_t *) extra;
1282
1283 if (node->token.type == OP_BACK_REF && dfa->subexp_map)
1284 {
1285 int idx = node->token.opr.idx;
1286 node->token.opr.idx = dfa->subexp_map[idx];
1287 dfa->used_bkref_map |= 1 << node->token.opr.idx;
1288 }
1289
1290 else if (node->token.type == SUBEXP
1291 && node->left && node->left->token.type == SUBEXP)
1292 {
1293 int other_idx = node->left->token.opr.idx;
1294
1295 node->left = node->left->left;
1296 if (node->left)
1297 node->left->parent = node;
1298
1299 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
1300 if (other_idx < BITSET_WORD_BITS(sizeof (bitset_word_t) * 8))
1301 dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
1302 }
1303
1304 return REG_NOERROR;
1305}
1306
1307/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
1308 of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
1309static reg_errcode_t
1310lower_subexps (void *extra, bin_tree_t *node)
1311{
1312 regex_t *preg = (regex_t *) extra;
1313 reg_errcode_t err = REG_NOERROR;
1314
1315 if (node->left && node->left->token.type == SUBEXP)
1316 {
1317 node->left = lower_subexp (&err, preg, node->left);
1318 if (node->left)
1319 node->left->parent = node;
1320 }
1321 if (node->right && node->right->token.type == SUBEXP)
1322 {
1323 node->right = lower_subexp (&err, preg, node->right);
1324 if (node->right)
1325 node->right->parent = node;
1326 }
1327
1328 return err;
1329}
1330
1331static bin_tree_t *
1332lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
1333{
1334 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1335 bin_tree_t *body = node->left;
1336 bin_tree_t *op, *cls, *tree1, *tree;
1337
1338 if (preg->no_sub
1339 /* We do not optimize empty subexpressions, because otherwise we may
1340 have bad CONCAT nodes with NULL children. This is obviously not
1341 very common, so we do not lose much. An example that triggers
1342 this case is the sed "script" /\(\)/x. */
1343 && node->left != NULL((void*)0)
1344 && (node->token.opr.idx >= BITSET_WORD_BITS(sizeof (bitset_word_t) * 8)
1345 || !(dfa->used_bkref_map
1346 & ((bitset_word_t) 1 << node->token.opr.idx))))
1347 return node->left;
1348
1349 /* Convert the SUBEXP node to the concatenation of an
1350 OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
1351 op = create_tree (dfa, NULL((void*)0), NULL((void*)0), OP_OPEN_SUBEXP);
1352 cls = create_tree (dfa, NULL((void*)0), NULL((void*)0), OP_CLOSE_SUBEXP);
1353 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
1354 tree = create_tree (dfa, op, tree1, CONCAT);
1355 if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)__builtin_expect (tree == ((void*)0) || tree1 == ((void*)0) ||
op == ((void*)0) || cls == ((void*)0), 0)
)
1356 {
1357 *err = REG_ESPACE;
1358 return NULL((void*)0);
1359 }
1360
1361 op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
1362 op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
1363 return tree;
1364}
1365
1366/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
1367 nodes. Requires a postorder visit. */
1368static reg_errcode_t
1369calc_first (void *extra, bin_tree_t *node)
1370{
1371 re_dfa_t *dfa = (re_dfa_t *) extra;
1372 if (node->token.type == CONCAT)
1373 {
1374 node->first = node->left->first;
1375 node->node_idx = node->left->node_idx;
1376 }
1377 else
1378 {
1379 node->first = node;
1380 node->node_idx = re_dfa_add_node (dfa, node->token);
1381 if (BE (node->node_idx == -1, 0)__builtin_expect (node->node_idx == -1, 0))
1382 return REG_ESPACE;
1383 if (node->token.type == ANCHOR)
1384 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
1385 }
1386 return REG_NOERROR;
1387}
1388
1389/* Pass 2: compute NEXT on the tree. Preorder visit. */
1390static reg_errcode_t
1391calc_next (void *extra, bin_tree_t *node)
1392{
1393 switch (node->token.type)
1394 {
1395 case OP_DUP_ASTERISK:
1396 node->left->next = node;
1397 break;
1398 case CONCAT:
1399 node->left->next = node->right->first;
1400 node->right->next = node->next;
1401 break;
1402 default:
1403 if (node->left)
1404 node->left->next = node->next;
1405 if (node->right)
1406 node->right->next = node->next;
1407 break;
1408 }
1409 return REG_NOERROR;
1410}
1411
1412/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
1413static reg_errcode_t
1414link_nfa_nodes (void *extra, bin_tree_t *node)
1415{
1416 re_dfa_t *dfa = (re_dfa_t *) extra;
1417 int idx = node->node_idx;
1418 reg_errcode_t err = REG_NOERROR;
1419
1420 switch (node->token.type)
1421 {
1422 case CONCAT:
1423 break;
1424
1425 case END_OF_RE:
1426 assert (node->next == NULL)(__builtin_expect(!(node->next == ((void*)0)), 0) ? __assert_rtn
(__func__, "compat/regex/regcomp.c", 1426, "node->next == NULL"
) : (void)0)
;
1427 break;
1428
1429 case OP_DUP_ASTERISK:
1430 case OP_ALT:
1431 {
1432 int left, right;
1433 dfa->has_plural_match = 1;
1434 if (node->left != NULL((void*)0))
1435 left = node->left->first->node_idx;
1436 else
1437 left = node->next->node_idx;
1438 if (node->right != NULL((void*)0))
1439 right = node->right->first->node_idx;
1440 else
1441 right = node->next->node_idx;
1442 assert (left > -1)(__builtin_expect(!(left > -1), 0) ? __assert_rtn(__func__
, "compat/regex/regcomp.c", 1442, "left > -1") : (void)0)
;
1443 assert (right > -1)(__builtin_expect(!(right > -1), 0) ? __assert_rtn(__func__
, "compat/regex/regcomp.c", 1443, "right > -1") : (void)0)
;
1444 err = re_node_set_init_2 (dfa->edests + idx, left, right);
1445 }
1446 break;
1447
1448 case ANCHOR:
1449 case OP_OPEN_SUBEXP:
1450 case OP_CLOSE_SUBEXP:
1451 err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
1452 break;
1453
1454 case OP_BACK_REF:
1455 dfa->nexts[idx] = node->next->node_idx;
1456 if (node->token.type == OP_BACK_REF)
1457 err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
1458 break;
1459
1460 default:
1461 assert (!IS_EPSILON_NODE (node->token.type))(__builtin_expect(!(!((node->token.type) & 8)), 0) ? __assert_rtn
(__func__, "compat/regex/regcomp.c", 1461, "!IS_EPSILON_NODE (node->token.type)"
) : (void)0)
;
1462 dfa->nexts[idx] = node->next->node_idx;
1463 break;
1464 }
1465
1466 return err;
1467}
1468
1469/* Duplicate the epsilon closure of the node ROOT_NODE.
1470 Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
1471 to their own constraint. */
1472
1473static reg_errcode_t
1474internal_function
1475duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
1476 int root_node, unsigned int init_constraint)
1477{
1478 int org_node, clone_node, ret;
1479 unsigned int constraint = init_constraint;
1480 for (org_node = top_org_node, clone_node = top_clone_node;;)
1481 {
1482 int org_dest, clone_dest;
1483 if (dfa->nodes[org_node].type == OP_BACK_REF)
1484 {
1485 /* If the back reference epsilon-transit, its destination must
1486 also have the constraint. Then duplicate the epsilon closure
1487 of the destination of the back reference, and store it in
1488 edests of the back reference. */
1489 org_dest = dfa->nexts[org_node];
1490 re_node_set_empty (dfa->edests + clone_node)((dfa->edests + clone_node)->nelem = 0);
1491 clone_dest = duplicate_node (dfa, org_dest, constraint);
1492 if (BE (clone_dest == -1, 0)__builtin_expect (clone_dest == -1, 0))
1493 return REG_ESPACE;
1494 dfa->nexts[clone_node] = dfa->nexts[org_node];
1495 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1496 if (BE (ret < 0, 0)__builtin_expect (ret < 0, 0))
1497 return REG_ESPACE;
1498 }
1499 else if (dfa->edests[org_node].nelem == 0)
1500 {
1501 /* In case of the node can't epsilon-transit, don't duplicate the
1502 destination and store the original destination as the
1503 destination of the node. */
1504 dfa->nexts[clone_node] = dfa->nexts[org_node];
1505 break;
1506 }
1507 else if (dfa->edests[org_node].nelem == 1)
1508 {
1509 /* In case of the node can epsilon-transit, and it has only one
1510 destination. */
1511 org_dest = dfa->edests[org_node].elems[0];
1512 re_node_set_empty (dfa->edests + clone_node)((dfa->edests + clone_node)->nelem = 0);
1513 /* If the node is root_node itself, it means the epsilon clsoure
1514 has a loop. Then tie it to the destination of the root_node. */
1515 if (org_node == root_node && clone_node != org_node)
1516 {
1517 ret = re_node_set_insert (dfa->edests + clone_node, org_dest);
1518 if (BE (ret < 0, 0)__builtin_expect (ret < 0, 0))
1519 return REG_ESPACE;
1520 break;
1521 }
1522 /* In case of the node has another constraint, add it. */
1523 constraint |= dfa->nodes[org_node].constraint;
1524 clone_dest = duplicate_node (dfa, org_dest, constraint);
1525 if (BE (clone_dest == -1, 0)__builtin_expect (clone_dest == -1, 0))
1526 return REG_ESPACE;
1527 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1528 if (BE (ret < 0, 0)__builtin_expect (ret < 0, 0))
1529 return REG_ESPACE;
1530 }
1531 else /* dfa->edests[org_node].nelem == 2 */
1532 {
1533 /* In case of the node can epsilon-transit, and it has two
1534 destinations. In the bin_tree_t and DFA, that's '|' and '*'. */
1535 org_dest = dfa->edests[org_node].elems[0];
1536 re_node_set_empty (dfa->edests + clone_node)((dfa->edests + clone_node)->nelem = 0);
1537 /* Search for a duplicated node which satisfies the constraint. */
1538 clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1539 if (clone_dest == -1)
1540 {
1541 /* There is no such duplicated node, create a new one. */
1542 reg_errcode_t err;
1543 clone_dest = duplicate_node (dfa, org_dest, constraint);
1544 if (BE (clone_dest == -1, 0)__builtin_expect (clone_dest == -1, 0))
1545 return REG_ESPACE;
1546 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1547 if (BE (ret < 0, 0)__builtin_expect (ret < 0, 0))
1548 return REG_ESPACE;
1549 err = duplicate_node_closure (dfa, org_dest, clone_dest,
1550 root_node, constraint);
1551 if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))
1552 return err;
1553 }
1554 else
1555 {
1556 /* There is a duplicated node which satisfies the constraint,
1557 use it to avoid infinite loop. */
1558 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1559 if (BE (ret < 0, 0)__builtin_expect (ret < 0, 0))
1560 return REG_ESPACE;
1561 }
1562
1563 org_dest = dfa->edests[org_node].elems[1];
1564 clone_dest = duplicate_node (dfa, org_dest, constraint);
1565 if (BE (clone_dest == -1, 0)__builtin_expect (clone_dest == -1, 0))
1566 return REG_ESPACE;
1567 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1568 if (BE (ret < 0, 0)__builtin_expect (ret < 0, 0))
1569 return REG_ESPACE;
1570 }
1571 org_node = org_dest;
1572 clone_node = clone_dest;
1573 }
1574 return REG_NOERROR;
1575}
1576
1577/* Search for a node which is duplicated from the node ORG_NODE, and
1578 satisfies the constraint CONSTRAINT. */
1579
1580static int
1581search_duplicated_node (const re_dfa_t *dfa, int org_node,
1582 unsigned int constraint)
1583{
1584 int idx;
1585 for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
1586 {
1587 if (org_node == dfa->org_indices[idx]
1588 && constraint == dfa->nodes[idx].constraint)
1589 return idx; /* Found. */
1590 }
1591 return -1; /* Not found. */
1592}
1593
1594/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
1595 Return the index of the new node, or -1 if insufficient storage is
1596 available. */
1597
1598static int
1599duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
1600{
1601 int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
1602 if (BE (dup_idx != -1, 1)__builtin_expect (dup_idx != -1, 1))
1603 {
1604 dfa->nodes[dup_idx].constraint = constraint;
1605 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
1606 dfa->nodes[dup_idx].duplicated = 1;
1607
1608 /* Store the index of the original node. */
1609 dfa->org_indices[dup_idx] = org_idx;
1610 }
1611 return dup_idx;
1612}
1613
1614static reg_errcode_t
1615calc_inveclosure (re_dfa_t *dfa)
1616{
1617 int src, idx, ret;
1618 for (idx = 0; idx < dfa->nodes_len; ++idx)
1619 re_node_set_init_empty (dfa->inveclosures + idx)__builtin___memset_chk (dfa->inveclosures + idx, '\0', sizeof
(re_node_set), __builtin_object_size (dfa->inveclosures +
idx, 0))
;
1620
1621 for (src = 0; src < dfa->nodes_len; ++src)
1622 {
1623 int *elems = dfa->eclosures[src].elems;
1624 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
1625 {
1626 ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
1627 if (BE (ret == -1, 0)__builtin_expect (ret == -1, 0))
1628 return REG_ESPACE;
1629 }
1630 }
1631
1632 return REG_NOERROR;
1633}
1634
1635/* Calculate "eclosure" for all the node in DFA. */
1636
1637static reg_errcode_t
1638calc_eclosure (re_dfa_t *dfa)
1639{
1640 int node_idx, incomplete;
1641#ifdef DEBUG
1642 assert (dfa->nodes_len > 0)(__builtin_expect(!(dfa->nodes_len > 0), 0) ? __assert_rtn
(__func__, "compat/regex/regcomp.c", 1642, "dfa->nodes_len > 0"
) : (void)0)
;
1643#endif
1644 incomplete = 0;
1645 /* For each nodes, calculate epsilon closure. */
1646 for (node_idx = 0; ; ++node_idx)
1647 {
1648 reg_errcode_t err;
1649 re_node_set eclosure_elem;
1650 if (node_idx == dfa->nodes_len)
1651 {
1652 if (!incomplete)
1653 break;
1654 incomplete = 0;
1655 node_idx = 0;
1656 }
1657
1658#ifdef DEBUG
1659 assert (dfa->eclosures[node_idx].nelem != -1)(__builtin_expect(!(dfa->eclosures[node_idx].nelem != -1),
0) ? __assert_rtn(__func__, "compat/regex/regcomp.c", 1659, "dfa->eclosures[node_idx].nelem != -1"
) : (void)0)
;
1660#endif
1661
1662 /* If we have already calculated, skip it. */
1663 if (dfa->eclosures[node_idx].nelem != 0)
1664 continue;
1665 /* Calculate epsilon closure of `node_idx'. */
1666 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
1667 if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))
1668 return err;
1669
1670 if (dfa->eclosures[node_idx].nelem == 0)
1671 {
1672 incomplete = 1;
1673 re_node_set_free (&eclosure_elem)free ((&eclosure_elem)->elems);
1674 }
1675 }
1676 return REG_NOERROR;
1677}
1678
1679/* Calculate epsilon closure of NODE. */
1680
1681static reg_errcode_t
1682calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root)
1683{
1684 reg_errcode_t err;
1685 int i;
1686 re_node_set eclosure;
1687 int ret;
1688 int incomplete = 0;
1689 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
1690 if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))
1691 return err;
1692
1693 /* This indicates that we are calculating this node now.
1694 We reference this value to avoid infinite loop. */
1695 dfa->eclosures[node].nelem = -1;
1696
1697 /* If the current node has constraints, duplicate all nodes
1698 since they must inherit the constraints. */
1699 if (dfa->nodes[node].constraint
1700 && dfa->edests[node].nelem
1701 && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
1702 {
1703 err = duplicate_node_closure (dfa, node, node, node,
1704 dfa->nodes[node].constraint);
1705 if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))
1706 return err;
1707 }
1708
1709 /* Expand each epsilon destination nodes. */
1710 if (IS_EPSILON_NODE(dfa->nodes[node].type)((dfa->nodes[node].type) & 8))
1711 for (i = 0; i < dfa->edests[node].nelem; ++i)
1712 {
1713 re_node_set eclosure_elem;
1714 int edest = dfa->edests[node].elems[i];
1715 /* If calculating the epsilon closure of `edest' is in progress,
1716 return intermediate result. */
1717 if (dfa->eclosures[edest].nelem == -1)
1718 {
1719 incomplete = 1;
1720 continue;
1721 }
1722 /* If we haven't calculated the epsilon closure of `edest' yet,
1723 calculate now. Otherwise use calculated epsilon closure. */
1724 if (dfa->eclosures[edest].nelem == 0)
1725 {
1726 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
1727 if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))
1728 return err;
1729 }
1730 else
1731 eclosure_elem = dfa->eclosures[edest];
1732 /* Merge the epsilon closure of `edest'. */
1733 err = re_node_set_merge (&eclosure, &eclosure_elem);
1734 if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))
1735 return err;
1736 /* If the epsilon closure of `edest' is incomplete,
1737 the epsilon closure of this node is also incomplete. */
1738 if (dfa->eclosures[edest].nelem == 0)
1739 {
1740 incomplete = 1;
1741 re_node_set_free (&eclosure_elem)free ((&eclosure_elem)->elems);
1742 }
1743 }
1744
1745 /* An epsilon closure includes itself. */
1746 ret = re_node_set_insert (&eclosure, node);
1747 if (BE (ret < 0, 0)__builtin_expect (ret < 0, 0))
1748 return REG_ESPACE;
1749 if (incomplete && !root)
1750 dfa->eclosures[node].nelem = 0;
1751 else
1752 dfa->eclosures[node] = eclosure;
1753 *new_set = eclosure;
1754 return REG_NOERROR;
1755}
1756
1757/* Functions for token which are used in the parser. */
1758
1759/* Fetch a token from INPUT.
1760 We must not use this function inside bracket expressions. */
1761
1762static void
1763internal_function
1764fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
1765{
1766 re_string_skip_bytes (input, peek_token (result, input, syntax))((input)->cur_idx += (peek_token (result, input, syntax)));
1767}
1768
1769/* Peek a token from INPUT, and return the length of the token.
1770 We must not use this function inside bracket expressions. */
1771
1772static int
1773internal_function
1774peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1775{
1776 unsigned char c;
1777
1778 if (re_string_eoi (input)((input)->stop <= (input)->cur_idx))
1779 {
1780 token->type = END_OF_RE;
1781 return 0;
1782 }
1783
1784 c = re_string_peek_byte (input, 0)((input)->mbs[(input)->cur_idx + 0]);
1785 token->opr.c = c;
1786
1787 token->word_char = 0;
1788#ifdef RE_ENABLE_I18N
1789 token->mb_partial = 0;
1790 if (input->mb_cur_max > 1 &&
1791 !re_string_first_byte (input, re_string_cur_idx (input))((((input)->cur_idx)) == (input)->valid_len || (input)->
wcs[((input)->cur_idx)] != WEOF)
)
1792 {
1793 token->type = CHARACTER;
1794 token->mb_partial = 1;
1795 return 1;
1796 }
1797#endif
1798 if (c == '\\')
1799 {
1800 unsigned char c2;
1801 if (re_string_cur_idx (input)((input)->cur_idx) + 1 >= re_string_length (input)((input)->len))
1802 {
1803 token->type = BACK_SLASH;
1804 return 1;
1805 }
1806
1807 c2 = re_string_peek_byte_case (input, 1);
1808 token->opr.c = c2;
1809 token->type = CHARACTER;
1810#ifdef RE_ENABLE_I18N
1811 if (input->mb_cur_max > 1)
1812 {
1813 wint_t wc = re_string_wchar_at (input,
1814 re_string_cur_idx (input)((input)->cur_idx) + 1);
1815 token->word_char = IS_WIDE_WORD_CHAR (wc)(iswalnum (wc) || (wc) == L'_') != 0;
1816 }
1817 else
1818#endif
1819 token->word_char = IS_WORD_CHAR (c2)(isalnum (c2) || (c2) == '_') != 0;
1820
1821 switch (c2)
1822 {
1823 case '|':
1824 if (!(syntax & RE_LIMITED_OPS((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1)
) && !(syntax & RE_NO_BK_VBAR(((((((((((((((((unsigned long int) 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1)
))
1825 token->type = OP_ALT;
1826 break;
1827 case '1': case '2': case '3': case '4': case '5':
1828 case '6': case '7': case '8': case '9':
1829 if (!(syntax & RE_NO_BK_REFS((((((((((((((((unsigned long int) 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1)
))
1830 {
1831 token->type = OP_BACK_REF;
1832 token->opr.idx = c2 - '1';
1833 }
1834 break;
1835 case '<':
1836 if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)
))
1837 {
1838 token->type = ANCHOR;
1839 token->opr.ctx_type = WORD_FIRST;
1840 }
1841 break;
1842 case '>':
1843 if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)
))
1844 {
1845 token->type = ANCHOR;
1846 token->opr.ctx_type = WORD_LAST;
1847 }
1848 break;
1849 case 'b':
1850 if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)
))
1851 {
1852 token->type = ANCHOR;
1853 token->opr.ctx_type = WORD_DELIM;
1854 }
1855 break;
1856 case 'B':
1857 if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)
))
1858 {
1859 token->type = ANCHOR;
1860 token->opr.ctx_type = NOT_WORD_DELIM;
1861 }
1862 break;
1863 case 'w':
1864 if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)
))
1865 token->type = OP_WORD;
1866 break;
1867 case 'W':
1868 if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)
))
1869 token->type = OP_NOTWORD;
1870 break;
1871 case 's':
1872 if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)
))
1873 token->type = OP_SPACE;
1874 break;
1875 case 'S':
1876 if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)
))
1877 token->type = OP_NOTSPACE;
1878 break;
1879 case '`':
1880 if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)
))
1881 {
1882 token->type = ANCHOR;
1883 token->opr.ctx_type = BUF_FIRST;
1884 }
1885 break;
1886 case '\'':
1887 if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)
))
1888 {
1889 token->type = ANCHOR;
1890 token->opr.ctx_type = BUF_LAST;
1891 }
1892 break;
1893 case '(':
1894 if (!(syntax & RE_NO_BK_PARENS(((((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
))
1895 token->type = OP_OPEN_SUBEXP;
1896 break;
1897 case ')':
1898 if (!(syntax & RE_NO_BK_PARENS(((((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
))
1899 token->type = OP_CLOSE_SUBEXP;
1900 break;
1901 case '+':
1902 if (!(syntax & RE_LIMITED_OPS((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1)
) && (syntax & RE_BK_PLUS_QM(((unsigned long int) 1) << 1)))
1903 token->type = OP_DUP_PLUS;
1904 break;
1905 case '?':
1906 if (!(syntax & RE_LIMITED_OPS((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1)
) && (syntax & RE_BK_PLUS_QM(((unsigned long int) 1) << 1)))
1907 token->type = OP_DUP_QUESTION;
1908 break;
1909 case '{':
1910 if ((syntax & RE_INTERVALS(((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1)
) && (!(syntax & RE_NO_BK_BRACES((((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1)
)))
1911 token->type = OP_OPEN_DUP_NUM;
1912 break;
1913 case '}':
1914 if ((syntax & RE_INTERVALS(((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1)
) && (!(syntax & RE_NO_BK_BRACES((((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1)
)))
1915 token->type = OP_CLOSE_DUP_NUM;
1916 break;
1917 default:
1918 break;
1919 }
1920 return 2;
1921 }
1922
1923 token->type = CHARACTER;
1924#ifdef RE_ENABLE_I18N
1925 if (input->mb_cur_max > 1)
1926 {
1927 wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)((input)->cur_idx));
1928 token->word_char = IS_WIDE_WORD_CHAR (wc)(iswalnum (wc) || (wc) == L'_') != 0;
1929 }
1930 else
1931#endif
1932 token->word_char = IS_WORD_CHAR (token->opr.c)(isalnum (token->opr.c) || (token->opr.c) == '_');
1933
1934 switch (c)
1935 {
1936 case '\n':
1937 if (syntax & RE_NEWLINE_ALT(((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1)
)
1938 token->type = OP_ALT;
1939 break;
1940 case '|':
1941 if (!(syntax & RE_LIMITED_OPS((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1)
) && (syntax & RE_NO_BK_VBAR(((((((((((((((((unsigned long int) 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1)
))
1942 token->type = OP_ALT;
1943 break;
1944 case '*':
1945 token->type = OP_DUP_ASTERISK;
1946 break;
1947 case '+':
1948 if (!(syntax & RE_LIMITED_OPS((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1)
) && !(syntax & RE_BK_PLUS_QM(((unsigned long int) 1) << 1)))
1949 token->type = OP_DUP_PLUS;
1950 break;
1951 case '?':
1952 if (!(syntax & RE_LIMITED_OPS((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1)
) && !(syntax & RE_BK_PLUS_QM(((unsigned long int) 1) << 1)))
1953 token->type = OP_DUP_QUESTION;
1954 break;
1955 case '{':
1956 if ((syntax & RE_INTERVALS(((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1)
) && (syntax & RE_NO_BK_BRACES((((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1)
))
1957 token->type = OP_OPEN_DUP_NUM;
1958 break;
1959 case '}':
1960 if ((syntax & RE_INTERVALS(((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1)
) && (syntax & RE_NO_BK_BRACES((((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1)
))
1961 token->type = OP_CLOSE_DUP_NUM;
1962 break;
1963 case '(':
1964 if (syntax & RE_NO_BK_PARENS(((((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
)
1965 token->type = OP_OPEN_SUBEXP;
1966 break;
1967 case ')':
1968 if (syntax & RE_NO_BK_PARENS(((((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
)
1969 token->type = OP_CLOSE_SUBEXP;
1970 break;
1971 case '[':
1972 token->type = OP_OPEN_BRACKET;
1973 break;
1974 case '.':
1975 token->type = OP_PERIOD;
1976 break;
1977 case '^':
1978 if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS(((((unsigned long int) 1) << 1) << 1) << 1
)
| RE_CARET_ANCHORS_HERE((((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1)
)) &&
1979 re_string_cur_idx (input)((input)->cur_idx) != 0)
1980 {
1981 char prev = re_string_peek_byte (input, -1)((input)->mbs[(input)->cur_idx + -1]);
1982 if (!(syntax & RE_NEWLINE_ALT(((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1)
) || prev != '\n')
1983 break;
1984 }
1985 token->type = ANCHOR;
1986 token->opr.ctx_type = LINE_FIRST;
1987 break;
1988 case '$':
1989 if (!(syntax & RE_CONTEXT_INDEP_ANCHORS(((((unsigned long int) 1) << 1) << 1) << 1
)
) &&
1990 re_string_cur_idx (input)((input)->cur_idx) + 1 != re_string_length (input)((input)->len))
1991 {
1992 re_token_t next;
1993 re_string_skip_bytes (input, 1)((input)->cur_idx += (1));
1994 peek_token (&next, input, syntax);
1995 re_string_skip_bytes (input, -1)((input)->cur_idx += (-1));
1996 if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
1997 break;
1998 }
1999 token->type = ANCHOR;
2000 token->opr.ctx_type = LINE_LAST;
2001 break;
2002 default:
2003 break;
2004 }
2005 return 1;
2006}
2007
2008/* Peek a token from INPUT, and return the length of the token.
2009 We must not use this function out of bracket expressions. */
2010
2011static int
2012internal_function
2013peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2014{
2015 unsigned char c;
2016 if (re_string_eoi (input)((input)->stop <= (input)->cur_idx))
2017 {
2018 token->type = END_OF_RE;
2019 return 0;
2020 }
2021 c = re_string_peek_byte (input, 0)((input)->mbs[(input)->cur_idx + 0]);
2022 token->opr.c = c;
2023
2024#ifdef RE_ENABLE_I18N
2025 if (input->mb_cur_max > 1 &&
2026 !re_string_first_byte (input, re_string_cur_idx (input))((((input)->cur_idx)) == (input)->valid_len || (input)->
wcs[((input)->cur_idx)] != WEOF)
)
2027 {
2028 token->type = CHARACTER;
2029 return 1;
2030 }
2031#endif /* RE_ENABLE_I18N */
2032
2033 if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS((unsigned long int) 1))
2034 && re_string_cur_idx (input)((input)->cur_idx) + 1 < re_string_length (input)((input)->len))
2035 {
2036 /* In this case, '\' escape a character. */
2037 unsigned char c2;
2038 re_string_skip_bytes (input, 1)((input)->cur_idx += (1));
2039 c2 = re_string_peek_byte (input, 0)((input)->mbs[(input)->cur_idx + 0]);
2040 token->opr.c = c2;
2041 token->type = CHARACTER;
2042 return 1;
2043 }
2044 if (c == '[') /* '[' is a special char in a bracket exps. */
2045 {
2046 unsigned char c2;
2047 int token_len;
2048 if (re_string_cur_idx (input)((input)->cur_idx) + 1 < re_string_length (input)((input)->len))
2049 c2 = re_string_peek_byte (input, 1)((input)->mbs[(input)->cur_idx + 1]);
2050 else
2051 c2 = 0;
2052 token->opr.c = c2;
2053 token_len = 2;
2054 switch (c2)
2055 {
2056 case '.':
2057 token->type = OP_OPEN_COLL_ELEM;
2058 break;
2059 case '=':
2060 token->type = OP_OPEN_EQUIV_CLASS;
2061 break;
2062 case ':':
2063 if (syntax & RE_CHAR_CLASSES((((unsigned long int) 1) << 1) << 1))
2064 {
2065 token->type = OP_OPEN_CHAR_CLASS;
2066 break;
2067 }
2068 /* else fall through. */
2069 default:
2070 token->type = CHARACTER;
2071 token->opr.c = c;
2072 token_len = 1;
2073 break;
2074 }
2075 return token_len;
2076 }
2077 switch (c)
2078 {
2079 case '-':
2080 token->type = OP_CHARSET_RANGE;
2081 break;
2082 case ']':
2083 token->type = OP_CLOSE_BRACKET;
2084 break;
2085 case '^':
2086 token->type = OP_NON_MATCH_LIST;
2087 break;
2088 default:
2089 token->type = CHARACTER;
2090 }
2091 return 1;
2092}
2093
2094/* Functions for parser. */
2095
2096/* Entry point of the parser.
2097 Parse the regular expression REGEXP and return the structure tree.
2098 If an error has occurred, ERR is set by error code, and return NULL.
2099 This function build the following tree, from regular expression <reg_exp>:
2100 CAT
2101 / \
2102 / \
2103 <reg_exp> EOR
2104
2105 CAT means concatenation.
2106 EOR means end of regular expression. */
2107
2108static bin_tree_t *
2109parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
2110 reg_errcode_t *err)
2111{
2112 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2113 bin_tree_t *tree, *eor, *root;
2114 re_token_t current_token;
2115 dfa->syntax = syntax;
2116 fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE((((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1)
);
2117 tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
2118 if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0)
)
12
Taking false branch
2119 return NULL((void*)0);
2120 eor = create_tree (dfa, NULL((void*)0), NULL((void*)0), END_OF_RE);
13
Calling 'create_tree'
20
Returning from 'create_tree'
2121 if (tree != NULL((void*)0))
21
Assuming 'tree' is equal to null
22
Taking false branch
2122 root = create_tree (dfa, tree, eor, CONCAT);
2123 else
2124 root = eor;
2125 if (BE (eor == NULL || root == NULL, 0)__builtin_expect (eor == ((void*)0) || root == ((void*)0), 0))
23
Taking false branch
2126 {
2127 *err = REG_ESPACE;
2128 return NULL((void*)0);
2129 }
2130 return root;
2131}
2132
2133/* This function build the following tree, from regular expression
2134 <branch1>|<branch2>:
2135 ALT
2136 / \
2137 / \
2138 <branch1> <branch2>
2139
2140 ALT means alternative, which represents the operator `|'. */
2141
2142static bin_tree_t *
2143parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2144 reg_syntax_t syntax, int nest, reg_errcode_t *err)
2145{
2146 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2147 bin_tree_t *tree, *branch = NULL((void*)0);
2148 tree = parse_branch (regexp, preg, token, syntax, nest, err);
2149 if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0)
)
2150 return NULL((void*)0);
2151
2152 while (token->type == OP_ALT)
2153 {
2154 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE((((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1)
);
2155 if (token->type != OP_ALT && token->type != END_OF_RE
2156 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
2157 {
2158 branch = parse_branch (regexp, preg, token, syntax, nest, err);
2159 if (BE (*err != REG_NOERROR && branch == NULL, 0)__builtin_expect (*err != REG_NOERROR && branch == ((
void*)0), 0)
)
2160 return NULL((void*)0);
2161 }
2162 else
2163 branch = NULL((void*)0);
2164 tree = create_tree (dfa, tree, branch, OP_ALT);
2165 if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))
2166 {
2167 *err = REG_ESPACE;
2168 return NULL((void*)0);
2169 }
2170 }
2171 return tree;
2172}
2173
2174/* This function build the following tree, from regular expression
2175 <exp1><exp2>:
2176 CAT
2177 / \
2178 / \
2179 <exp1> <exp2>
2180
2181 CAT means concatenation. */
2182
2183static bin_tree_t *
2184parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
2185 reg_syntax_t syntax, int nest, reg_errcode_t *err)
2186{
2187 bin_tree_t *tree, *exp;
2188 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2189 tree = parse_expression (regexp, preg, token, syntax, nest, err);
2190 if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0)
)
2191 return NULL((void*)0);
2192
2193 while (token->type != OP_ALT && token->type != END_OF_RE
2194 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
2195 {
2196 exp = parse_expression (regexp, preg, token, syntax, nest, err);
2197 if (BE (*err != REG_NOERROR && exp == NULL, 0)__builtin_expect (*err != REG_NOERROR && exp == ((void
*)0), 0)
)
2198 {
2199 return NULL((void*)0);
2200 }
2201 if (tree != NULL((void*)0) && exp != NULL((void*)0))
2202 {
2203 tree = create_tree (dfa, tree, exp, CONCAT);
2204 if (tree == NULL((void*)0))
2205 {
2206 *err = REG_ESPACE;
2207 return NULL((void*)0);
2208 }
2209 }
2210 else if (tree == NULL((void*)0))
2211 tree = exp;
2212 /* Otherwise exp == NULL, we don't need to create new tree. */
2213 }
2214 return tree;
2215}
2216
2217/* This function build the following tree, from regular expression a*:
2218 *
2219 |
2220 a
2221*/
2222
2223static bin_tree_t *
2224parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2225 reg_syntax_t syntax, int nest, reg_errcode_t *err)
2226{
2227 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2228 bin_tree_t *tree;
2229 switch (token->type)
2230 {
2231 case CHARACTER:
2232 tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);
2233 if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))
2234 {
2235 *err = REG_ESPACE;
2236 return NULL((void*)0);
2237 }
2238#ifdef RE_ENABLE_I18N
2239 if (dfa->mb_cur_max > 1)
2240 {
2241 while (!re_string_eoi (regexp)((regexp)->stop <= (regexp)->cur_idx)
2242 && !re_string_first_byte (regexp, re_string_cur_idx (regexp))((((regexp)->cur_idx)) == (regexp)->valid_len || (regexp
)->wcs[((regexp)->cur_idx)] != WEOF)
)
2243 {
2244 bin_tree_t *mbc_remain;
2245 fetch_token (token, regexp, syntax);
2246 mbc_remain = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);
2247 tree = create_tree (dfa, tree, mbc_remain, CONCAT);
2248 if (BE (mbc_remain == NULL || tree == NULL, 0)__builtin_expect (mbc_remain == ((void*)0) || tree == ((void*
)0), 0)
)
2249 {
2250 *err = REG_ESPACE;
2251 return NULL((void*)0);
2252 }
2253 }
2254 }
2255#endif
2256 break;
2257 case OP_OPEN_SUBEXP:
2258 tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
2259 if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0)
)
2260 return NULL((void*)0);
2261 break;
2262 case OP_OPEN_BRACKET:
2263 tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
2264 if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0)
)
2265 return NULL((void*)0);
2266 break;
2267 case OP_BACK_REF:
2268 if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)__builtin_expect (dfa->completed_bkref_map & (1 <<
token->opr.idx), 1)
)
2269 {
2270 *err = REG_ESUBREG;
2271 return NULL((void*)0);
2272 }
2273 dfa->used_bkref_map |= 1 << token->opr.idx;
2274 tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);
2275 if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))
2276 {
2277 *err = REG_ESPACE;
2278 return NULL((void*)0);
2279 }
2280 ++dfa->nbackref;
2281 dfa->has_mb_node = 1;
2282 break;
2283 case OP_OPEN_DUP_NUM:
2284 if (syntax & RE_CONTEXT_INVALID_DUP(((((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
)
)
2285 {
2286 *err = REG_BADRPT;
2287 return NULL((void*)0);
2288 }
2289 /* FALLTHROUGH */
2290 case OP_DUP_ASTERISK:
2291 case OP_DUP_PLUS:
2292 case OP_DUP_QUESTION:
2293 if (syntax & RE_CONTEXT_INVALID_OPS(((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1)
)
2294 {
2295 *err = REG_BADRPT;
2296 return NULL((void*)0);
2297 }
2298 else if (syntax & RE_CONTEXT_INDEP_OPS((((((unsigned long int) 1) << 1) << 1) << 1
) << 1)
)
2299 {
2300 fetch_token (token, regexp, syntax);
2301 return parse_expression (regexp, preg, token, syntax, nest, err);
2302 }
2303 /* else fall through */
2304 case OP_CLOSE_SUBEXP:
2305 if ((token->type == OP_CLOSE_SUBEXP) &&
2306 !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD(((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1)
))
2307 {
2308 *err = REG_ERPAREN;
2309 return NULL((void*)0);
2310 }
2311 /* else fall through */
2312 case OP_CLOSE_DUP_NUM:
2313 /* We treat it as a normal character. */
2314
2315 /* Then we can these characters as normal characters. */
2316 token->type = CHARACTER;
2317 /* mb_partial and word_char bits should be initialized already
2318 by peek_token. */
2319 tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);
2320 if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))
2321 {
2322 *err = REG_ESPACE;
2323 return NULL((void*)0);
2324 }
2325 break;
2326 case ANCHOR:
2327 if ((token->opr.ctx_type
2328 & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
2329 && dfa->word_ops_used == 0)
2330 init_word_char (dfa);
2331 if (token->opr.ctx_type == WORD_DELIM
2332 || token->opr.ctx_type == NOT_WORD_DELIM)
2333 {
2334 bin_tree_t *tree_first, *tree_last;
2335 if (token->opr.ctx_type == WORD_DELIM)
2336 {
2337 token->opr.ctx_type = WORD_FIRST;
2338 tree_first = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);
2339 token->opr.ctx_type = WORD_LAST;
2340 }
2341 else
2342 {
2343 token->opr.ctx_type = INSIDE_WORD;
2344 tree_first = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);
2345 token->opr.ctx_type = INSIDE_NOTWORD;
2346 }
2347 tree_last = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);
2348 tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
2349 if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)__builtin_expect (tree_first == ((void*)0) || tree_last == ((
void*)0) || tree == ((void*)0), 0)
)
2350 {
2351 *err = REG_ESPACE;
2352 return NULL((void*)0);
2353 }
2354 }
2355 else
2356 {
2357 tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);
2358 if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))
2359 {
2360 *err = REG_ESPACE;
2361 return NULL((void*)0);
2362 }
2363 }
2364 /* We must return here, since ANCHORs can't be followed
2365 by repetition operators.
2366 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
2367 it must not be "<ANCHOR(^)><REPEAT(*)>". */
2368 fetch_token (token, regexp, syntax);
2369 return tree;
2370 case OP_PERIOD:
2371 tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);
2372 if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))
2373 {
2374 *err = REG_ESPACE;
2375 return NULL((void*)0);
2376 }
2377 if (dfa->mb_cur_max > 1)
2378 dfa->has_mb_node = 1;
2379 break;
2380 case OP_WORD:
2381 case OP_NOTWORD:
2382 tree = build_charclass_op (dfa, regexp->trans,
2383 "alnum",
2384 "_",
2385 token->type == OP_NOTWORD, err);
2386 if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0)
)
2387 return NULL((void*)0);
2388 break;
2389 case OP_SPACE:
2390 case OP_NOTSPACE:
2391 tree = build_charclass_op (dfa, regexp->trans,
2392 "space",
2393 "",
2394 token->type == OP_NOTSPACE, err);
2395 if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0)
)
2396 return NULL((void*)0);
2397 break;
2398 case OP_ALT:
2399 case END_OF_RE:
2400 return NULL((void*)0);
2401 case BACK_SLASH:
2402 *err = REG_EESCAPE;
2403 return NULL((void*)0);
2404 default:
2405 /* Must not happen? */
2406#ifdef DEBUG
2407 assert (0)(__builtin_expect(!(0), 0) ? __assert_rtn(__func__, "compat/regex/regcomp.c"
, 2407, "0") : (void)0)
;
2408#endif
2409 return NULL((void*)0);
2410 }
2411 fetch_token (token, regexp, syntax);
2412
2413 while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
2414 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
2415 {
2416 tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
2417 if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0)
)
2418 return NULL((void*)0);
2419 /* In BRE consecutive duplications are not allowed. */
2420 if ((syntax & RE_CONTEXT_INVALID_DUP(((((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
)
)
2421 && (token->type == OP_DUP_ASTERISK
2422 || token->type == OP_OPEN_DUP_NUM))
2423 {
2424 *err = REG_BADRPT;
2425 return NULL((void*)0);
2426 }
2427 }
2428
2429 return tree;
2430}
2431
2432/* This function build the following tree, from regular expression
2433 (<reg_exp>):
2434 SUBEXP
2435 |
2436 <reg_exp>
2437*/
2438
2439static bin_tree_t *
2440parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2441 reg_syntax_t syntax, int nest, reg_errcode_t *err)
2442{
2443 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2444 bin_tree_t *tree;
2445 size_t cur_nsub;
2446 cur_nsub = preg->re_nsub++;
2447
2448 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE((((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1)
);
2449
2450 /* The subexpression may be a null string. */
2451 if (token->type == OP_CLOSE_SUBEXP)
2452 tree = NULL((void*)0);
2453 else
2454 {
2455 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
2456 if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)__builtin_expect (*err == REG_NOERROR && token->type
!= OP_CLOSE_SUBEXP, 0)
)
2457 *err = REG_EPAREN;
2458 if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))
2459 return NULL((void*)0);
2460 }
2461
2462 if (cur_nsub <= '9' - '1')
2463 dfa->completed_bkref_map |= 1 << cur_nsub;
2464
2465 tree = create_tree (dfa, tree, NULL((void*)0), SUBEXP);
2466 if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))
2467 {
2468 *err = REG_ESPACE;
2469 return NULL((void*)0);
2470 }
2471 tree->token.opr.idx = cur_nsub;
2472 return tree;
2473}
2474
2475/* This function parse repetition operators like "*", "+", "{1,3}" etc. */
2476
2477static bin_tree_t *
2478parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2479 re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
2480{
2481 bin_tree_t *tree = NULL((void*)0), *old_tree = NULL((void*)0);
2482 int i, start, end, start_idx = re_string_cur_idx (regexp)((regexp)->cur_idx);
2483#ifndef RE_TOKEN_INIT_BUG
2484 re_token_t start_token = *token;
2485#else
2486 re_token_t start_token;
2487
2488 memcpy ((void *) &start_token, (void *) token, sizeof start_token)__builtin___memcpy_chk ((void *) &start_token, (void *) token
, sizeof start_token, __builtin_object_size ((void *) &start_token
, 0))
;
2489#endif
2490
2491 if (token->type == OP_OPEN_DUP_NUM)
2492 {
2493 end = 0;
2494 start = fetch_number (regexp, token, syntax);
2495 if (start == -1)
2496 {
2497 if (token->type == CHARACTER && token->opr.c == ',')
2498 start = 0; /* We treat "{,m}" as "{0,m}". */
2499 else
2500 {
2501 *err = REG_BADBR; /* <re>{} is invalid. */
2502 return NULL((void*)0);
2503 }
2504 }
2505 if (BE (start != -2, 1)__builtin_expect (start != -2, 1))
2506 {
2507 /* We treat "{n}" as "{n,n}". */
2508 end = ((token->type == OP_CLOSE_DUP_NUM) ? start
2509 : ((token->type == CHARACTER && token->opr.c == ',')
2510 ? fetch_number (regexp, token, syntax) : -2));
2511 }
2512 if (BE (start == -2 || end == -2, 0)__builtin_expect (start == -2 || end == -2, 0))
2513 {
2514 /* Invalid sequence. */
2515 if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)__builtin_expect (!(syntax & ((((((((((((((((((((((unsigned
long int) 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
), 0)
)
2516 {
2517 if (token->type == END_OF_RE)
2518 *err = REG_EBRACE;
2519 else
2520 *err = REG_BADBR;
2521
2522 return NULL((void*)0);
2523 }
2524
2525 /* If the syntax bit is set, rollback. */
2526 re_string_set_index (regexp, start_idx)((regexp)->cur_idx = (start_idx));
2527 *token = start_token;
2528 token->type = CHARACTER;
2529 /* mb_partial and word_char bits should be already initialized by
2530 peek_token. */
2531 return elem;
2532 }
2533
2534 if (BE ((end != -1 && start > end) || token->type != OP_CLOSE_DUP_NUM, 0)__builtin_expect ((end != -1 && start > end) || token
->type != OP_CLOSE_DUP_NUM, 0)
)
2535 {
2536 /* First number greater than second. */
2537 *err = REG_BADBR;
2538 return NULL((void*)0);
2539 }
2540 }
2541 else
2542 {
2543 start = (token->type == OP_DUP_PLUS) ? 1 : 0;
2544 end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
2545 }
2546
2547 fetch_token (token, regexp, syntax);
2548
2549 if (BE (elem == NULL, 0)__builtin_expect (elem == ((void*)0), 0))
2550 return NULL((void*)0);
2551 if (BE (start == 0 && end == 0, 0)__builtin_expect (start == 0 && end == 0, 0))
2552 {
2553 postorder (elem, free_tree, NULL((void*)0));
2554 return NULL((void*)0);
2555 }
2556
2557 /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
2558 if (BE (start > 0, 0)__builtin_expect (start > 0, 0))
2559 {
2560 tree = elem;
2561 for (i = 2; i <= start; ++i)
2562 {
2563 elem = duplicate_tree (elem, dfa);
2564 tree = create_tree (dfa, tree, elem, CONCAT);
2565 if (BE (elem == NULL || tree == NULL, 0)__builtin_expect (elem == ((void*)0) || tree == ((void*)0), 0
)
)
2566 goto parse_dup_op_espace;
2567 }
2568
2569 if (start == end)
2570 return tree;
2571
2572 /* Duplicate ELEM before it is marked optional. */
2573 elem = duplicate_tree (elem, dfa);
2574 old_tree = tree;
2575 }
2576 else
2577 old_tree = NULL((void*)0);
2578
2579 if (elem->token.type == SUBEXP)
2580 postorder (elem, mark_opt_subexp, (void *) (intptr_t) elem->token.opr.idx);
2581
2582 tree = create_tree (dfa, elem, NULL((void*)0), (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
2583 if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))
2584 goto parse_dup_op_espace;
2585
2586 /* This loop is actually executed only when end != -1,
2587 to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
2588 already created the start+1-th copy. */
2589 for (i = start + 2; i <= end; ++i)
2590 {
2591 elem = duplicate_tree (elem, dfa);
2592 tree = create_tree (dfa, tree, elem, CONCAT);
2593 if (BE (elem == NULL || tree == NULL, 0)__builtin_expect (elem == ((void*)0) || tree == ((void*)0), 0
)
)
2594 goto parse_dup_op_espace;
2595
2596 tree = create_tree (dfa, tree, NULL((void*)0), OP_ALT);
2597 if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))
2598 goto parse_dup_op_espace;
2599 }
2600
2601 if (old_tree)
2602 tree = create_tree (dfa, old_tree, tree, CONCAT);
2603
2604 return tree;
2605
2606 parse_dup_op_espace:
2607 *err = REG_ESPACE;
2608 return NULL((void*)0);
2609}
2610
2611/* Size of the names for collating symbol/equivalence_class/character_class.
2612 I'm not sure, but maybe enough. */
2613#define BRACKET_NAME_BUF_SIZE32 32
2614
2615#ifndef _LIBC
2616 /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
2617 Build the range expression which starts from START_ELEM, and ends
2618 at END_ELEM. The result are written to MBCSET and SBCSET.
2619 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2620 mbcset->range_ends, is a pointer argument since we may
2621 update it. */
2622
2623static reg_errcode_t
2624internal_function
2625# ifdef RE_ENABLE_I18N
2626build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
2627 bracket_elem_t *start_elem, bracket_elem_t *end_elem)
2628# else /* not RE_ENABLE_I18N */
2629build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
2630 bracket_elem_t *end_elem)
2631# endif /* not RE_ENABLE_I18N */
2632{
2633 unsigned int start_ch, end_ch;
2634 /* Equivalence Classes and Character Classes can't be a range start/end. */
2635 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS__builtin_expect (start_elem->type == EQUIV_CLASS || start_elem
->type == CHAR_CLASS || end_elem->type == EQUIV_CLASS ||
end_elem->type == CHAR_CLASS, 0)
2636 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,__builtin_expect (start_elem->type == EQUIV_CLASS || start_elem
->type == CHAR_CLASS || end_elem->type == EQUIV_CLASS ||
end_elem->type == CHAR_CLASS, 0)
2637 0)__builtin_expect (start_elem->type == EQUIV_CLASS || start_elem
->type == CHAR_CLASS || end_elem->type == EQUIV_CLASS ||
end_elem->type == CHAR_CLASS, 0)
)
2638 return REG_ERANGE;
2639
2640 /* We can handle no multi character collating elements without libc
2641 support. */
2642 if (BE ((start_elem->type == COLL_SYM__builtin_expect ((start_elem->type == COLL_SYM &&
strlen ((char *) start_elem->opr.name) > 1) || (end_elem
->type == COLL_SYM && strlen ((char *) end_elem->
opr.name) > 1), 0)
2643 && strlen ((char *) start_elem->opr.name) > 1)__builtin_expect ((start_elem->type == COLL_SYM &&
strlen ((char *) start_elem->opr.name) > 1) || (end_elem
->type == COLL_SYM && strlen ((char *) end_elem->
opr.name) > 1), 0)
2644 || (end_elem->type == COLL_SYM__builtin_expect ((start_elem->type == COLL_SYM &&
strlen ((char *) start_elem->opr.name) > 1) || (end_elem
->type == COLL_SYM && strlen ((char *) end_elem->
opr.name) > 1), 0)
2645 && strlen ((char *) end_elem->opr.name) > 1), 0)__builtin_expect ((start_elem->type == COLL_SYM &&
strlen ((char *) start_elem->opr.name) > 1) || (end_elem
->type == COLL_SYM && strlen ((char *) end_elem->
opr.name) > 1), 0)
)
2646 return REG_ECOLLATE;
2647
2648# ifdef RE_ENABLE_I18N
2649 {
2650 wchar_t wc;
2651 wint_t start_wc;
2652 wint_t end_wc;
2653 wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
2654
2655 start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
2656 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2657 : 0));
2658 end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
2659 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2660 : 0));
2661#ifdef GAWK1
2662 /*
2663 * Fedora Core 2, maybe others, have broken `btowc' that returns -1
2664 * for any value > 127. Sigh. Note that `start_ch' and `end_ch' are
2665 * unsigned, so we don't have sign extension problems.
2666 */
2667 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
2668 ? start_ch : start_elem->opr.wch);
2669 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
2670 ? end_ch : end_elem->opr.wch);
2671#else
2672 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
2673 ? __btowcbtowc (start_ch) : start_elem->opr.wch);
2674 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
2675 ? __btowcbtowc (end_ch) : end_elem->opr.wch);
2676#endif
2677 if (start_wc == WEOF || end_wc == WEOF)
2678 return REG_ECOLLATE;
2679 cmp_buf[0] = start_wc;
2680 cmp_buf[4] = end_wc;
2681 if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
2682 return REG_ERANGE;
2683
2684 /* Got valid collation sequence values, add them as a new entry.
2685 However, for !_LIBC we have no collation elements: if the
2686 character set is single byte, the single byte character set
2687 that we build below suffices. parse_bracket_exp passes
2688 no MBCSET if dfa->mb_cur_max == 1. */
2689 if (mbcset)
2690 {
2691 /* Check the space of the arrays. */
2692 if (BE (*range_alloc == mbcset->nranges, 0)__builtin_expect (*range_alloc == mbcset->nranges, 0))
2693 {
2694 /* There is not enough space, need realloc. */
2695 wchar_t *new_array_start, *new_array_end;
2696 int new_nranges;
2697
2698 /* +1 in case of mbcset->nranges is 0. */
2699 new_nranges = 2 * mbcset->nranges + 1;
2700 /* Use realloc since mbcset->range_starts and mbcset->range_ends
2701 are NULL if *range_alloc == 0. */
2702 new_array_start = re_realloc (mbcset->range_starts, wchar_t,((mbcset->range_starts != ((void*)0)) ? (wchar_t *) realloc
(mbcset->range_starts,(new_nranges)*sizeof(wchar_t)) : (wchar_t
*) calloc(new_nranges,sizeof(wchar_t)))
2703 new_nranges)((mbcset->range_starts != ((void*)0)) ? (wchar_t *) realloc
(mbcset->range_starts,(new_nranges)*sizeof(wchar_t)) : (wchar_t
*) calloc(new_nranges,sizeof(wchar_t)))
;
2704 new_array_end = re_realloc (mbcset->range_ends, wchar_t,((mbcset->range_ends != ((void*)0)) ? (wchar_t *) realloc (
mbcset->range_ends,(new_nranges)*sizeof(wchar_t)) : (wchar_t
*) calloc(new_nranges,sizeof(wchar_t)))
2705 new_nranges)((mbcset->range_ends != ((void*)0)) ? (wchar_t *) realloc (
mbcset->range_ends,(new_nranges)*sizeof(wchar_t)) : (wchar_t
*) calloc(new_nranges,sizeof(wchar_t)))
;
2706
2707 if (BE (new_array_start == NULL || new_array_end == NULL, 0)__builtin_expect (new_array_start == ((void*)0) || new_array_end
== ((void*)0), 0)
)
2708 return REG_ESPACE;
2709
2710 mbcset->range_starts = new_array_start;
2711 mbcset->range_ends = new_array_end;
2712 *range_alloc = new_nranges;
2713 }
2714
2715 mbcset->range_starts[mbcset->nranges] = start_wc;
2716 mbcset->range_ends[mbcset->nranges++] = end_wc;
2717 }
2718
2719 /* Build the table for single byte characters. */
2720 for (wc = 0; wc < SBC_MAX256; ++wc)
2721 {
2722 cmp_buf[2] = wc;
2723 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
2724 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
2725 bitset_set (sbcset, wc)(sbcset[wc / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t)
1 << wc % (sizeof (bitset_word_t) * 8))
;
2726 }
2727 }
2728# else /* not RE_ENABLE_I18N */
2729 {
2730 unsigned int ch;
2731 start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
2732 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2733 : 0));
2734 end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
2735 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2736 : 0));
2737 if (start_ch > end_ch)
2738 return REG_ERANGE;
2739 /* Build the table for single byte characters. */
2740 for (ch = 0; ch < SBC_MAX256; ++ch)
2741 if (start_ch <= ch && ch <= end_ch)
2742 bitset_set (sbcset, ch)(sbcset[ch / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t)
1 << ch % (sizeof (bitset_word_t) * 8))
;
2743 }
2744# endif /* not RE_ENABLE_I18N */
2745 return REG_NOERROR;
2746}
2747#endif /* not _LIBC */
2748
2749#ifndef _LIBC
2750/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
2751 Build the collating element which is represented by NAME.
2752 The result are written to MBCSET and SBCSET.
2753 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2754 pointer argument since we may update it. */
2755
2756static reg_errcode_t
2757internal_function
2758# ifdef RE_ENABLE_I18N
2759build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2760 int *coll_sym_alloc, const unsigned char *name)
2761# else /* not RE_ENABLE_I18N */
2762build_collating_symbol (bitset_t sbcset, const unsigned char *name)
2763# endif /* not RE_ENABLE_I18N */
2764{
2765 size_t name_len = strlen ((const char *) name);
2766 if (BE (name_len != 1, 0)__builtin_expect (name_len != 1, 0))
2767 return REG_ECOLLATE;
2768 else
2769 {
2770 bitset_set (sbcset, name[0])(sbcset[name[0] / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << name[0] % (sizeof (bitset_word_t) * 8))
;
2771 return REG_NOERROR;
2772 }
2773}
2774#endif /* not _LIBC */
2775
2776/* This function parse bracket expression like "[abc]", "[a-c]",
2777 "[[.a-a.]]" etc. */
2778
2779static bin_tree_t *
2780parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2781 reg_syntax_t syntax, reg_errcode_t *err)
2782{
2783#ifdef _LIBC
2784 const unsigned char *collseqmb;
2785 const char *collseqwc;
2786 uint32_t nrules;
2787 int32_t table_size;
2788 const int32_t *symb_table;
2789 const unsigned char *extra;
2790
2791 /* Local function for parse_bracket_exp used in _LIBC environment.
2792 Seek the collating symbol entry correspondings to NAME.
2793 Return the index of the symbol in the SYMB_TABLE. */
2794
2795 auto inline int32_t
2796 __attribute ((always_inline))__attribute__ ((always_inline))
2797 seek_collating_symbol_entry (name, name_len)
2798 const unsigned char *name;
2799 size_t name_len;
2800 {
2801 int32_t hash = elem_hash ((const char *) name, name_len);
2802 int32_t elem = hash % table_size;
2803 if (symb_table[2 * elem] != 0)
2804 {
2805 int32_t second = hash % (table_size - 2) + 1;
2806
2807 do
2808 {
2809 /* First compare the hashing value. */
2810 if (symb_table[2 * elem] == hash
2811 /* Compare the length of the name. */
2812 && name_len == extra[symb_table[2 * elem + 1]]
2813 /* Compare the name. */
2814 && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
2815 name_len) == 0)
2816 {
2817 /* Yep, this is the entry. */
2818 break;
2819 }
2820
2821 /* Next entry. */
2822 elem += second;
2823 }
2824 while (symb_table[2 * elem] != 0);
2825 }
2826 return elem;
2827 }
2828
2829 /* Local function for parse_bracket_exp used in _LIBC environment.
2830 Look up the collation sequence value of BR_ELEM.
2831 Return the value if succeeded, UINT_MAX otherwise. */
2832
2833 auto inline unsigned int
2834 __attribute ((always_inline))__attribute__ ((always_inline))
2835 lookup_collation_sequence_value (br_elem)
2836 bracket_elem_t *br_elem;
2837 {
2838 if (br_elem->type == SB_CHAR)
2839 {
2840 /*
2841 if (MB_CUR_MAX == 1)
2842 */
2843 if (nrules == 0)
2844 return collseqmb[br_elem->opr.ch];
2845 else
2846 {
2847 wint_t wc = __btowcbtowc (br_elem->opr.ch);
2848 return __collseq_table_lookup (collseqwc, wc);
2849 }
2850 }
2851 else if (br_elem->type == MB_CHAR)
2852 {
2853 if (nrules != 0)
2854 return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
2855 }
2856 else if (br_elem->type == COLL_SYM)
2857 {
2858 size_t sym_name_len = strlen ((char *) br_elem->opr.name);
2859 if (nrules != 0)
2860 {
2861 int32_t elem, idx;
2862 elem = seek_collating_symbol_entry (br_elem->opr.name,
2863 sym_name_len);
2864 if (symb_table[2 * elem] != 0)
2865 {
2866 /* We found the entry. */
2867 idx = symb_table[2 * elem + 1];
2868 /* Skip the name of collating element name. */
2869 idx += 1 + extra[idx];
2870 /* Skip the byte sequence of the collating element. */
2871 idx += 1 + extra[idx];
2872 /* Adjust for the alignment. */
2873 idx = (idx + 3) & ~3;
2874 /* Skip the multibyte collation sequence value. */
2875 idx += sizeof (unsigned int);
2876 /* Skip the wide char sequence of the collating element. */
2877 idx += sizeof (unsigned int) *
2878 (1 + *(unsigned int *) (extra + idx));
2879 /* Return the collation sequence value. */
2880 return *(unsigned int *) (extra + idx);
2881 }
2882 else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
2883 {
2884 /* No valid character. Match it as a single byte
2885 character. */
2886 return collseqmb[br_elem->opr.name[0]];
2887 }
2888 }
2889 else if (sym_name_len == 1)
2890 return collseqmb[br_elem->opr.name[0]];
2891 }
2892 return UINT_MAX(2147483647 *2U +1U);
2893 }
2894
2895 /* Local function for parse_bracket_exp used in _LIBC environment.
2896 Build the range expression which starts from START_ELEM, and ends
2897 at END_ELEM. The result are written to MBCSET and SBCSET.
2898 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2899 mbcset->range_ends, is a pointer argument since we may
2900 update it. */
2901
2902 auto inline reg_errcode_t
2903 __attribute ((always_inline))__attribute__ ((always_inline))
2904 build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
2905 re_charset_t *mbcset;
2906 int *range_alloc;
2907 bitset_t sbcset;
2908 bracket_elem_t *start_elem, *end_elem;
2909 {
2910 unsigned int ch;
2911 uint32_t start_collseq;
2912 uint32_t end_collseq;
2913
2914 /* Equivalence Classes and Character Classes can't be a range
2915 start/end. */
2916 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS__builtin_expect (start_elem->type == EQUIV_CLASS || start_elem
->type == CHAR_CLASS || end_elem->type == EQUIV_CLASS ||
end_elem->type == CHAR_CLASS, 0)
2917 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,__builtin_expect (start_elem->type == EQUIV_CLASS || start_elem
->type == CHAR_CLASS || end_elem->type == EQUIV_CLASS ||
end_elem->type == CHAR_CLASS, 0)
2918 0)__builtin_expect (start_elem->type == EQUIV_CLASS || start_elem
->type == CHAR_CLASS || end_elem->type == EQUIV_CLASS ||
end_elem->type == CHAR_CLASS, 0)
)
2919 return REG_ERANGE;
2920
2921 start_collseq = lookup_collation_sequence_value (start_elem);
2922 end_collseq = lookup_collation_sequence_value (end_elem);
2923 /* Check start/end collation sequence values. */
2924 if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)__builtin_expect (start_collseq == (2147483647 *2U +1U) || end_collseq
== (2147483647 *2U +1U), 0)
)
2925 return REG_ECOLLATE;
2926 if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)__builtin_expect ((syntax & ((((((((((((((((((unsigned long
int) 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1)) && start_collseq > end_collseq, 0)
)
2927 return REG_ERANGE;
2928
2929 /* Got valid collation sequence values, add them as a new entry.
2930 However, if we have no collation elements, and the character set
2931 is single byte, the single byte character set that we
2932 build below suffices. */
2933 if (nrules > 0 || dfa->mb_cur_max > 1)
2934 {
2935 /* Check the space of the arrays. */
2936 if (BE (*range_alloc == mbcset->nranges, 0)__builtin_expect (*range_alloc == mbcset->nranges, 0))
2937 {
2938 /* There is not enough space, need realloc. */
2939 uint32_t *new_array_start;
2940 uint32_t *new_array_end;
2941 int new_nranges;
2942
2943 /* +1 in case of mbcset->nranges is 0. */
2944 new_nranges = 2 * mbcset->nranges + 1;
2945 new_array_start = re_realloc (mbcset->range_starts, uint32_t,((mbcset->range_starts != ((void*)0)) ? (uint32_t *) realloc
(mbcset->range_starts,(new_nranges)*sizeof(uint32_t)) : (
uint32_t *) calloc(new_nranges,sizeof(uint32_t)))
2946 new_nranges)((mbcset->range_starts != ((void*)0)) ? (uint32_t *) realloc
(mbcset->range_starts,(new_nranges)*sizeof(uint32_t)) : (
uint32_t *) calloc(new_nranges,sizeof(uint32_t)))
;
2947 new_array_end = re_realloc (mbcset->range_ends, uint32_t,((mbcset->range_ends != ((void*)0)) ? (uint32_t *) realloc
(mbcset->range_ends,(new_nranges)*sizeof(uint32_t)) : (uint32_t
*) calloc(new_nranges,sizeof(uint32_t)))
2948 new_nranges)((mbcset->range_ends != ((void*)0)) ? (uint32_t *) realloc
(mbcset->range_ends,(new_nranges)*sizeof(uint32_t)) : (uint32_t
*) calloc(new_nranges,sizeof(uint32_t)))
;
2949
2950 if (BE (new_array_start == NULL || new_array_end == NULL, 0)__builtin_expect (new_array_start == ((void*)0) || new_array_end
== ((void*)0), 0)
)
2951 return REG_ESPACE;
2952
2953 mbcset->range_starts = new_array_start;
2954 mbcset->range_ends = new_array_end;
2955 *range_alloc = new_nranges;
2956 }
2957
2958 mbcset->range_starts[mbcset->nranges] = start_collseq;
2959 mbcset->range_ends[mbcset->nranges++] = end_collseq;
2960 }
2961
2962 /* Build the table for single byte characters. */
2963 for (ch = 0; ch < SBC_MAX256; ch++)
2964 {
2965 uint32_t ch_collseq;
2966 /*
2967 if (MB_CUR_MAX == 1)
2968 */
2969 if (nrules == 0)
2970 ch_collseq = collseqmb[ch];
2971 else
2972 ch_collseq = __collseq_table_lookup (collseqwc, __btowcbtowc (ch));
2973 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
2974 bitset_set (sbcset, ch)(sbcset[ch / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t)
1 << ch % (sizeof (bitset_word_t) * 8))
;
2975 }
2976 return REG_NOERROR;
2977 }
2978
2979 /* Local function for parse_bracket_exp used in _LIBC environment.
2980 Build the collating element which is represented by NAME.
2981 The result are written to MBCSET and SBCSET.
2982 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2983 pointer argument since we may update it. */
2984
2985 auto inline reg_errcode_t
2986 __attribute ((always_inline))__attribute__ ((always_inline))
2987 build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
2988 re_charset_t *mbcset;
2989 int *coll_sym_alloc;
2990 bitset_t sbcset;
2991 const unsigned char *name;
2992 {
2993 int32_t elem, idx;
2994 size_t name_len = strlen ((const char *) name);
2995 if (nrules != 0)
2996 {
2997 elem = seek_collating_symbol_entry (name, name_len);
2998 if (symb_table[2 * elem] != 0)
2999 {
3000 /* We found the entry. */
3001 idx = symb_table[2 * elem + 1];
3002 /* Skip the name of collating element name. */
3003 idx += 1 + extra[idx];
3004 }
3005 else if (symb_table[2 * elem] == 0 && name_len == 1)
3006 {
3007 /* No valid character, treat it as a normal
3008 character. */
3009 bitset_set (sbcset, name[0])(sbcset[name[0] / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << name[0] % (sizeof (bitset_word_t) * 8))
;
3010 return REG_NOERROR;
3011 }
3012 else
3013 return REG_ECOLLATE;
3014
3015 /* Got valid collation sequence, add it as a new entry. */
3016 /* Check the space of the arrays. */
3017 if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)__builtin_expect (*coll_sym_alloc == mbcset->ncoll_syms, 0
)
)
3018 {
3019 /* Not enough, realloc it. */
3020 /* +1 in case of mbcset->ncoll_syms is 0. */
3021 int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
3022 /* Use realloc since mbcset->coll_syms is NULL
3023 if *alloc == 0. */
3024 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,((mbcset->coll_syms != ((void*)0)) ? (int32_t *) realloc (
mbcset->coll_syms,(new_coll_sym_alloc)*sizeof(int32_t)) : (
int32_t *) calloc(new_coll_sym_alloc,sizeof(int32_t)))
3025 new_coll_sym_alloc)((mbcset->coll_syms != ((void*)0)) ? (int32_t *) realloc (
mbcset->coll_syms,(new_coll_sym_alloc)*sizeof(int32_t)) : (
int32_t *) calloc(new_coll_sym_alloc,sizeof(int32_t)))
;
3026 if (BE (new_coll_syms == NULL, 0)__builtin_expect (new_coll_syms == ((void*)0), 0))
3027 return REG_ESPACE;
3028 mbcset->coll_syms = new_coll_syms;
3029 *coll_sym_alloc = new_coll_sym_alloc;
3030 }
3031 mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
3032 return REG_NOERROR;
3033 }
3034 else
3035 {
3036 if (BE (name_len != 1, 0)__builtin_expect (name_len != 1, 0))
3037 return REG_ECOLLATE;
3038 else
3039 {
3040 bitset_set (sbcset, name[0])(sbcset[name[0] / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << name[0] % (sizeof (bitset_word_t) * 8))
;
3041 return REG_NOERROR;
3042 }
3043 }
3044 }
3045#endif
3046
3047 re_token_t br_token;
3048 re_bitset_ptr_t sbcset;
3049#ifdef RE_ENABLE_I18N
3050 re_charset_t *mbcset;
3051 int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
3052 int equiv_class_alloc = 0, char_class_alloc = 0;
3053#endif /* not RE_ENABLE_I18N */
3054 int non_match = 0;
3055 bin_tree_t *work_tree;
3056 int token_len;
3057 int first_round = 1;
3058#ifdef _LIBC
3059 collseqmb = (const unsigned char *)
3060 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3061 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3062 if (nrules)
3063 {
3064 /*
3065 if (MB_CUR_MAX > 1)
3066 */
3067 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3068 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
3069 symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3070 _NL_COLLATE_SYMB_TABLEMB);
3071 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3072 _NL_COLLATE_SYMB_EXTRAMB);
3073 }
3074#endif
3075 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3076#ifdef RE_ENABLE_I18N
3077 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3078#endif /* RE_ENABLE_I18N */
3079#ifdef RE_ENABLE_I18N
3080 if (BE (sbcset == NULL || mbcset == NULL, 0)__builtin_expect (sbcset == ((void*)0) || mbcset == ((void*)0
), 0)
)
3081#else
3082 if (BE (sbcset == NULL, 0)__builtin_expect (sbcset == ((void*)0), 0))
3083#endif /* RE_ENABLE_I18N */
3084 {
3085 *err = REG_ESPACE;
3086 return NULL((void*)0);
3087 }
3088
3089 token_len = peek_token_bracket (token, regexp, syntax);
3090 if (BE (token->type == END_OF_RE, 0)__builtin_expect (token->type == END_OF_RE, 0))
3091 {
3092 *err = REG_BADPAT;
3093 goto parse_bracket_exp_free_return;
3094 }
3095 if (token->type == OP_NON_MATCH_LIST)
3096 {
3097#ifdef RE_ENABLE_I18N
3098 mbcset->non_match = 1;
3099#endif /* not RE_ENABLE_I18N */
3100 non_match = 1;
3101 if (syntax & RE_HAT_LISTS_NOT_NEWLINE((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
)
)
3102 bitset_set (sbcset, '\n')(sbcset['\n' / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << '\n' % (sizeof (bitset_word_t) * 8))
;
3103 re_string_skip_bytes (regexp, token_len)((regexp)->cur_idx += (token_len)); /* Skip a token. */
3104 token_len = peek_token_bracket (token, regexp, syntax);
3105 if (BE (token->type == END_OF_RE, 0)__builtin_expect (token->type == END_OF_RE, 0))
3106 {
3107 *err = REG_BADPAT;
3108 goto parse_bracket_exp_free_return;
3109 }
3110 }
3111
3112 /* We treat the first ']' as a normal character. */
3113 if (token->type == OP_CLOSE_BRACKET)
3114 token->type = CHARACTER;
3115
3116 while (1)
3117 {
3118 bracket_elem_t start_elem, end_elem;
3119 unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE32];
3120 unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE32];
3121 reg_errcode_t ret;
3122 int token_len2 = 0, is_range_exp = 0;
3123 re_token_t token2;
3124
3125 start_elem.opr.name = start_name_buf;
3126 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
3127 syntax, first_round);
3128 if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))
3129 {
3130 *err = ret;
3131 goto parse_bracket_exp_free_return;
3132 }
3133 first_round = 0;
3134
3135 /* Get information about the next token. We need it in any case. */
3136 token_len = peek_token_bracket (token, regexp, syntax);
3137
3138 /* Do not check for ranges if we know they are not allowed. */
3139 if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
3140 {
3141 if (BE (token->type == END_OF_RE, 0)__builtin_expect (token->type == END_OF_RE, 0))
3142 {
3143 *err = REG_EBRACK;
3144 goto parse_bracket_exp_free_return;
3145 }
3146 if (token->type == OP_CHARSET_RANGE)
3147 {
3148 re_string_skip_bytes (regexp, token_len)((regexp)->cur_idx += (token_len)); /* Skip '-'. */
3149 token_len2 = peek_token_bracket (&token2, regexp, syntax);
3150 if (BE (token2.type == END_OF_RE, 0)__builtin_expect (token2.type == END_OF_RE, 0))
3151 {
3152 *err = REG_EBRACK;
3153 goto parse_bracket_exp_free_return;
3154 }
3155 if (token2.type == OP_CLOSE_BRACKET)
3156 {
3157 /* We treat the last '-' as a normal character. */
3158 re_string_skip_bytes (regexp, -token_len)((regexp)->cur_idx += (-token_len));
3159 token->type = CHARACTER;
3160 }
3161 else
3162 is_range_exp = 1;
3163 }
3164 }
3165
3166 if (is_range_exp == 1)
3167 {
3168 end_elem.opr.name = end_name_buf;
3169 ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
3170 dfa, syntax, 1);
3171 if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))
3172 {
3173 *err = ret;
3174 goto parse_bracket_exp_free_return;
3175 }
3176
3177 token_len = peek_token_bracket (token, regexp, syntax);
3178
3179#ifdef _LIBC
3180 *err = build_range_exp (sbcset, mbcset, &range_alloc,
3181 &start_elem, &end_elem);
3182#else
3183# ifdef RE_ENABLE_I18N
3184 *err = build_range_exp (sbcset,
3185 dfa->mb_cur_max > 1 ? mbcset : NULL((void*)0),
3186 &range_alloc, &start_elem, &end_elem);
3187# else
3188 *err = build_range_exp (sbcset, &start_elem, &end_elem);
3189# endif
3190#endif /* RE_ENABLE_I18N */
3191 if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))
3192 goto parse_bracket_exp_free_return;
3193 }
3194 else
3195 {
3196 switch (start_elem.type)
3197 {
3198 case SB_CHAR:
3199 bitset_set (sbcset, start_elem.opr.ch)(sbcset[start_elem.opr.ch / (sizeof (bitset_word_t) * 8)] |= (
bitset_word_t) 1 << start_elem.opr.ch % (sizeof (bitset_word_t
) * 8))
;
3200 break;
3201#ifdef RE_ENABLE_I18N
3202 case MB_CHAR:
3203 /* Check whether the array has enough space. */
3204 if (BE (mbchar_alloc == mbcset->nmbchars, 0)__builtin_expect (mbchar_alloc == mbcset->nmbchars, 0))
3205 {
3206 wchar_t *new_mbchars;
3207 /* Not enough, realloc it. */
3208 /* +1 in case of mbcset->nmbchars is 0. */
3209 mbchar_alloc = 2 * mbcset->nmbchars + 1;
3210 /* Use realloc since array is NULL if *alloc == 0. */
3211 new_mbchars = re_realloc (mbcset->mbchars, wchar_t,((mbcset->mbchars != ((void*)0)) ? (wchar_t *) realloc (mbcset
->mbchars,(mbchar_alloc)*sizeof(wchar_t)) : (wchar_t *) calloc
(mbchar_alloc,sizeof(wchar_t)))
3212 mbchar_alloc)((mbcset->mbchars != ((void*)0)) ? (wchar_t *) realloc (mbcset
->mbchars,(mbchar_alloc)*sizeof(wchar_t)) : (wchar_t *) calloc
(mbchar_alloc,sizeof(wchar_t)))
;
3213 if (BE (new_mbchars == NULL, 0)__builtin_expect (new_mbchars == ((void*)0), 0))
3214 goto parse_bracket_exp_espace;
3215 mbcset->mbchars = new_mbchars;
3216 }
3217 mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
3218 break;
3219#endif /* RE_ENABLE_I18N */
3220 case EQUIV_CLASS:
3221 *err = build_equiv_class (sbcset,
3222#ifdef RE_ENABLE_I18N
3223 mbcset, &equiv_class_alloc,
3224#endif /* RE_ENABLE_I18N */
3225 start_elem.opr.name);
3226 if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))
3227 goto parse_bracket_exp_free_return;
3228 break;
3229 case COLL_SYM:
3230 *err = build_collating_symbol (sbcset,
3231#ifdef RE_ENABLE_I18N
3232 mbcset, &coll_sym_alloc,
3233#endif /* RE_ENABLE_I18N */
3234 start_elem.opr.name);
3235 if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))
3236 goto parse_bracket_exp_free_return;
3237 break;
3238 case CHAR_CLASS:
3239 *err = build_charclass (regexp->trans, sbcset,
3240#ifdef RE_ENABLE_I18N
3241 mbcset, &char_class_alloc,
3242#endif /* RE_ENABLE_I18N */
3243 (const char *) start_elem.opr.name, syntax);
3244 if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))
3245 goto parse_bracket_exp_free_return;
3246 break;
3247 default:
3248 assert (0)(__builtin_expect(!(0), 0) ? __assert_rtn(__func__, "compat/regex/regcomp.c"
, 3248, "0") : (void)0)
;
3249 break;
3250 }
3251 }
3252 if (BE (token->type == END_OF_RE, 0)__builtin_expect (token->type == END_OF_RE, 0))
3253 {
3254 *err = REG_EBRACK;
3255 goto parse_bracket_exp_free_return;
3256 }
3257 if (token->type == OP_CLOSE_BRACKET)
3258 break;
3259 }
3260
3261 re_string_skip_bytes (regexp, token_len)((regexp)->cur_idx += (token_len)); /* Skip a token. */
3262
3263 /* If it is non-matching list. */
3264 if (non_match)
3265 bitset_not (sbcset);
3266
3267#ifdef RE_ENABLE_I18N
3268 /* Ensure only single byte characters are set. */
3269 if (dfa->mb_cur_max > 1)
3270 bitset_mask (sbcset, dfa->sb_char);
3271
3272 if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
3273 || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
3274 || mbcset->non_match)))
3275 {
3276 bin_tree_t *mbc_tree;
3277 int sbc_idx;
3278 /* Build a tree for complex bracket. */
3279 dfa->has_mb_node = 1;
3280 br_token.type = COMPLEX_BRACKET;
3281 br_token.opr.mbcset = mbcset;
3282 mbc_tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), &br_token);
3283 if (BE (mbc_tree == NULL, 0)__builtin_expect (mbc_tree == ((void*)0), 0))
3284 goto parse_bracket_exp_espace;
3285 for (sbc_idx = 0; sbc_idx < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++sbc_idx)
3286 if (sbcset[sbc_idx])
3287 break;
3288 /* If there are no bits set in sbcset, there is no point
3289 of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
3290 if (sbc_idx < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)))
3291 {
3292 /* Build a tree for simple bracket. */
3293 br_token.type = SIMPLE_BRACKET;
3294 br_token.opr.sbcset = sbcset;
3295 work_tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), &br_token);
3296 if (BE (work_tree == NULL, 0)__builtin_expect (work_tree == ((void*)0), 0))
3297 goto parse_bracket_exp_espace;
3298
3299 /* Then join them by ALT node. */
3300 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
3301 if (BE (work_tree == NULL, 0)__builtin_expect (work_tree == ((void*)0), 0))
3302 goto parse_bracket_exp_espace;
3303 }
3304 else
3305 {
3306 re_free (sbcset)free (sbcset);
3307 work_tree = mbc_tree;
3308 }
3309 }
3310 else
3311#endif /* not RE_ENABLE_I18N */
3312 {
3313#ifdef RE_ENABLE_I18N
3314 free_charset (mbcset);
3315#endif
3316 /* Build a tree for simple bracket. */
3317 br_token.type = SIMPLE_BRACKET;
3318 br_token.opr.sbcset = sbcset;
3319 work_tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), &br_token);
3320 if (BE (work_tree == NULL, 0)__builtin_expect (work_tree == ((void*)0), 0))
3321 goto parse_bracket_exp_espace;
3322 }
3323 return work_tree;
3324
3325 parse_bracket_exp_espace:
3326 *err = REG_ESPACE;
3327 parse_bracket_exp_free_return:
3328 re_free (sbcset)free (sbcset);
3329#ifdef RE_ENABLE_I18N
3330 free_charset (mbcset);
3331#endif /* RE_ENABLE_I18N */
3332 return NULL((void*)0);
3333}
3334
3335/* Parse an element in the bracket expression. */
3336
3337static reg_errcode_t
3338parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
3339 re_token_t *token, int token_len, re_dfa_t *dfa,
3340 reg_syntax_t syntax, int accept_hyphen)
3341{
3342#ifdef RE_ENABLE_I18N
3343 int cur_char_size;
3344 cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)((regexp)->cur_idx));
3345 if (cur_char_size > 1)
3346 {
3347 elem->type = MB_CHAR;
3348 elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp)((regexp)->cur_idx));
3349 re_string_skip_bytes (regexp, cur_char_size)((regexp)->cur_idx += (cur_char_size));
3350 return REG_NOERROR;
3351 }
3352#endif /* RE_ENABLE_I18N */
3353 re_string_skip_bytes (regexp, token_len)((regexp)->cur_idx += (token_len)); /* Skip a token. */
3354 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
3355 || token->type == OP_OPEN_EQUIV_CLASS)
3356 return parse_bracket_symbol (elem, regexp, token);
3357 if (BE (token->type == OP_CHARSET_RANGE, 0)__builtin_expect (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
3358 {
3359 /* A '-' must only appear as anything but a range indicator before
3360 the closing bracket. Everything else is an error. */
3361 re_token_t token2;
3362 (void) peek_token_bracket (&token2, regexp, syntax);
3363 if (token2.type != OP_CLOSE_BRACKET)
3364 /* The actual error value is not standardized since this whole
3365 case is undefined. But ERANGE makes good sense. */
3366 return REG_ERANGE;
3367 }
3368 elem->type = SB_CHAR;
3369 elem->opr.ch = token->opr.c;
3370 return REG_NOERROR;
3371}
3372
3373/* Parse a bracket symbol in the bracket expression. Bracket symbols are
3374 such as [:<character_class>:], [.<collating_element>.], and
3375 [=<equivalent_class>=]. */
3376
3377static reg_errcode_t
3378parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
3379 re_token_t *token)
3380{
3381 unsigned char ch, delim = token->opr.c;
3382 int i = 0;
3383 if (re_string_eoi(regexp)((regexp)->stop <= (regexp)->cur_idx))
3384 return REG_EBRACK;
3385 for (;; ++i)
3386 {
3387 if (i >= BRACKET_NAME_BUF_SIZE32)
3388 return REG_EBRACK;
3389 if (token->type == OP_OPEN_CHAR_CLASS)
3390 ch = re_string_fetch_byte_case (regexp);
3391 else
3392 ch = re_string_fetch_byte (regexp)((regexp)->mbs[(regexp)->cur_idx++]);
3393 if (re_string_eoi(regexp)((regexp)->stop <= (regexp)->cur_idx))
3394 return REG_EBRACK;
3395 if (ch == delim && re_string_peek_byte (regexp, 0)((regexp)->mbs[(regexp)->cur_idx + 0]) == ']')
3396 break;
3397 elem->opr.name[i] = ch;
3398 }
3399 re_string_skip_bytes (regexp, 1)((regexp)->cur_idx += (1));
3400 elem->opr.name[i] = '\0';
3401 switch (token->type)
3402 {
3403 case OP_OPEN_COLL_ELEM:
3404 elem->type = COLL_SYM;
3405 break;
3406 case OP_OPEN_EQUIV_CLASS:
3407 elem->type = EQUIV_CLASS;
3408 break;
3409 case OP_OPEN_CHAR_CLASS:
3410 elem->type = CHAR_CLASS;
3411 break;
3412 default:
3413 break;
3414 }
3415 return REG_NOERROR;
3416}
3417
3418 /* Helper function for parse_bracket_exp.
3419 Build the equivalence class which is represented by NAME.
3420 The result are written to MBCSET and SBCSET.
3421 EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
3422 is a pointer argument since we may update it. */
3423
3424static reg_errcode_t
3425#ifdef RE_ENABLE_I18N
3426build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
3427 int *equiv_class_alloc, const unsigned char *name)
3428#else /* not RE_ENABLE_I18N */
3429build_equiv_class (bitset_t sbcset, const unsigned char *name)
3430#endif /* not RE_ENABLE_I18N */
3431{
3432#ifdef _LIBC
3433 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3434 if (nrules != 0)
3435 {
3436 const int32_t *table, *indirect;
3437 const unsigned char *weights, *extra, *cp;
3438 unsigned char char_buf[2];
3439 int32_t idx1, idx2;
3440 unsigned int ch;
3441 size_t len;
3442 /* This #include defines a local function! */
3443# include <locale/weight.h>
3444 /* Calculate the index for equivalence class. */
3445 cp = name;
3446 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3447 weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3448 _NL_COLLATE_WEIGHTMB);
3449 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3450 _NL_COLLATE_EXTRAMB);
3451 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3452 _NL_COLLATE_INDIRECTMB);
3453 idx1 = findidx (&cp);
3454 if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)__builtin_expect (idx1 == 0 || cp < name + strlen ((const char
*) name), 0)
)
3455 /* This isn't a valid character. */
3456 return REG_ECOLLATE;
3457
3458 /* Build single byte matcing table for this equivalence class. */
3459 char_buf[1] = (unsigned char) '\0';
3460 len = weights[idx1 & 0xffffff];
3461 for (ch = 0; ch < SBC_MAX256; ++ch)
3462 {
3463 char_buf[0] = ch;
3464 cp = char_buf;
3465 idx2 = findidx (&cp);
3466/*
3467 idx2 = table[ch];
3468*/
3469 if (idx2 == 0)
3470 /* This isn't a valid character. */
3471 continue;
3472 /* Compare only if the length matches and the collation rule
3473 index is the same. */
3474 if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
3475 {
3476 int cnt = 0;
3477
3478 while (cnt <= len &&
3479 weights[(idx1 & 0xffffff) + 1 + cnt]
3480 == weights[(idx2 & 0xffffff) + 1 + cnt])
3481 ++cnt;
3482
3483 if (cnt > len)
3484 bitset_set (sbcset, ch)(sbcset[ch / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t)
1 << ch % (sizeof (bitset_word_t) * 8))
;
3485 }
3486 }
3487 /* Check whether the array has enough space. */
3488 if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)__builtin_expect (*equiv_class_alloc == mbcset->nequiv_classes
, 0)
)
3489 {
3490 /* Not enough, realloc it. */
3491 /* +1 in case of mbcset->nequiv_classes is 0. */
3492 int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
3493 /* Use realloc since the array is NULL if *alloc == 0. */
3494 int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,((mbcset->equiv_classes != ((void*)0)) ? (int32_t *) realloc
(mbcset->equiv_classes,(new_equiv_class_alloc)*sizeof(int32_t
)) : (int32_t *) calloc(new_equiv_class_alloc,sizeof(int32_t)
))
3495 int32_t,((mbcset->equiv_classes != ((void*)0)) ? (int32_t *) realloc
(mbcset->equiv_classes,(new_equiv_class_alloc)*sizeof(int32_t
)) : (int32_t *) calloc(new_equiv_class_alloc,sizeof(int32_t)
))
3496 new_equiv_class_alloc)((mbcset->equiv_classes != ((void*)0)) ? (int32_t *) realloc
(mbcset->equiv_classes,(new_equiv_class_alloc)*sizeof(int32_t
)) : (int32_t *) calloc(new_equiv_class_alloc,sizeof(int32_t)
))
;
3497 if (BE (new_equiv_classes == NULL, 0)__builtin_expect (new_equiv_classes == ((void*)0), 0))
3498 return REG_ESPACE;
3499 mbcset->equiv_classes = new_equiv_classes;
3500 *equiv_class_alloc = new_equiv_class_alloc;
3501 }
3502 mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
3503 }
3504 else
3505#endif /* _LIBC */
3506 {
3507 if (BE (strlen ((const char *) name) != 1, 0)__builtin_expect (strlen ((const char *) name) != 1, 0))
3508 return REG_ECOLLATE;
3509 bitset_set (sbcset, *name)(sbcset[*name / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << *name % (sizeof (bitset_word_t) * 8))
;
3510 }
3511 return REG_NOERROR;
3512}
3513
3514 /* Helper function for parse_bracket_exp.
3515 Build the character class which is represented by NAME.
3516 The result are written to MBCSET and SBCSET.
3517 CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
3518 is a pointer argument since we may update it. */
3519
3520static reg_errcode_t
3521#ifdef RE_ENABLE_I18N
3522build_charclass (RE_TRANSLATE_TYPEunsigned char * trans, bitset_t sbcset,
3523 re_charset_t *mbcset, int *char_class_alloc,
3524 const char *class_name, reg_syntax_t syntax)
3525#else /* not RE_ENABLE_I18N */
3526build_charclass (RE_TRANSLATE_TYPEunsigned char * trans, bitset_t sbcset,
3527 const char *class_name, reg_syntax_t syntax)
3528#endif /* not RE_ENABLE_I18N */
3529{
3530 int i;
3531
3532 /* In case of REG_ICASE "upper" and "lower" match the both of
3533 upper and lower cases. */
3534 if ((syntax & RE_ICASE(((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1)
)
3535 && (strcmp (class_name, "upper") == 0 || strcmp (class_name, "lower") == 0))
3536 class_name = "alpha";
3537
3538#ifdef RE_ENABLE_I18N
3539 /* Check the space of the arrays. */
3540 if (BE (*char_class_alloc == mbcset->nchar_classes, 0)__builtin_expect (*char_class_alloc == mbcset->nchar_classes
, 0)
)
3541 {
3542 /* Not enough, realloc it. */
3543 /* +1 in case of mbcset->nchar_classes is 0. */
3544 int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
3545 /* Use realloc since array is NULL if *alloc == 0. */
3546 wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,((mbcset->char_classes != ((void*)0)) ? (wctype_t *) realloc
(mbcset->char_classes,(new_char_class_alloc)*sizeof(wctype_t
)) : (wctype_t *) calloc(new_char_class_alloc,sizeof(wctype_t
)))
3547 new_char_class_alloc)((mbcset->char_classes != ((void*)0)) ? (wctype_t *) realloc
(mbcset->char_classes,(new_char_class_alloc)*sizeof(wctype_t
)) : (wctype_t *) calloc(new_char_class_alloc,sizeof(wctype_t
)))
;
3548 if (BE (new_char_classes == NULL, 0)__builtin_expect (new_char_classes == ((void*)0), 0))
3549 return REG_ESPACE;
3550 mbcset->char_classes = new_char_classes;
3551 *char_class_alloc = new_char_class_alloc;
3552 }
3553 mbcset->char_classes[mbcset->nchar_classes++] = __wctypewctype (class_name);
3554#endif /* RE_ENABLE_I18N */
3555
3556#define BUILD_CHARCLASS_LOOP(ctype_func)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (ctype_func (i)) (sbcset[trans[i] / (
sizeof (bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans
[i] % (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (ctype_func (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0)
\
3557 do { \
3558 if (BE (trans != NULL, 0)__builtin_expect (trans != ((void*)0), 0)) \
3559 { \
3560 for (i = 0; i < SBC_MAX256; ++i) \
3561 if (ctype_func (i)) \
3562 bitset_set (sbcset, trans[i])(sbcset[trans[i] / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << trans[i] % (sizeof (bitset_word_t) * 8))
; \
3563 } \
3564 else \
3565 { \
3566 for (i = 0; i < SBC_MAX256; ++i) \
3567 if (ctype_func (i)) \
3568 bitset_set (sbcset, i)(sbcset[i / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t) 1
<< i % (sizeof (bitset_word_t) * 8))
; \
3569 } \
3570 } while (0)
3571
3572 if (strcmp (class_name, "alnum") == 0)
3573 BUILD_CHARCLASS_LOOP (isalnum)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isalnum (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isalnum (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0)
;
3574 else if (strcmp (class_name, "cntrl") == 0)
3575 BUILD_CHARCLASS_LOOP (iscntrl)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (iscntrl (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (iscntrl (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0)
;
3576 else if (strcmp (class_name, "lower") == 0)
3577 BUILD_CHARCLASS_LOOP (islower)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (islower (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (islower (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0)
;
3578 else if (strcmp (class_name, "space") == 0)
3579 BUILD_CHARCLASS_LOOP (isspace)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isspace (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isspace (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0)
;
3580 else if (strcmp (class_name, "alpha") == 0)
3581 BUILD_CHARCLASS_LOOP (isalpha)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isalpha (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isalpha (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0)
;
3582 else if (strcmp (class_name, "digit") == 0)
3583 BUILD_CHARCLASS_LOOP (isdigit)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isdigit (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isdigit (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0)
;
3584 else if (strcmp (class_name, "print") == 0)
3585 BUILD_CHARCLASS_LOOP (isprint)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isprint (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isprint (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0)
;
3586 else if (strcmp (class_name, "upper") == 0)
3587 BUILD_CHARCLASS_LOOP (isupper)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isupper (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isupper (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0)
;
3588 else if (strcmp (class_name, "blank") == 0)
3589#ifndef GAWK1
3590 BUILD_CHARCLASS_LOOP (isblank)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isblank (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isblank (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0)
;
3591#else
3592 /* see comments above */
3593 BUILD_CHARCLASS_LOOP (is_blank)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (is_blank (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (is_blank (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0)
;
3594#endif
3595 else if (strcmp (class_name, "graph") == 0)
3596 BUILD_CHARCLASS_LOOP (isgraph)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isgraph (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isgraph (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0)
;
3597 else if (strcmp (class_name, "punct") == 0)
3598 BUILD_CHARCLASS_LOOP (ispunct)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (ispunct (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (ispunct (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0)
;
3599 else if (strcmp (class_name, "xdigit") == 0)
3600 BUILD_CHARCLASS_LOOP (isxdigit)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isxdigit (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isxdigit (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0)
;
3601 else
3602 return REG_ECTYPE;
3603
3604 return REG_NOERROR;
3605}
3606
3607static bin_tree_t *
3608build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPEunsigned char * trans,
3609 const char *class_name,
3610 const char *extra, int non_match,
3611 reg_errcode_t *err)
3612{
3613 re_bitset_ptr_t sbcset;
3614#ifdef RE_ENABLE_I18N
3615 re_charset_t *mbcset;
3616 int alloc = 0;
3617#endif /* not RE_ENABLE_I18N */
3618 reg_errcode_t ret;
3619 re_token_t br_token;
3620 bin_tree_t *tree;
3621
3622 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3623#ifdef RE_ENABLE_I18N
3624 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3625#endif /* RE_ENABLE_I18N */
3626
3627#ifdef RE_ENABLE_I18N
3628 if (BE (sbcset == NULL || mbcset == NULL, 0)__builtin_expect (sbcset == ((void*)0) || mbcset == ((void*)0
), 0)
)
3629#else /* not RE_ENABLE_I18N */
3630 if (BE (sbcset == NULL, 0)__builtin_expect (sbcset == ((void*)0), 0))
3631#endif /* not RE_ENABLE_I18N */
3632 {
3633 *err = REG_ESPACE;
3634 return NULL((void*)0);
3635 }
3636
3637 if (non_match)
3638 {
3639#ifdef RE_ENABLE_I18N
3640 mbcset->non_match = 1;
3641#endif /* not RE_ENABLE_I18N */
3642 }
3643
3644 /* We don't care the syntax in this case. */
3645 ret = build_charclass (trans, sbcset,
3646#ifdef RE_ENABLE_I18N
3647 mbcset, &alloc,
3648#endif /* RE_ENABLE_I18N */
3649 class_name, 0);
3650
3651 if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))
3652 {
3653 re_free (sbcset)free (sbcset);
3654#ifdef RE_ENABLE_I18N
3655 free_charset (mbcset);
3656#endif /* RE_ENABLE_I18N */
3657 *err = ret;
3658 return NULL((void*)0);
3659 }
3660 /* \w match '_' also. */
3661 for (; *extra; extra++)
3662 bitset_set (sbcset, *extra)(sbcset[*extra / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << *extra % (sizeof (bitset_word_t) * 8))
;
3663
3664 /* If it is non-matching list. */
3665 if (non_match)
3666 bitset_not (sbcset);
3667
3668#ifdef RE_ENABLE_I18N
3669 /* Ensure only single byte characters are set. */
3670 if (dfa->mb_cur_max > 1)
3671 bitset_mask (sbcset, dfa->sb_char);
3672#endif
3673
3674 /* Build a tree for simple bracket. */
3675 br_token.type = SIMPLE_BRACKET;
3676 br_token.opr.sbcset = sbcset;
3677 tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), &br_token);
3678 if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))
3679 goto build_word_op_espace;
3680
3681#ifdef RE_ENABLE_I18N
3682 if (dfa->mb_cur_max > 1)
3683 {
3684 bin_tree_t *mbc_tree;
3685 /* Build a tree for complex bracket. */
3686 br_token.type = COMPLEX_BRACKET;
3687 br_token.opr.mbcset = mbcset;
3688 dfa->has_mb_node = 1;
3689 mbc_tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), &br_token);
3690 if (BE (mbc_tree == NULL, 0)__builtin_expect (mbc_tree == ((void*)0), 0))
3691 goto build_word_op_espace;
3692 /* Then join them by ALT node. */
3693 tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
3694 if (BE (mbc_tree != NULL, 1)__builtin_expect (mbc_tree != ((void*)0), 1))
3695 return tree;
3696 }
3697 else
3698 {
3699 free_charset (mbcset);
3700 return tree;
3701 }
3702#else /* not RE_ENABLE_I18N */
3703 return tree;
3704#endif /* not RE_ENABLE_I18N */
3705
3706 build_word_op_espace:
3707 re_free (sbcset)free (sbcset);
3708#ifdef RE_ENABLE_I18N
3709 free_charset (mbcset);
3710#endif /* RE_ENABLE_I18N */
3711 *err = REG_ESPACE;
3712 return NULL((void*)0);
3713}
3714
3715/* This is intended for the expressions like "a{1,3}".
3716 Fetch a number from `input', and return the number.
3717 Return -1, if the number field is empty like "{,1}".
3718 Return -2, if an error has occurred. */
3719
3720static int
3721fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
3722{
3723 int num = -1;
3724 unsigned char c;
3725 while (1)
3726 {
3727 fetch_token (token, input, syntax);
3728 c = token->opr.c;
3729 if (BE (token->type == END_OF_RE, 0)__builtin_expect (token->type == END_OF_RE, 0))
3730 return -2;
3731 if (token->type == OP_CLOSE_DUP_NUM || c == ',')
3732 break;
3733 num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
3734 ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));
3735 num = (num > RE_DUP_MAX(0x7fff)) ? -2 : num;
3736 }
3737 return num;
3738}
3739
3740#ifdef RE_ENABLE_I18N
3741static void
3742free_charset (re_charset_t *cset)
3743{
3744 re_free (cset->mbchars)free (cset->mbchars);
3745# ifdef _LIBC
3746 re_free (cset->coll_syms)free (cset->coll_syms);
3747 re_free (cset->equiv_classes)free (cset->equiv_classes);
3748 re_free (cset->range_starts)free (cset->range_starts);
3749 re_free (cset->range_ends)free (cset->range_ends);
3750# endif
3751 re_free (cset->char_classes)free (cset->char_classes);
3752 re_free (cset)free (cset);
3753}
3754#endif /* RE_ENABLE_I18N */
3755
3756/* Functions for binary tree operation. */
3757
3758/* Create a tree node. */
3759
3760static bin_tree_t *
3761create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3762 re_token_type_t type)
3763{
3764 re_token_t t;
3765 t.type = type;
3766 return create_token_tree (dfa, left, right, &t);
14
Calling 'create_token_tree'
19
Returning from 'create_token_tree'
3767}
3768
3769static bin_tree_t *
3770create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3771 const re_token_t *token)
3772{
3773 bin_tree_t *tree;
3774 if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0)__builtin_expect (dfa->str_tree_storage_idx == ((1024 - sizeof
(void *)) / sizeof (bin_tree_t)), 0)
)
15
Taking false branch
3775 {
3776 bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1)((bin_tree_storage_t *) malloc ((1) * sizeof (bin_tree_storage_t
)))
;
3777
3778 if (storage == NULL((void*)0))
3779 return NULL((void*)0);
3780 storage->next = dfa->str_tree_storage;
3781 dfa->str_tree_storage = storage;
3782 dfa->str_tree_storage_idx = 0;
3783 }
3784 tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
3785
3786 tree->parent = NULL((void*)0);
3787 tree->left = left;
3788 tree->right = right;
3789 tree->token = *token;
3790 tree->token.duplicated = 0;
3791 tree->token.opt_subexp = 0;
3792 tree->first = NULL((void*)0);
16
Null pointer value stored to field 'first'
3793 tree->next = NULL((void*)0);
3794 tree->node_idx = -1;
3795
3796 if (left != NULL((void*)0))
17
Taking false branch
3797 left->parent = tree;
3798 if (right != NULL((void*)0))
18
Taking false branch
3799 right->parent = tree;
3800 return tree;
3801}
3802
3803/* Mark the tree SRC as an optional subexpression.
3804 To be called from preorder or postorder. */
3805
3806static reg_errcode_t
3807mark_opt_subexp (void *extra, bin_tree_t *node)
3808{
3809 int idx = (int) (intptr_t) extra;
3810 if (node->token.type == SUBEXP && node->token.opr.idx == idx)
3811 node->token.opt_subexp = 1;
3812
3813 return REG_NOERROR;
3814}
3815
3816/* Free the allocated memory inside NODE. */
3817
3818static void
3819free_token (re_token_t *node)
3820{
3821#ifdef RE_ENABLE_I18N
3822 if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
3823 free_charset (node->opr.mbcset);
3824 else
3825#endif /* RE_ENABLE_I18N */
3826 if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
3827 re_free (node->opr.sbcset)free (node->opr.sbcset);
3828}
3829
3830/* Worker function for tree walking. Free the allocated memory inside NODE
3831 and its children. */
3832
3833static reg_errcode_t
3834free_tree (void *extra, bin_tree_t *node)
3835{
3836 free_token (&node->token);
3837 return REG_NOERROR;
3838}
3839
3840
3841/* Duplicate the node SRC, and return new node. This is a preorder
3842 visit similar to the one implemented by the generic visitor, but
3843 we need more infrastructure to maintain two parallel trees --- so,
3844 it's easier to duplicate. */
3845
3846static bin_tree_t *
3847duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
3848{
3849 const bin_tree_t *node;
3850 bin_tree_t *dup_root;
3851 bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
3852
3853 for (node = root; ; )
3854 {
3855 /* Create a new tree and link it back to the current parent. */
3856 *p_new = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), &node->token);
3857 if (*p_new == NULL((void*)0))
3858 return NULL((void*)0);
3859 (*p_new)->parent = dup_node;
3860 (*p_new)->token.duplicated = 1;
3861 dup_node = *p_new;
3862
3863 /* Go to the left node, or up and to the right. */
3864 if (node->left)
3865 {
3866 node = node->left;
3867 p_new = &dup_node->left;
3868 }
3869 else
3870 {
3871 const bin_tree_t *prev = NULL((void*)0);
3872 while (node->right == prev || node->right == NULL((void*)0))
3873 {
3874 prev = node;
3875 node = node->parent;
3876 dup_node = dup_node->parent;
3877 if (!node)
3878 return dup_root;
3879 }
3880 node = node->right;
3881 p_new = &dup_node->right;
3882 }
3883 }
3884}