compat/regex/regcomp.c

Bug Summary

File:	compat/regex/regcomp.c
Location:	line 1003, column 11
Description:	Access to field 'node_idx' results in a dereference of a null pointer (loaded from field 'first')

Annotated Source Code

/* Extended regular expression matching and search library.

This file is part of the GNU C Library.

Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.

The GNU C Library is free software; you can redistribute it and/or

modify it under the terms of the GNU Lesser General Public

License as published by the Free Software Foundation; either

version 2.1 of the License, or (at your option) any later version.

The GNU C Library is distributed in the hope that it will be useful,

but WITHOUT ANY WARRANTY; without even the implied warranty of

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public

License along with the GNU C Library; if not, write to the Free

Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA

02110-1301 USA. */

static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,

size_t length, reg_syntax_t syntax);

static void re_compile_fastmap_iter (regex_t *bufp,

const re_dfastate_t *init_state,

char *fastmap);

static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);

#ifdef RE_ENABLE_I18N

static void free_charset (re_charset_t *cset);

#endif /* RE_ENABLE_I18N */

static void free_workarea_compile (regex_t *preg);

static reg_errcode_t create_initial_state (re_dfa_t *dfa);

#ifdef RE_ENABLE_I18N

static void optimize_utf8 (re_dfa_t *dfa);

#endif

static reg_errcode_t analyze (regex_t *preg);

static reg_errcode_t preorder (bin_tree_t *root,

reg_errcode_t (fn (void *, bin_tree_t *)),

void *extra);

static reg_errcode_t postorder (bin_tree_t *root,

reg_errcode_t (fn (void *, bin_tree_t *)),

void *extra);

static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);

static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);

static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,

bin_tree_t *node);

static reg_errcode_t calc_first (void *extra, bin_tree_t *node);

static reg_errcode_t calc_next (void *extra, bin_tree_t *node);

static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);

static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);

static int search_duplicated_node (const re_dfa_t *dfa, int org_node,

unsigned int constraint);

static reg_errcode_t calc_eclosure (re_dfa_t *dfa);

static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,

int node, int root);

static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);

static int fetch_number (re_string_t *input, re_token_t *token,

reg_syntax_t syntax);

static int peek_token (re_token_t *token, re_string_t *input,

reg_syntax_t syntax) internal_function;

static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,

reg_syntax_t syntax, reg_errcode_t *err);

static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,

re_token_t *token, reg_syntax_t syntax,

int nest, reg_errcode_t *err);

static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,

re_token_t *token, reg_syntax_t syntax,

int nest, reg_errcode_t *err);

static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,

re_token_t *token, reg_syntax_t syntax,

int nest, reg_errcode_t *err);

static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,

re_token_t *token, reg_syntax_t syntax,

int nest, reg_errcode_t *err);

static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,

re_dfa_t *dfa, re_token_t *token,

reg_syntax_t syntax, reg_errcode_t *err);

static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,

re_token_t *token, reg_syntax_t syntax,

reg_errcode_t *err);

static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,

re_string_t *regexp,

re_token_t *token, int token_len,

re_dfa_t *dfa,

reg_syntax_t syntax,

int accept_hyphen);

static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,

re_string_t *regexp,

re_token_t *token);

#ifdef RE_ENABLE_I18N

static reg_errcode_t build_equiv_class (bitset_t sbcset,

re_charset_t *mbcset,

int *equiv_class_alloc,

const unsigned char *name);

static reg_errcode_t build_charclass (RE_TRANSLATE_TYPEunsigned char * trans,

bitset_t sbcset,

re_charset_t *mbcset,

int *char_class_alloc,

const char *class_name,

reg_syntax_t syntax);

100

#else /* not RE_ENABLE_I18N */

101

static reg_errcode_t build_equiv_class (bitset_t sbcset,

102

const unsigned char *name);

103

static reg_errcode_t build_charclass (RE_TRANSLATE_TYPEunsigned char * trans,

104

bitset_t sbcset,

105

const char *class_name,

106

reg_syntax_t syntax);

107

#endif /* not RE_ENABLE_I18N */

108

static bin_tree_t *build_charclass_op (re_dfa_t *dfa,

109

RE_TRANSLATE_TYPEunsigned char * trans,

110

const char *class_name,

111

const char *extra,

112

int non_match, reg_errcode_t *err);

113

static bin_tree_t *create_tree (re_dfa_t *dfa,

114

bin_tree_t *left, bin_tree_t *right,

115

re_token_type_t type);

116

static bin_tree_t *create_token_tree (re_dfa_t *dfa,

117

bin_tree_t *left, bin_tree_t *right,

118

const re_token_t *token);

119

static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);

120

static void free_token (re_token_t *node);

121

static reg_errcode_t free_tree (void *extra, bin_tree_t *node);

122

static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);

123

124

/* This table gives an error message for each of the error codes listed

125

in regex.h. Obviously the order here has to be same as there.

126

POSIX doesn't require that we do anything for REG_NOERROR,

127

but why not be nice? */

128

129

const char __re_error_msgid[] attribute_hidden =

130

{

131

#define REG_NOERROR_IDX0 0

132

gettext_noop ("Success")"Success" /* REG_NOERROR */

133

"\0"

134

#define REG_NOMATCH_IDX(0 + sizeof "Success") (REG_NOERROR_IDX0 + sizeof "Success")

135

gettext_noop ("No match")"No match" /* REG_NOMATCH */

136

"\0"

137

#define REG_BADPAT_IDX((0 + sizeof "Success") + sizeof "No match") (REG_NOMATCH_IDX(0 + sizeof "Success") + sizeof "No match")

138

gettext_noop ("Invalid regular expression")"Invalid regular expression" /* REG_BADPAT */

139

"\0"

140

#define REG_ECOLLATE_IDX(((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) (REG_BADPAT_IDX((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression")

141

gettext_noop ("Invalid collation character")"Invalid collation character" /* REG_ECOLLATE */

142

"\0"

143

#define REG_ECTYPE_IDX((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") (REG_ECOLLATE_IDX(((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character")

144

gettext_noop ("Invalid character class name")"Invalid character class name" /* REG_ECTYPE */

145

"\0"

146

#define REG_EESCAPE_IDX(((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) (REG_ECTYPE_IDX((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name")

147

gettext_noop ("Trailing backslash")"Trailing backslash" /* REG_EESCAPE */

148

"\0"

149

#define REG_ESUBREG_IDX((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") (REG_EESCAPE_IDX(((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash")

150

gettext_noop ("Invalid back reference")"Invalid back reference" /* REG_ESUBREG */

151

"\0"

152

#define REG_EBRACK_IDX(((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
) (REG_ESUBREG_IDX((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference")

153

gettext_noop ("Unmatched [ or [^")"Unmatched [ or [^" /* REG_EBRACK */

154

"\0"

155

#define REG_EPAREN_IDX((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
) + sizeof "Unmatched [ or [^") (REG_EBRACK_IDX(((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
) + sizeof "Unmatched [ or [^")

156

gettext_noop ("Unmatched ( or \\(")"Unmatched ( or \\(" /* REG_EPAREN */

157

"\0"

158

#define REG_EBRACE_IDX(((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
) + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\("
) (REG_EPAREN_IDX((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
) + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\(")

159

gettext_noop ("Unmatched \\{")"Unmatched \\{" /* REG_EBRACE */

160

"\0"

161

#define REG_BADBR_IDX((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") (REG_EBRACE_IDX(((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
) + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\("
) + sizeof "Unmatched \\{")

162

gettext_noop ("Invalid content of \\{\\}")"Invalid content of \\{\\}" /* REG_BADBR */

163

"\0"

164

#define REG_ERANGE_IDX(((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") (REG_BADBR_IDX((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof "Invalid content of \\{\\}")

165

gettext_noop ("Invalid range end")"Invalid range end" /* REG_ERANGE */

166

"\0"

167

#define REG_ESPACE_IDX((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") (REG_ERANGE_IDX(((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end")

168

gettext_noop ("Memory exhausted")"Memory exhausted" /* REG_ESPACE */

169

"\0"

170

#define REG_BADRPT_IDX(((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") (REG_ESPACE_IDX((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") + sizeof "Memory exhausted")

171

gettext_noop ("Invalid preceding regular expression")"Invalid preceding regular expression" /* REG_BADRPT */

172

"\0"

173

#define REG_EEND_IDX((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
) (REG_BADRPT_IDX(((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression")

174

gettext_noop ("Premature end of regular expression")"Premature end of regular expression" /* REG_EEND */

175

"\0"

176

#define REG_ESIZE_IDX(((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
) + sizeof "Premature end of regular expression") (REG_EEND_IDX((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
) + sizeof "Premature end of regular expression")

177

gettext_noop ("Regular expression too big")"Regular expression too big" /* REG_ESIZE */

178

"\0"

179

#define REG_ERPAREN_IDX((((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
) + sizeof "Premature end of regular expression") + sizeof "Regular expression too big"
) (REG_ESIZE_IDX(((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
) + sizeof "Premature end of regular expression") + sizeof "Regular expression too big")

180

gettext_noop ("Unmatched ) or \\)")"Unmatched ) or \\)" /* REG_ERPAREN */

181

};

182

183

const size_t __re_error_msgid_idx[] attribute_hidden =

184

{

185

REG_NOERROR_IDX0,

186

REG_NOMATCH_IDX(0 + sizeof "Success"),

187

REG_BADPAT_IDX((0 + sizeof "Success") + sizeof "No match"),

188

REG_ECOLLATE_IDX(((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
),

189

REG_ECTYPE_IDX((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character"),

190

REG_EESCAPE_IDX(((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
),

191

REG_ESUBREG_IDX((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash"),

192

REG_EBRACK_IDX(((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
),

193

REG_EPAREN_IDX((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
) + sizeof "Unmatched [ or [^"),

194

REG_EBRACE_IDX(((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"
) + sizeof "Invalid collation character") + sizeof "Invalid character class name"
) + sizeof "Trailing backslash") + sizeof "Invalid back reference"
) + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\("
),

195

REG_BADBR_IDX((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{"),

196

REG_ERANGE_IDX(((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}"),

197

REG_ESPACE_IDX((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end"),

198

REG_BADRPT_IDX(((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted"),

199

REG_EEND_IDX((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
),

200

REG_ESIZE_IDX(((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
) + sizeof "Premature end of regular expression"),

201

REG_ERPAREN_IDX((((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof
"Invalid regular expression") + sizeof "Invalid collation character"
) + sizeof "Invalid character class name") + sizeof "Trailing backslash"
) + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"
) + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof
"Invalid content of \\{\\}") + sizeof "Invalid range end") +
sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"
) + sizeof "Premature end of regular expression") + sizeof "Regular expression too big"
)

202

};

203

204

/* Entry points for GNU code. */

205

206

207

#ifdef ZOS_USS

208

209

/* For ZOS USS we must define btowc */

210

211

wchar_t

212

btowc (int c)

213

{

214

wchar_t wtmp[2];

215

char tmp[2];

216

217

tmp[0] = c;

218

tmp[1] = 0;

219

220

mbtowc (wtmp, tmp, 1);

221

return wtmp[0];

222

}

223

#endif

224

225

/* re_compile_pattern is the GNU regular expression compiler: it

226

compiles PATTERN (of length LENGTH) and puts the result in BUFP.

227

Returns 0 if the pattern was valid, otherwise an error string.

228

229

Assumes the `allocated' (and perhaps `buffer') and `translate' fields

230

are set in BUFP on entry. */

231

232

const char *

233

re_compile_pattern (const char *pattern,

234

size_t length,

235

struct re_pattern_buffer *bufp)

236

{

237

reg_errcode_t ret;

238

239

/* And GNU code determines whether or not to get register information

240

by passing null for the REGS argument to re_match, etc., not by

241

setting no_sub, unless RE_NO_SUB is set. */

242

bufp->no_sub = !!(re_syntax_options & RE_NO_SUB((((((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1));

243

244

/* Match anchors at newline. */

245

bufp->newline_anchor = 1;

246

247

ret = re_compile_internal (bufp, pattern, length, re_syntax_options);

248

249

if (!ret)

250

return NULL((void*)0);

251

return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret])(__re_error_msgid + __re_error_msgid_idx[(int) ret]);

252

}

253

#ifdef _LIBC

254

weak_alias (__re_compile_pattern, re_compile_pattern)

255

#endif

256

257

/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can

258

also be assigned to arbitrarily: each pattern buffer stores its own

259

syntax, so it can be changed between regex compilations. */

260

/* This has no initializer because initialized variables in Emacs

261

become read-only after dumping. */

262

reg_syntax_t re_syntax_options;

263

264

265

/* Specify the precise syntax of regexps for compilation. This provides

266

for compatibility for various utilities which historically have

267

different, incompatible syntaxes.

268

269

The argument SYNTAX is a bit mask comprised of the various bits

270

defined in regex.h. We return the old syntax. */

271

272

reg_syntax_t

273

re_set_syntax (reg_syntax_t syntax)

274

{

275

reg_syntax_t ret = re_syntax_options;

276

277

re_syntax_options = syntax;

278

return ret;

279

}

280

#ifdef _LIBC

281

weak_alias (__re_set_syntax, re_set_syntax)

282

#endif

283

284

int

285

re_compile_fastmap (struct re_pattern_buffer *bufp)

286

{

287

re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;

288

char *fastmap = bufp->fastmap;

289

290

memset (fastmap, '\0', sizeof (char) * SBC_MAX)__builtin___memset_chk (fastmap, '\0', sizeof (char) * 256, __builtin_object_size
(fastmap, 0));

291

re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);

292

if (dfa->init_state != dfa->init_state_word)

293

re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);

294

if (dfa->init_state != dfa->init_state_nl)

295

re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);

296

if (dfa->init_state != dfa->init_state_begbuf)

297

re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);

298

bufp->fastmap_accurate = 1;

299

return 0;

300

}

301

#ifdef _LIBC

302

weak_alias (__re_compile_fastmap, re_compile_fastmap)

303

#endif

304

305

static inline void

306

__attribute ((always_inline))__attribute__ ((always_inline))

307

re_set_fastmap (char *fastmap, int icase, int ch)

308

{

309

fastmap[ch] = 1;

310

if (icase)

311

fastmap[tolower (ch)] = 1;

312

}

313

314

/* Helper function for re_compile_fastmap.

315

Compile fastmap for the initial_state INIT_STATE. */

316

317

static void

318

re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,

319

char *fastmap)

320

{

321

volatile re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;

322

int node_cnt;

323

int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE(((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1)));

324

for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)

325

{

326

int node = init_state->nodes.elems[node_cnt];

327

re_token_type_t type = dfa->nodes[node].type;

328

329

if (type == CHARACTER)

330

{

331

re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);

332

#ifdef RE_ENABLE_I18N

333

if ((bufp->syntax & RE_ICASE(((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1)) && dfa->mb_cur_max > 1)

334

{

335

unsigned char *buf = re_malloc (unsigned char, dfa->mb_cur_max)((unsigned char *) malloc ((dfa->mb_cur_max) * sizeof (unsigned
char))), *p;

336

wchar_t wc;

337

mbstate_t state;

338

339

p = buf;

340

*p++ = dfa->nodes[node].opr.c;

341

while (++node < dfa->nodes_len

342

&& dfa->nodes[node].type == CHARACTER

343

&& dfa->nodes[node].mb_partial)

344

*p++ = dfa->nodes[node].opr.c;

345

memset (&state, '\0', sizeof (state))__builtin___memset_chk (&state, '\0', sizeof (state), __builtin_object_size
(&state, 0));

346

if (__mbrtowcmbrtowc (&wc, (const char *) buf, p - buf,

347

&state) == p - buf

348

&& (__wcrtombwcrtomb ((char *) buf, towlower (wc), &state)

349

!= (size_t) -1))

350

re_set_fastmap (fastmap, 0, buf[0]);

351

re_free (buf)free (buf);

352

}

353

#endif

354

}

355

else if (type == SIMPLE_BRACKET)

356

{

357

int i, ch;

358

for (i = 0, ch = 0; i < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++i)

359

{

360

int j;

361

bitset_word_t w = dfa->nodes[node].opr.sbcset[i];

362

for (j = 0; j < BITSET_WORD_BITS(sizeof (bitset_word_t) * 8); ++j, ++ch)

363

if (w & ((bitset_word_t) 1 << j))

364

re_set_fastmap (fastmap, icase, ch);

365

}

366

}

367

#ifdef RE_ENABLE_I18N

368

else if (type == COMPLEX_BRACKET)

369

{

370

re_charset_t *cset = dfa->nodes[node].opr.mbcset;

371

int i;

372

373

# ifdef _LIBC

374

/* See if we have to try all bytes which start multiple collation

375

elements.

376

e.g. In da_DK, we want to catch 'a' since "aa" is a valid

377

collation element, and don't catch 'b' since 'b' is

378

the only collation element which starts from 'b' (and

379

it is caught by SIMPLE_BRACKET). */

380

if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0

381

&& (cset->ncoll_syms || cset->nranges))

382

{

383

const int32_t *table = (const int32_t *)

384

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);

385

for (i = 0; i < SBC_MAX256; ++i)

386

if (table[i] < 0)

387

re_set_fastmap (fastmap, icase, i);

388

}

389

# endif /* _LIBC */

390

391

/* See if we have to start the match at all multibyte characters,

392

i.e. where we would not find an invalid sequence. This only

393

applies to multibyte character sets; for single byte character

394

sets, the SIMPLE_BRACKET again suffices. */

395

if (dfa->mb_cur_max > 1

396

&& (cset->nchar_classes || cset->non_match || cset->nranges

397

# ifdef _LIBC

398

|| cset->nequiv_classes

399

# endif /* _LIBC */

400

))

401

{

402

unsigned char c = 0;

403

404

{

405

mbstate_t mbs;

406

memset (&mbs, 0, sizeof (mbs))__builtin___memset_chk (&mbs, 0, sizeof (mbs), __builtin_object_size
(&mbs, 0));

407

if (__mbrtowcmbrtowc (NULL((void*)0), (char *) &c, 1, &mbs) == (size_t) -2)

408

re_set_fastmap (fastmap, false(0), (int) c);

409

}

410

while (++c != 0);

411

}

412

413

else

414

{

415

/* ... Else catch all bytes which can start the mbchars. */

416

for (i = 0; i < cset->nmbchars; ++i)

417

{

418

char buf[256];

419

mbstate_t state;

420

memset (&state, '\0', sizeof (state))__builtin___memset_chk (&state, '\0', sizeof (state), __builtin_object_size
(&state, 0));

421

if (__wcrtombwcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)

422

re_set_fastmap (fastmap, icase, *(unsigned char *) buf);

423

424

{

425

if (__wcrtombwcrtomb (buf, towlower (cset->mbchars[i]), &state)

426

!= (size_t) -1)

427

re_set_fastmap (fastmap, false(0), *(unsigned char *) buf);

428

}

429

}

430

}

431

}

432

#endif /* RE_ENABLE_I18N */

433

else if (type == OP_PERIOD

434

#ifdef RE_ENABLE_I18N

435

|| type == OP_UTF8_PERIOD

436

#endif /* RE_ENABLE_I18N */

437

|| type == END_OF_RE)

438

{

439

memset (fastmap, '\1', sizeof (char) * SBC_MAX)__builtin___memset_chk (fastmap, '\1', sizeof (char) * 256, __builtin_object_size
(fastmap, 0));

440

if (type == END_OF_RE)

441

bufp->can_be_null = 1;

442

return;

443

}

444

}

445

}

446

447

/* Entry point for POSIX code. */

448

/* regcomp takes a regular expression as a string and compiles it.

449

450

PREG is a regex_t *. We do not expect any fields to be initialized,

451

since POSIX says we shouldn't. Thus, we set

452

453

`buffer' to the compiled pattern;

454

`used' to the length of the compiled pattern;

455

`syntax' to RE_SYNTAX_POSIX_EXTENDED if the

456

REG_EXTENDED bit in CFLAGS is set; otherwise, to

457

RE_SYNTAX_POSIX_BASIC;

458

`newline_anchor' to REG_NEWLINE being set in CFLAGS;

459

`fastmap' to an allocated space for the fastmap;

460

`fastmap_accurate' to zero;

461

`re_nsub' to the number of subexpressions in PATTERN.

462

463

PATTERN is the address of the pattern string.

464

465

CFLAGS is a series of bits which affect compilation.

466

467

If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we

468

use POSIX basic syntax.

469

470

If REG_NEWLINE is set, then . and [^...] don't match newline.

471

Also, regexec will try a match beginning after every newline.

472

473

If REG_ICASE is set, then we considers upper- and lowercase

474

versions of letters to be equivalent when matching.

475

476

If REG_NOSUB is set, then when PREG is passed to regexec, that

477

routine will report only success or failure, and nothing about the

478

registers.

479

480

It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for

481

the return codes and their meanings.) */

482

483

int

484

regcomp (regex_t *__restrictrestrict preg,

485

const char *__restrictrestrict pattern,

486

int cflags)

487

{

488

reg_errcode_t ret;

489

reg_syntax_t syntax = ((cflags & REG_EXTENDED1) ? RE_SYNTAX_POSIX_EXTENDED((((((unsigned long int) 1) << 1) << 1) | (((((((
(unsigned long int) 1) << 1) << 1) << 1) <<
1) << 1) << 1) | (((((((((unsigned long int) 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) | (((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) | ((((((((((((((((((unsigned long int
) 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1)) | (((((unsigned long int) 1) << 1) <<
1) << 1) | ((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) | ((((((((((((((unsigned long int
) 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) | (((((((((((((((unsigned long int)
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) | (((((((((((((((((unsigned
long int) 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) | (((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) | (((((((((((((((((((unsigned long
int) 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1))

'?' condition is false

→

490

: RE_SYNTAX_POSIX_BASIC((((((unsigned long int) 1) << 1) << 1) | (((((((
(unsigned long int) 1) << 1) << 1) << 1) <<
1) << 1) << 1) | (((((((((unsigned long int) 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) | (((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) | ((((((((((((((((((unsigned long int
) 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1)) | (((unsigned long int) 1) << 1) | ((((((
(((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
)));

491

492

preg->buffer = NULL((void*)0);

493

preg->allocated = 0;

494

preg->used = 0;

495

496

/* Try to allocate space for the fastmap. */

497

preg->fastmap = re_malloc (char, SBC_MAX)((char *) malloc ((256) * sizeof (char)));

498

if (BE (preg->fastmap == NULL, 0)__builtin_expect (preg->fastmap == ((void*)0), 0))

←

Taking false branch

→

499

return REG_ESPACE;

500

501

syntax |= (cflags & REG_ICASE(1 << 1)) ? RE_ICASE(((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) : 0;

←

'?' condition is false

→

502

503

/* If REG_NEWLINE is set, newlines are treated differently. */

504

if (cflags & REG_NEWLINE((1 << 1) << 1))

←

Taking false branch

→

505

{ /* REG_NEWLINE implies neither . nor [^...] match newline. */

506

syntax &= ~RE_DOT_NEWLINE((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1);

507

syntax |= RE_HAT_LISTS_NOT_NEWLINE((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
);

508

/* It also changes the matching behavior. */

509

preg->newline_anchor = 1;

510

}

511

else

512

preg->newline_anchor = 0;

513

preg->no_sub = !!(cflags & REG_NOSUB(((1 << 1) << 1) << 1));

514

preg->translate = NULL((void*)0);

515

516

ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);

←

Calling 're_compile_internal'

→

517

518

/* POSIX doesn't distinguish between an unmatched open-group and an

519

unmatched close-group: both are REG_EPAREN. */

520

if (ret == REG_ERPAREN)

521

ret = REG_EPAREN;

522

523

/* We have already checked preg->fastmap != NULL. */

524

if (BE (ret == REG_NOERROR, 1)__builtin_expect (ret == REG_NOERROR, 1))

525

/* Compute the fastmap now, since regexec cannot modify the pattern

526

buffer. This function never fails in this implementation. */

527

(void) re_compile_fastmap (preg);

528

else

529

{

530

/* Some error occurred while compiling the expression. */

531

re_free (preg->fastmap)free (preg->fastmap);

532

preg->fastmap = NULL((void*)0);

533

}

534

535

return (int) ret;

536

}

537

#ifdef _LIBC

538

weak_alias (__regcomp, regcomp)

539

#endif

540

541

/* Returns a message corresponding to an error code, ERRCODE, returned

542

from either regcomp or regexec. We don't use PREG here. */

543

544

size_t

545

regerror(int errcode, const regex_t *__restrictrestrict preg,

546

char *__restrictrestrict errbuf, size_t errbuf_size)

547

{

548

const char *msg;

549

size_t msg_size;

550

551

if (BE (errcode < 0__builtin_expect (errcode < 0 || errcode >= (int) (sizeof
(__re_error_msgid_idx) / sizeof (__re_error_msgid_idx[0])), 0
)

552

|| errcode >= (int) (sizeof (__re_error_msgid_idx)__builtin_expect (errcode < 0 || errcode >= (int) (sizeof
(__re_error_msgid_idx) / sizeof (__re_error_msgid_idx[0])), 0
)

553

/ sizeof (__re_error_msgid_idx[0])), 0)__builtin_expect (errcode < 0 || errcode >= (int) (sizeof
(__re_error_msgid_idx) / sizeof (__re_error_msgid_idx[0])), 0
))

554

/* Only error codes returned by the rest of the code should be passed

555

to this routine. If we are given anything else, or if other regex

556

code generates an invalid error code, then the program has a bug.

557

Dump core so we can fix it. */

558

abort ();

559

560

msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode])(__re_error_msgid + __re_error_msgid_idx[errcode]);

561

562

msg_size = strlen (msg) + 1; /* Includes the null. */

563

564

if (BE (errbuf_size != 0, 1)__builtin_expect (errbuf_size != 0, 1))

565

{

566

if (BE (msg_size > errbuf_size, 0)__builtin_expect (msg_size > errbuf_size, 0))

567

{

568

memcpy (errbuf, msg, errbuf_size - 1)__builtin___memcpy_chk (errbuf, msg, errbuf_size - 1, __builtin_object_size
(errbuf, 0));

569

errbuf[errbuf_size - 1] = 0;

570

}

571

else

572

memcpy (errbuf, msg, msg_size)__builtin___memcpy_chk (errbuf, msg, msg_size, __builtin_object_size
(errbuf, 0));

573

}

574

575

return msg_size;

576

}

577

#ifdef _LIBC

578

weak_alias (__regerror, regerror)

579

#endif

580

581

582

#ifdef RE_ENABLE_I18N

583

/* This static array is used for the map to single-byte characters when

584

UTF-8 is used. Otherwise we would allocate memory just to initialize

585

it the same all the time. UTF-8 is the preferred encoding so this is

586

a worthwhile optimization. */

587

#if __GNUC__4 >= 3

588

static const bitset_t utf8_sb_map = {

589

/* Set the first 128 bits. */

590

[0 ... 0x80 / BITSET_WORD_BITS(sizeof (bitset_word_t) * 8) - 1] = BITSET_WORD_MAX(9223372036854775807L *2UL+1UL)

591

};

592

#else /* ! (__GNUC__ >= 3) */

593

static bitset_t utf8_sb_map;

594

#endif /* __GNUC__ >= 3 */

595

#endif /* RE_ENABLE_I18N */

596

597

598

static void

599

free_dfa_content (re_dfa_t *dfa)

600

{

601

int i, j;

602

603

if (dfa->nodes)

604

for (i = 0; i < dfa->nodes_len; ++i)

605

free_token (dfa->nodes + i);

606

re_free (dfa->nexts)free (dfa->nexts);

607

for (i = 0; i < dfa->nodes_len; ++i)

608

{

609

if (dfa->eclosures != NULL((void*)0))

610

re_node_set_free (dfa->eclosures + i)free ((dfa->eclosures + i)->elems);

611

if (dfa->inveclosures != NULL((void*)0))

612

re_node_set_free (dfa->inveclosures + i)free ((dfa->inveclosures + i)->elems);

613

if (dfa->edests != NULL((void*)0))

614

re_node_set_free (dfa->edests + i)free ((dfa->edests + i)->elems);

615

}

616

re_free (dfa->edests)free (dfa->edests);

617

re_free (dfa->eclosures)free (dfa->eclosures);

618

re_free (dfa->inveclosures)free (dfa->inveclosures);

619

re_free (dfa->nodes)free (dfa->nodes);

620

621

if (dfa->state_table)

622

for (i = 0; i <= dfa->state_hash_mask; ++i)

623

{

624

struct re_state_table_entry *entry = dfa->state_table + i;

625

for (j = 0; j < entry->num; ++j)

626

{

627

re_dfastate_t *state = entry->array[j];

628

free_state (state);

629

}

630

re_free (entry->array)free (entry->array);

631

}

632

re_free (dfa->state_table)free (dfa->state_table);

633

#ifdef RE_ENABLE_I18N

634

if (dfa->sb_char != utf8_sb_map)

635

re_free (dfa->sb_char)free (dfa->sb_char);

636

#endif

637

re_free (dfa->subexp_map)free (dfa->subexp_map);

638

#ifdef DEBUG

639

re_free (dfa->re_str)free (dfa->re_str);

640

#endif

641

642

re_free (dfa)free (dfa);

643

}

644

645

646

/* Free dynamically allocated space used by PREG. */

647

648

void

649

regfree (regex_t *preg)

650

{

651

re_dfa_t *dfa = (re_dfa_t *) preg->buffer;

652

if (BE (dfa != NULL, 1)__builtin_expect (dfa != ((void*)0), 1))

653

free_dfa_content (dfa);

654

preg->buffer = NULL((void*)0);

655

preg->allocated = 0;

656

657

re_free (preg->fastmap)free (preg->fastmap);

658

preg->fastmap = NULL((void*)0);

659

660

re_free (preg->translate)free (preg->translate);

661

preg->translate = NULL((void*)0);

662

}

663

#ifdef _LIBC

664

weak_alias (__regfreeregfree, regfree)

665

#endif

666

667

/* Entry points compatible with 4.2 BSD regex library. We don't define

668

them unless specifically requested. */

669

670

#if defined _REGEX_RE_COMP || defined _LIBC

671

672

/* BSD has one and only one pattern buffer. */

673

static struct re_pattern_buffer re_comp_buf;

674

675

char *

676

# ifdef _LIBC

677

/* Make these definitions weak in libc, so POSIX programs can redefine

678

these names if they don't use our functions, and still use

679

regcomp/regexec above without link errors. */

680

weak_function

681

# endif

682

re_comp (s)

683

const char *s;

684

{

685

reg_errcode_t ret;

686

char *fastmap;

687

688

if (!s)

689

{

690

if (!re_comp_buf.buffer)

691

return gettext ("No previous regular expression")("No previous regular expression");

692

return 0;

693

}

694

695

if (re_comp_buf.buffer)

696

{

697

fastmap = re_comp_buf.fastmap;

698

re_comp_buf.fastmap = NULL((void*)0);

699

__regfreeregfree (&re_comp_buf);

700

memset (&re_comp_buf, '\0', sizeof (re_comp_buf))__builtin___memset_chk (&re_comp_buf, '\0', sizeof (re_comp_buf
), __builtin_object_size (&re_comp_buf, 0));

701

re_comp_buf.fastmap = fastmap;

702

}

703

704

if (re_comp_buf.fastmap == NULL((void*)0))

705

{

706

re_comp_buf.fastmap = (char *) malloc (SBC_MAX256);

707

if (re_comp_buf.fastmap == NULL((void*)0))

708

return (char *) gettext (__re_error_msgid(__re_error_msgid + __re_error_msgid_idx[(int) REG_ESPACE])

709

+ __re_error_msgid_idx[(int) REG_ESPACE])(__re_error_msgid + __re_error_msgid_idx[(int) REG_ESPACE]);

710

}

711

712

/* Since `re_exec' always passes NULL for the `regs' argument, we

713

don't need to initialize the pattern buffer fields which affect it. */

714

715

/* Match anchors at newlines. */

716

re_comp_buf.newline_anchor = 1;

717

718

ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);

719

720

if (!ret)

721

return NULL((void*)0);

722

723

/* Yes, we're discarding `const' here if !HAVE_LIBINTL. */

724

return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret])(__re_error_msgid + __re_error_msgid_idx[(int) ret]);

725

}

726

727

#ifdef _LIBC

728

libc_freeres_fn (free_mem)

729

{

730

__regfreeregfree (&re_comp_buf);

731

}

732

#endif

733

734

#endif /* _REGEX_RE_COMP */

735

736

/* Internal entry point.

737

Compile the regular expression PATTERN, whose length is LENGTH.

738

SYNTAX indicate regular expression's syntax. */

739

740

static reg_errcode_t

741

re_compile_internal (regex_t *preg, const char * pattern, size_t length,

742

reg_syntax_t syntax)

743

{

744

reg_errcode_t err = REG_NOERROR;

745

re_dfa_t *dfa;

746

re_string_t regexp;

747

748

/* Initialize the pattern buffer. */

749

preg->fastmap_accurate = 0;

750

preg->syntax = syntax;

751

preg->not_bol = preg->not_eol = 0;

752

preg->used = 0;

753

preg->re_nsub = 0;

754

preg->can_be_null = 0;

755

preg->regs_allocated = REGS_UNALLOCATED0;

756

757

/* Initialize the dfa. */

758

dfa = (re_dfa_t *) preg->buffer;

759

if (BE (preg->allocated < sizeof (re_dfa_t), 0)__builtin_expect (preg->allocated < sizeof (re_dfa_t), 0
))

←

Taking true branch

→

760

{

761

/* If zero allocated, but buffer is non-null, try to realloc

762

enough space. This loses if buffer's address is bogus, but

763

that is the user's responsibility. If ->buffer is NULL this

764

is a simple allocation. */

765

dfa = re_realloc (preg->buffer, re_dfa_t, 1)((preg->buffer != ((void*)0)) ? (re_dfa_t *) realloc (preg
->buffer,(1)*sizeof(re_dfa_t)) : (re_dfa_t *) calloc(1,sizeof
(re_dfa_t)));

766

if (dfa == NULL((void*)0))

←

Assuming 'dfa' is not equal to null

→

←

Taking false branch

→

767

return REG_ESPACE;

768

preg->allocated = sizeof (re_dfa_t);

769

preg->buffer = (unsigned char *) dfa;

770

}

771

preg->used = sizeof (re_dfa_t);

772

773

err = init_dfa (dfa, length);

774

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

←

Taking false branch

→

775

{

776

free_dfa_content (dfa);

777

preg->buffer = NULL((void*)0);

778

preg->allocated = 0;

779

return err;

780

}

781

#ifdef DEBUG

782

/* Note: length+1 will not overflow since it is checked in init_dfa. */

783

dfa->re_str = re_malloc (char, length + 1)((char *) malloc ((length + 1) * sizeof (char)));

784

strncpy (dfa->re_str, pattern, length + 1)__builtin___strncpy_chk (dfa->re_str, pattern, length + 1,
__builtin_object_size (dfa->re_str, 2 > 1 ? 1 : 0));

785

#endif

786

787

__libc_lock_init (dfa->lock)do { } while (0);

788

789

err = re_string_construct (&regexp, pattern, length, preg->translate,

790

syntax & RE_ICASE(((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1), dfa);

791

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

←

Taking false branch

→

792

{

793

re_compile_internal_free_return:

794

free_workarea_compile (preg);

795

re_string_destruct (&regexp);

796

free_dfa_content (dfa);

797

preg->buffer = NULL((void*)0);

798

preg->allocated = 0;

799

return err;

800

}

801

802

/* Parse the regular expression, and build a structure tree. */

803

preg->re_nsub = 0;

804

dfa->str_tree = parse (&regexp, preg, syntax, &err);

←

Calling 'parse'

→

←

Returning from 'parse'

→

805

if (BE (dfa->str_tree == NULL, 0)__builtin_expect (dfa->str_tree == ((void*)0), 0))

←

Taking false branch

→

806

goto re_compile_internal_free_return;

807

808

/* Analyze the tree and create the nfa. */

809

err = analyze (preg);

810

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

←

Taking false branch

→

811

goto re_compile_internal_free_return;

812

813

#ifdef RE_ENABLE_I18N

814

/* If possible, do searching in single byte encoding to speed things up. */

815

if (dfa->is_utf8 && !(syntax & RE_ICASE(((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1)) && preg->translate == NULL((void*)0))

816

optimize_utf8 (dfa);

817

#endif

818

819

/* Then create the initial state of the dfa. */

820

err = create_initial_state (dfa);

←

Calling 'create_initial_state'

→

821

822

/* Release work areas. */

823

free_workarea_compile (preg);

824

re_string_destruct (&regexp);

825

826

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

827

{

828

free_dfa_content (dfa);

829

preg->buffer = NULL((void*)0);

830

preg->allocated = 0;

831

}

832

833

return err;

834

}

835

836

/* Initialize DFA. We use the length of the regular expression PAT_LEN

837

as the initial length of some arrays. */

838

839

static reg_errcode_t

840

init_dfa (re_dfa_t *dfa, size_t pat_len)

841

{

842

unsigned int table_size;

843

#ifndef _LIBC

844

char *codeset_name;

845

#endif

846

847

memset (dfa, '\0', sizeof (re_dfa_t))__builtin___memset_chk (dfa, '\0', sizeof (re_dfa_t), __builtin_object_size
(dfa, 0));

848

849

/* Force allocation of str_tree_storage the first time. */

850

dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE((1024 - sizeof (void *)) / sizeof (bin_tree_t));

851

852

/* Avoid overflows. */

853

if (pat_len == SIZE_MAX18446744073709551615ULL)

854

return REG_ESPACE;

855

856

dfa->nodes_alloc = pat_len + 1;

857

dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc)((re_token_t *) malloc ((dfa->nodes_alloc) * sizeof (re_token_t
)));

858

859

/* table_size = 2 ^ ceil(log pat_len) */

860

for (table_size = 1; ; table_size <<= 1)

861

if (table_size > pat_len)

862

break;

863

864

dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);

865

dfa->state_hash_mask = table_size - 1;

866

867

dfa->mb_cur_max = MB_CUR_MAX__mb_cur_max;

868

#ifdef _LIBC

869

if (dfa->mb_cur_max == 6

870

&& strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)

871

dfa->is_utf8 = 1;

872

dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)

873

!= 0);

874

#else

875

# ifdef HAVE_LANGINFO_CODESET

876

codeset_name = nl_langinfo (CODESET);

877

# else

878

codeset_name = getenv ("LC_ALL");

879

if (codeset_name == NULL((void*)0) || codeset_name[0] == '\0')

880

codeset_name = getenv ("LC_CTYPE");

881

if (codeset_name == NULL((void*)0) || codeset_name[0] == '\0')

882

codeset_name = getenv ("LANG");

883

if (codeset_name == NULL((void*)0))

884

codeset_name = "";

885

else if (strchr (codeset_name, '.') != NULL((void*)0))

886

codeset_name = strchr (codeset_name, '.') + 1;

887

# endif

888

889

/* strcasecmp isn't a standard interface. brute force check */

890

#if 0

891

if (strcasecmp (codeset_name, "UTF-8") == 0

892

|| strcasecmp (codeset_name, "UTF8") == 0)

893

dfa->is_utf8 = 1;

894

#else

895

if ( (codeset_name[0] == 'U' || codeset_name[0] == 'u')

896

&& (codeset_name[1] == 'T' || codeset_name[1] == 't')

897

&& (codeset_name[2] == 'F' || codeset_name[2] == 'f')

898

&& (codeset_name[3] == '-'

899

? codeset_name[4] == '8' && codeset_name[5] == '\0'

900

: codeset_name[3] == '8' && codeset_name[4] == '\0'))

901

dfa->is_utf8 = 1;

902

#endif

903

904

/* We check exhaustively in the loop below if this charset is a

905

superset of ASCII. */

906

dfa->map_notascii = 0;

907

#endif

908

909

#ifdef RE_ENABLE_I18N

910

if (dfa->mb_cur_max > 1)

911

{

912

if (dfa->is_utf8)

913

{

914

#if !defined(__GNUC__4) || __GNUC__4 < 3

915

static short utf8_sb_map_inited = 0;

916

917

if (! utf8_sb_map_inited)

918

{

919

int i;

920

921

utf8_sb_map_inited = 0;

922

for (i = 0; i <= 0x80 / BITSET_WORD_BITS(sizeof (bitset_word_t) * 8) - 1; i++)

923

utf8_sb_map[i] = BITSET_WORD_MAX(9223372036854775807L *2UL+1UL);

924

}

925

#endif

926

dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;

927

}

928

else

929

{

930

int i, j, ch;

931

932

dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);

933

if (BE (dfa->sb_char == NULL, 0)__builtin_expect (dfa->sb_char == ((void*)0), 0))

934

return REG_ESPACE;

935

936

/* Set the bits corresponding to single byte chars. */

937

for (i = 0, ch = 0; i < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++i)

938

for (j = 0; j < BITSET_WORD_BITS(sizeof (bitset_word_t) * 8); ++j, ++ch)

939

{

940

wint_t wch = __btowcbtowc (ch);

941

if (wch != WEOF)

942

dfa->sb_char[i] |= (bitset_word_t) 1 << j;

943

# ifndef _LIBC

944

if (isascii (ch) && wch != ch)

945

dfa->map_notascii = 1;

946

# endif

947

}

948

}

949

}

950

#endif

951

952

if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)__builtin_expect (dfa->nodes == ((void*)0) || dfa->state_table
== ((void*)0), 0))

953

return REG_ESPACE;

954

return REG_NOERROR;

955

}

956

957

/* Initialize WORD_CHAR table, which indicate which character is

958

"word". In this case "word" means that it is the word construction

959

character used by some operators like "\<", "\>", etc. */

960

961

static void

962

internal_function

963

init_word_char (re_dfa_t *dfa)

964

{

965

int i, j, ch;

966

dfa->word_ops_used = 1;

967

for (i = 0, ch = 0; i < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++i)

968

for (j = 0; j < BITSET_WORD_BITS(sizeof (bitset_word_t) * 8); ++j, ++ch)

969

if (isalnum (ch) || ch == '_')

970

dfa->word_char[i] |= (bitset_word_t) 1 << j;

971

}

972

973

/* Free the work area which are only used while compiling. */

974

975

static void

976

free_workarea_compile (regex_t *preg)

977

{

978

re_dfa_t *dfa = (re_dfa_t *) preg->buffer;

979

bin_tree_storage_t *storage, *next;

980

for (storage = dfa->str_tree_storage; storage; storage = next)

981

{

982

next = storage->next;

983

re_free (storage)free (storage);

984

}

985

dfa->str_tree_storage = NULL((void*)0);

986

dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE((1024 - sizeof (void *)) / sizeof (bin_tree_t));

987

dfa->str_tree = NULL((void*)0);

988

re_free (dfa->org_indices)free (dfa->org_indices);

989

dfa->org_indices = NULL((void*)0);

990

}

991

992

/* Create initial states for all contexts. */

993

994

static reg_errcode_t

995

create_initial_state (re_dfa_t *dfa)

996

{

997

int first, i;

998

reg_errcode_t err;

999

re_node_set init_nodes;

1000

1001

/* Initial states have the epsilon closure of the node which is

1002

the first node of the regular expression. */

1003

first = dfa->str_tree->first->node_idx;

←

Access to field 'node_idx' results in a dereference of a null pointer (loaded from field 'first')

1004

dfa->init_node = first;

1005

err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);

1006

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1007

return err;

1008

1009

/* The back-references which are in initial states can epsilon transit,

1010

since in this case all of the subexpressions can be null.

1011

Then we add epsilon closures of the nodes which are the next nodes of

1012

the back-references. */

1013

if (dfa->nbackref > 0)

1014

for (i = 0; i < init_nodes.nelem; ++i)

1015

{

1016

int node_idx = init_nodes.elems[i];

1017

re_token_type_t type = dfa->nodes[node_idx].type;

1018

1019

int clexp_idx;

1020

if (type != OP_BACK_REF)

1021

continue;

1022

for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)

1023

{

1024

re_token_t *clexp_node;

1025

clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];

1026

if (clexp_node->type == OP_CLOSE_SUBEXP

1027

&& clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)

1028

break;

1029

}

1030

if (clexp_idx == init_nodes.nelem)

1031

continue;

1032

1033

if (type == OP_BACK_REF)

1034

{

1035

int dest_idx = dfa->edests[node_idx].elems[0];

1036

if (!re_node_set_contains (&init_nodes, dest_idx))

1037

{

1038

reg_errcode_t err = re_node_set_merge (&init_nodes,

1039

dfa->eclosures

1040

+ dest_idx);

1041

if (err != REG_NOERROR)

1042

return err;

1043

i = 0;

1044

}

1045

}

1046

}

1047

1048

/* It must be the first time to invoke acquire_state. */

1049

dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);

1050

/* We don't check ERR here, since the initial state must not be NULL. */

1051

if (BE (dfa->init_state == NULL, 0)__builtin_expect (dfa->init_state == ((void*)0), 0))

1052

return err;

1053

if (dfa->init_state->has_constraint)

1054

{

1055

dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,

1056

CONTEXT_WORD1);

1057

dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,

1058

CONTEXT_NEWLINE(1 << 1));

1059

dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,

1060

&init_nodes,

1061

CONTEXT_NEWLINE(1 << 1)

1062

| CONTEXT_BEGBUF((1 << 1) << 1));

1063

if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL__builtin_expect (dfa->init_state_word == ((void*)0) || dfa
->init_state_nl == ((void*)0) || dfa->init_state_begbuf
== ((void*)0), 0)

1064

|| dfa->init_state_begbuf == NULL, 0)__builtin_expect (dfa->init_state_word == ((void*)0) || dfa
->init_state_nl == ((void*)0) || dfa->init_state_begbuf
== ((void*)0), 0))

1065

return err;

1066

}

1067

else

1068

dfa->init_state_word = dfa->init_state_nl

1069

= dfa->init_state_begbuf = dfa->init_state;

1070

1071

re_node_set_free (&init_nodes)free ((&init_nodes)->elems);

1072

return REG_NOERROR;

1073

}

1074

1075

#ifdef RE_ENABLE_I18N

1076

/* If it is possible to do searching in single byte encoding instead of UTF-8

1077

to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change

1078

DFA nodes where needed. */

1079

1080

static void

1081

optimize_utf8 (re_dfa_t *dfa)

1082

{

1083

int node, i, mb_chars = 0, has_period = 0;

1084

1085

for (node = 0; node < dfa->nodes_len; ++node)

1086

switch (dfa->nodes[node].type)

1087

{

1088

case CHARACTER:

1089

if (dfa->nodes[node].opr.c >= 0x80)

1090

mb_chars = 1;

1091

break;

1092

case ANCHOR:

1093

switch (dfa->nodes[node].opr.ctx_type)

1094

{

1095

case LINE_FIRST:

1096

case LINE_LAST:

1097

case BUF_FIRST:

1098

case BUF_LAST:

1099

break;

1100

default:

1101

/* Word anchors etc. cannot be handled. It's okay to test

1102

opr.ctx_type since constraints (for all DFA nodes) are

1103

created by ORing one or more opr.ctx_type values. */

1104

return;

1105

}

1106

break;

1107

case OP_PERIOD:

1108

has_period = 1;

1109

break;

1110

case OP_BACK_REF:

1111

case OP_ALT:

1112

case END_OF_RE:

1113

case OP_DUP_ASTERISK:

1114

case OP_OPEN_SUBEXP:

1115

case OP_CLOSE_SUBEXP:

1116

break;

1117

case COMPLEX_BRACKET:

1118

return;

1119

case SIMPLE_BRACKET:

1120

/* Just double check. The non-ASCII range starts at 0x80. */

1121

assert (0x80 % BITSET_WORD_BITS == 0)(__builtin_expect(!(0x80 % (sizeof (bitset_word_t) * 8) == 0)
, 0) ? __assert_rtn(__func__, "compat/regex/regcomp.c", 1121,
"0x80 % BITSET_WORD_BITS == 0") : (void)0);

1122

for (i = 0x80 / BITSET_WORD_BITS(sizeof (bitset_word_t) * 8); i < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++i)

1123

if (dfa->nodes[node].opr.sbcset[i])

1124

return;

1125

break;

1126

default:

1127

abort ();

1128

}

1129

1130

if (mb_chars || has_period)

1131

for (node = 0; node < dfa->nodes_len; ++node)

1132

{

1133

if (dfa->nodes[node].type == CHARACTER

1134

&& dfa->nodes[node].opr.c >= 0x80)

1135

dfa->nodes[node].mb_partial = 0;

1136

else if (dfa->nodes[node].type == OP_PERIOD)

1137

dfa->nodes[node].type = OP_UTF8_PERIOD;

1138

}

1139

1140

/* The search can be in single byte locale. */

1141

dfa->mb_cur_max = 1;

1142

dfa->is_utf8 = 0;

1143

dfa->has_mb_node = dfa->nbackref > 0 || has_period;

1144

}

1145

#endif

1146

1147

/* Analyze the structure tree, and calculate "first", "next", "edest",

1148

"eclosure", and "inveclosure". */

1149

1150

static reg_errcode_t

1151

analyze (regex_t *preg)

1152

{

1153

re_dfa_t *dfa = (re_dfa_t *) preg->buffer;

1154

reg_errcode_t ret;

1155

1156

/* Allocate arrays. */

1157

dfa->nexts = re_malloc (int, dfa->nodes_alloc)((int *) malloc ((dfa->nodes_alloc) * sizeof (int)));

1158

dfa->org_indices = re_malloc (int, dfa->nodes_alloc)((int *) malloc ((dfa->nodes_alloc) * sizeof (int)));

1159

dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc)((re_node_set *) malloc ((dfa->nodes_alloc) * sizeof (re_node_set
)));

1160

dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc)((re_node_set *) malloc ((dfa->nodes_alloc) * sizeof (re_node_set
)));

1161

if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL__builtin_expect (dfa->nexts == ((void*)0) || dfa->org_indices
== ((void*)0) || dfa->edests == ((void*)0) || dfa->eclosures
== ((void*)0), 0)

1162

|| dfa->eclosures == NULL, 0)__builtin_expect (dfa->nexts == ((void*)0) || dfa->org_indices
== ((void*)0) || dfa->edests == ((void*)0) || dfa->eclosures
== ((void*)0), 0))

1163

return REG_ESPACE;

1164

1165

dfa->subexp_map = re_malloc (int, preg->re_nsub)((int *) malloc ((preg->re_nsub) * sizeof (int)));

1166

if (dfa->subexp_map != NULL((void*)0))

1167

{

1168

int i;

1169

for (i = 0; i < preg->re_nsub; i++)

1170

dfa->subexp_map[i] = i;

1171

preorder (dfa->str_tree, optimize_subexps, dfa);

1172

for (i = 0; i < preg->re_nsub; i++)

1173

if (dfa->subexp_map[i] != i)

1174

break;

1175

if (i == preg->re_nsub)

1176

{

1177

free (dfa->subexp_map);

1178

dfa->subexp_map = NULL((void*)0);

1179

}

1180

}

1181

1182

ret = postorder (dfa->str_tree, lower_subexps, preg);

1183

if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))

1184

return ret;

1185

ret = postorder (dfa->str_tree, calc_first, dfa);

1186

if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))

1187

return ret;

1188

preorder (dfa->str_tree, calc_next, dfa);

1189

ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);

1190

if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))

1191

return ret;

1192

ret = calc_eclosure (dfa);

1193

if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))

1194

return ret;

1195

1196

/* We only need this during the prune_impossible_nodes pass in regexec.c;

1197

skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */

1198

if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)

1199

|| dfa->nbackref)

1200

{

1201

dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len)((re_node_set *) malloc ((dfa->nodes_len) * sizeof (re_node_set
)));

1202

if (BE (dfa->inveclosures == NULL, 0)__builtin_expect (dfa->inveclosures == ((void*)0), 0))

1203

return REG_ESPACE;

1204

ret = calc_inveclosure (dfa);

1205

}

1206

1207

return ret;

1208

}

1209

1210

/* Our parse trees are very unbalanced, so we cannot use a stack to

1211

implement parse tree visits. Instead, we use parent pointers and

1212

some hairy code in these two functions. */

1213

static reg_errcode_t

1214

postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),

1215

void *extra)

1216

{

1217

bin_tree_t *node, *prev;

1218

1219

for (node = root; ; )

1220

{

1221

/* Descend down the tree, preferably to the left (or to the right

1222

if that's the only child). */

1223

while (node->left || node->right)

1224

if (node->left)

1225

node = node->left;

1226

else

1227

node = node->right;

1228

1229

1230

{

1231

reg_errcode_t err = fn (extra, node);

1232

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1233

return err;

1234

if (node->parent == NULL((void*)0))

1235

return REG_NOERROR;

1236

prev = node;

1237

node = node->parent;

1238

}

1239

/* Go up while we have a node that is reached from the right. */

1240

while (node->right == prev || node->right == NULL((void*)0));

1241

node = node->right;

1242

}

1243

}

1244

1245

static reg_errcode_t

1246

preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),

1247

void *extra)

1248

{

1249

bin_tree_t *node;

1250

1251

for (node = root; ; )

1252

{

1253

reg_errcode_t err = fn (extra, node);

1254

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1255

return err;

1256

1257

/* Go to the left node, or up and to the right. */

1258

if (node->left)

1259

node = node->left;

1260

else

1261

{

1262

bin_tree_t *prev = NULL((void*)0);

1263

while (node->right == prev || node->right == NULL((void*)0))

1264

{

1265

prev = node;

1266

node = node->parent;

1267

if (!node)

1268

return REG_NOERROR;

1269

}

1270

node = node->right;

1271

}

1272

}

1273

}

1274

1275

/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell

1276

re_search_internal to map the inner one's opr.idx to this one's. Adjust

1277

backreferences as well. Requires a preorder visit. */

1278

static reg_errcode_t

1279

optimize_subexps (void *extra, bin_tree_t *node)

1280

{

1281

re_dfa_t *dfa = (re_dfa_t *) extra;

1282

1283

if (node->token.type == OP_BACK_REF && dfa->subexp_map)

1284

{

1285

int idx = node->token.opr.idx;

1286

node->token.opr.idx = dfa->subexp_map[idx];

1287

dfa->used_bkref_map |= 1 << node->token.opr.idx;

1288

}

1289

1290

else if (node->token.type == SUBEXP

1291

&& node->left && node->left->token.type == SUBEXP)

1292

{

1293

int other_idx = node->left->token.opr.idx;

1294

1295

node->left = node->left->left;

1296

if (node->left)

1297

node->left->parent = node;

1298

1299

dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];

1300

if (other_idx < BITSET_WORD_BITS(sizeof (bitset_word_t) * 8))

1301

dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);

1302

}

1303

1304

return REG_NOERROR;

1305

}

1306

1307

/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation

1308

of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */

1309

static reg_errcode_t

1310

lower_subexps (void *extra, bin_tree_t *node)

1311

{

1312

regex_t *preg = (regex_t *) extra;

1313

reg_errcode_t err = REG_NOERROR;

1314

1315

if (node->left && node->left->token.type == SUBEXP)

1316

{

1317

node->left = lower_subexp (&err, preg, node->left);

1318

if (node->left)

1319

node->left->parent = node;

1320

}

1321

if (node->right && node->right->token.type == SUBEXP)

1322

{

1323

node->right = lower_subexp (&err, preg, node->right);

1324

if (node->right)

1325

node->right->parent = node;

1326

}

1327

1328

return err;

1329

}

1330

1331

static bin_tree_t *

1332

lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)

1333

{

1334

re_dfa_t *dfa = (re_dfa_t *) preg->buffer;

1335

bin_tree_t *body = node->left;

1336

bin_tree_t *op, *cls, *tree1, *tree;

1337

1338

if (preg->no_sub

1339

/* We do not optimize empty subexpressions, because otherwise we may

1340

have bad CONCAT nodes with NULL children. This is obviously not

1341

very common, so we do not lose much. An example that triggers

1342

this case is the sed "script" //x. */

1343

&& node->left != NULL((void*)0)

1344

&& (node->token.opr.idx >= BITSET_WORD_BITS(sizeof (bitset_word_t) * 8)

1345

|| !(dfa->used_bkref_map

1346

& ((bitset_word_t) 1 << node->token.opr.idx))))

1347

return node->left;

1348

1349

/* Convert the SUBEXP node to the concatenation of an

1350

OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */

1351

op = create_tree (dfa, NULL((void*)0), NULL((void*)0), OP_OPEN_SUBEXP);

1352

cls = create_tree (dfa, NULL((void*)0), NULL((void*)0), OP_CLOSE_SUBEXP);

1353

tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;

1354

tree = create_tree (dfa, op, tree1, CONCAT);

1355

if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)__builtin_expect (tree == ((void*)0) || tree1 == ((void*)0) ||
op == ((void*)0) || cls == ((void*)0), 0))

1356

{

1357

*err = REG_ESPACE;

1358

return NULL((void*)0);

1359

}

1360

1361

op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;

1362

op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;

1363

return tree;

1364

}

1365

1366

/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton

1367

nodes. Requires a postorder visit. */

1368

static reg_errcode_t

1369

calc_first (void *extra, bin_tree_t *node)

1370

{

1371

re_dfa_t *dfa = (re_dfa_t *) extra;

1372

if (node->token.type == CONCAT)

1373

{

1374

node->first = node->left->first;

1375

node->node_idx = node->left->node_idx;

1376

}

1377

else

1378

{

1379

node->first = node;

1380

node->node_idx = re_dfa_add_node (dfa, node->token);

1381

if (BE (node->node_idx == -1, 0)__builtin_expect (node->node_idx == -1, 0))

1382

return REG_ESPACE;

1383

if (node->token.type == ANCHOR)

1384

dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;

1385

}

1386

return REG_NOERROR;

1387

}

1388

1389

/* Pass 2: compute NEXT on the tree. Preorder visit. */

1390

static reg_errcode_t

1391

calc_next (void *extra, bin_tree_t *node)

1392

{

1393

switch (node->token.type)

1394

{

1395

case OP_DUP_ASTERISK:

1396

node->left->next = node;

1397

break;

1398

case CONCAT:

1399

node->left->next = node->right->first;

1400

node->right->next = node->next;

1401

break;

1402

default:

1403

if (node->left)

1404

node->left->next = node->next;

1405

if (node->right)

1406

node->right->next = node->next;

1407

break;

1408

}

1409

return REG_NOERROR;

1410

}

1411

1412

/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */

1413

static reg_errcode_t

1414

link_nfa_nodes (void *extra, bin_tree_t *node)

1415

{

1416

re_dfa_t *dfa = (re_dfa_t *) extra;

1417

int idx = node->node_idx;

1418

reg_errcode_t err = REG_NOERROR;

1419

1420

switch (node->token.type)

1421

{

1422

case CONCAT:

1423

break;

1424

1425

case END_OF_RE:

1426

assert (node->next == NULL)(__builtin_expect(!(node->next == ((void*)0)), 0) ? __assert_rtn
(__func__, "compat/regex/regcomp.c", 1426, "node->next == NULL"
) : (void)0);

1427

break;

1428

1429

case OP_DUP_ASTERISK:

1430

case OP_ALT:

1431

{

1432

int left, right;

1433

dfa->has_plural_match = 1;

1434

if (node->left != NULL((void*)0))

1435

left = node->left->first->node_idx;

1436

else

1437

left = node->next->node_idx;

1438

if (node->right != NULL((void*)0))

1439

right = node->right->first->node_idx;

1440

else

1441

right = node->next->node_idx;

1442

assert (left > -1)(__builtin_expect(!(left > -1), 0) ? __assert_rtn(__func__
, "compat/regex/regcomp.c", 1442, "left > -1") : (void)0);

1443

assert (right > -1)(__builtin_expect(!(right > -1), 0) ? __assert_rtn(__func__
, "compat/regex/regcomp.c", 1443, "right > -1") : (void)0);

1444

err = re_node_set_init_2 (dfa->edests + idx, left, right);

1445

}

1446

break;

1447

1448

case ANCHOR:

1449

case OP_OPEN_SUBEXP:

1450

case OP_CLOSE_SUBEXP:

1451

err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);

1452

break;

1453

1454

case OP_BACK_REF:

1455

dfa->nexts[idx] = node->next->node_idx;

1456

if (node->token.type == OP_BACK_REF)

1457

err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);

1458

break;

1459

1460

default:

1461

assert (!IS_EPSILON_NODE (node->token.type))(__builtin_expect(!(!((node->token.type) & 8)), 0) ? __assert_rtn
(__func__, "compat/regex/regcomp.c", 1461, "!IS_EPSILON_NODE (node->token.type)"
) : (void)0);

1462

dfa->nexts[idx] = node->next->node_idx;

1463

break;

1464

}

1465

1466

return err;

1467

}

1468

1469

/* Duplicate the epsilon closure of the node ROOT_NODE.

1470

Note that duplicated nodes have constraint INIT_CONSTRAINT in addition

1471

to their own constraint. */

1472

1473

static reg_errcode_t

1474

internal_function

1475

duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,

1476

int root_node, unsigned int init_constraint)

1477

{

1478

int org_node, clone_node, ret;

1479

unsigned int constraint = init_constraint;

1480

for (org_node = top_org_node, clone_node = top_clone_node;;)

1481

{

1482

int org_dest, clone_dest;

1483

if (dfa->nodes[org_node].type == OP_BACK_REF)

1484

{

1485

/* If the back reference epsilon-transit, its destination must

1486

also have the constraint. Then duplicate the epsilon closure

1487

of the destination of the back reference, and store it in

1488

edests of the back reference. */

1489

org_dest = dfa->nexts[org_node];

1490

re_node_set_empty (dfa->edests + clone_node)((dfa->edests + clone_node)->nelem = 0);

1491

clone_dest = duplicate_node (dfa, org_dest, constraint);

1492

if (BE (clone_dest == -1, 0)__builtin_expect (clone_dest == -1, 0))

1493

return REG_ESPACE;

1494

dfa->nexts[clone_node] = dfa->nexts[org_node];

1495

ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);

1496

if (BE (ret < 0, 0)__builtin_expect (ret < 0, 0))

1497

return REG_ESPACE;

1498

}

1499

else if (dfa->edests[org_node].nelem == 0)

1500

{

1501

/* In case of the node can't epsilon-transit, don't duplicate the

1502

destination and store the original destination as the

1503

destination of the node. */

1504

dfa->nexts[clone_node] = dfa->nexts[org_node];

1505

break;

1506

}

1507

else if (dfa->edests[org_node].nelem == 1)

1508

{

1509

/* In case of the node can epsilon-transit, and it has only one

1510

destination. */

1511

org_dest = dfa->edests[org_node].elems[0];

1512

re_node_set_empty (dfa->edests + clone_node)((dfa->edests + clone_node)->nelem = 0);

1513

/* If the node is root_node itself, it means the epsilon clsoure

1514

has a loop. Then tie it to the destination of the root_node. */

1515

if (org_node == root_node && clone_node != org_node)

1516

{

1517

ret = re_node_set_insert (dfa->edests + clone_node, org_dest);

1518

if (BE (ret < 0, 0)__builtin_expect (ret < 0, 0))

1519

return REG_ESPACE;

1520

break;

1521

}

1522

/* In case of the node has another constraint, add it. */

1523

constraint |= dfa->nodes[org_node].constraint;

1524

clone_dest = duplicate_node (dfa, org_dest, constraint);

1525

if (BE (clone_dest == -1, 0)__builtin_expect (clone_dest == -1, 0))

1526

return REG_ESPACE;

1527

ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);

1528

if (BE (ret < 0, 0)__builtin_expect (ret < 0, 0))

1529

return REG_ESPACE;

1530

}

1531

else /* dfa->edests[org_node].nelem == 2 */

1532

{

1533

/* In case of the node can epsilon-transit, and it has two

1534

destinations. In the bin_tree_t and DFA, that's '|' and '*'. */

1535

org_dest = dfa->edests[org_node].elems[0];

1536

re_node_set_empty (dfa->edests + clone_node)((dfa->edests + clone_node)->nelem = 0);

1537

/* Search for a duplicated node which satisfies the constraint. */

1538

clone_dest = search_duplicated_node (dfa, org_dest, constraint);

1539

if (clone_dest == -1)

1540

{

1541

/* There is no such duplicated node, create a new one. */

1542

reg_errcode_t err;

1543

clone_dest = duplicate_node (dfa, org_dest, constraint);

1544

if (BE (clone_dest == -1, 0)__builtin_expect (clone_dest == -1, 0))

1545

return REG_ESPACE;

1546

ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);

1547

if (BE (ret < 0, 0)__builtin_expect (ret < 0, 0))

1548

return REG_ESPACE;

1549

err = duplicate_node_closure (dfa, org_dest, clone_dest,

1550

root_node, constraint);

1551

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1552

return err;

1553

}

1554

else

1555

{

1556

/* There is a duplicated node which satisfies the constraint,

1557

use it to avoid infinite loop. */

1558

ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);

1559

if (BE (ret < 0, 0)__builtin_expect (ret < 0, 0))

1560

return REG_ESPACE;

1561

}

1562

1563

org_dest = dfa->edests[org_node].elems[1];

1564

clone_dest = duplicate_node (dfa, org_dest, constraint);

1565

if (BE (clone_dest == -1, 0)__builtin_expect (clone_dest == -1, 0))

1566

return REG_ESPACE;

1567

ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);

1568

if (BE (ret < 0, 0)__builtin_expect (ret < 0, 0))

1569

return REG_ESPACE;

1570

}

1571

org_node = org_dest;

1572

clone_node = clone_dest;

1573

}

1574

return REG_NOERROR;

1575

}

1576

1577

/* Search for a node which is duplicated from the node ORG_NODE, and

1578

satisfies the constraint CONSTRAINT. */

1579

1580

static int

1581

search_duplicated_node (const re_dfa_t *dfa, int org_node,

1582

unsigned int constraint)

1583

{

1584

int idx;

1585

for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)

1586

{

1587

if (org_node == dfa->org_indices[idx]

1588

&& constraint == dfa->nodes[idx].constraint)

1589

return idx; /* Found. */

1590

}

1591

return -1; /* Not found. */

1592

}

1593

1594

/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.

1595

Return the index of the new node, or -1 if insufficient storage is

1596

available. */

1597

1598

static int

1599

duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)

1600

{

1601

int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);

1602

if (BE (dup_idx != -1, 1)__builtin_expect (dup_idx != -1, 1))

1603

{

1604

dfa->nodes[dup_idx].constraint = constraint;

1605

dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;

1606

dfa->nodes[dup_idx].duplicated = 1;

1607

1608

/* Store the index of the original node. */

1609

dfa->org_indices[dup_idx] = org_idx;

1610

}

1611

return dup_idx;

1612

}

1613

1614

static reg_errcode_t

1615

calc_inveclosure (re_dfa_t *dfa)

1616

{

1617

int src, idx, ret;

1618

for (idx = 0; idx < dfa->nodes_len; ++idx)

1619

re_node_set_init_empty (dfa->inveclosures + idx)__builtin___memset_chk (dfa->inveclosures + idx, '\0', sizeof
(re_node_set), __builtin_object_size (dfa->inveclosures +
idx, 0));

1620

1621

for (src = 0; src < dfa->nodes_len; ++src)

1622

{

1623

int *elems = dfa->eclosures[src].elems;

1624

for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)

1625

{

1626

ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);

1627

if (BE (ret == -1, 0)__builtin_expect (ret == -1, 0))

1628

return REG_ESPACE;

1629

}

1630

}

1631

1632

return REG_NOERROR;

1633

}

1634

1635

/* Calculate "eclosure" for all the node in DFA. */

1636

1637

static reg_errcode_t

1638

calc_eclosure (re_dfa_t *dfa)

1639

{

1640

int node_idx, incomplete;

1641

#ifdef DEBUG

1642

assert (dfa->nodes_len > 0)(__builtin_expect(!(dfa->nodes_len > 0), 0) ? __assert_rtn
(__func__, "compat/regex/regcomp.c", 1642, "dfa->nodes_len > 0"
) : (void)0);

1643

#endif

1644

incomplete = 0;

1645

/* For each nodes, calculate epsilon closure. */

1646

for (node_idx = 0; ; ++node_idx)

1647

{

1648

reg_errcode_t err;

1649

re_node_set eclosure_elem;

1650

if (node_idx == dfa->nodes_len)

1651

{

1652

if (!incomplete)

1653

break;

1654

incomplete = 0;

1655

node_idx = 0;

1656

}

1657

1658

#ifdef DEBUG

1659

assert (dfa->eclosures[node_idx].nelem != -1)(__builtin_expect(!(dfa->eclosures[node_idx].nelem != -1),
0) ? __assert_rtn(__func__, "compat/regex/regcomp.c", 1659, "dfa->eclosures[node_idx].nelem != -1"
) : (void)0);

1660

#endif

1661

1662

/* If we have already calculated, skip it. */

1663

if (dfa->eclosures[node_idx].nelem != 0)

1664

continue;

1665

/* Calculate epsilon closure of `node_idx'. */

1666

err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);

1667

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1668

return err;

1669

1670

if (dfa->eclosures[node_idx].nelem == 0)

1671

{

1672

incomplete = 1;

1673

re_node_set_free (&eclosure_elem)free ((&eclosure_elem)->elems);

1674

}

1675

}

1676

return REG_NOERROR;

1677

}

1678

1679

/* Calculate epsilon closure of NODE. */

1680

1681

static reg_errcode_t

1682

calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root)

1683

{

1684

reg_errcode_t err;

1685

int i;

1686

re_node_set eclosure;

1687

int ret;

1688

int incomplete = 0;

1689

err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);

1690

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1691

return err;

1692

1693

/* This indicates that we are calculating this node now.

1694

We reference this value to avoid infinite loop. */

1695

dfa->eclosures[node].nelem = -1;

1696

1697

/* If the current node has constraints, duplicate all nodes

1698

since they must inherit the constraints. */

1699

if (dfa->nodes[node].constraint

1700

&& dfa->edests[node].nelem

1701

&& !dfa->nodes[dfa->edests[node].elems[0]].duplicated)

1702

{

1703

err = duplicate_node_closure (dfa, node, node, node,

1704

dfa->nodes[node].constraint);

1705

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1706

return err;

1707

}

1708

1709

/* Expand each epsilon destination nodes. */

1710

if (IS_EPSILON_NODE(dfa->nodes[node].type)((dfa->nodes[node].type) & 8))

1711

for (i = 0; i < dfa->edests[node].nelem; ++i)

1712

{

1713

re_node_set eclosure_elem;

1714

int edest = dfa->edests[node].elems[i];

1715

/* If calculating the epsilon closure of `edest' is in progress,

1716

return intermediate result. */

1717

if (dfa->eclosures[edest].nelem == -1)

1718

{

1719

incomplete = 1;

1720

continue;

1721

}

1722

/* If we haven't calculated the epsilon closure of `edest' yet,

1723

calculate now. Otherwise use calculated epsilon closure. */

1724

if (dfa->eclosures[edest].nelem == 0)

1725

{

1726

err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);

1727

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1728

return err;

1729

}

1730

else

1731

eclosure_elem = dfa->eclosures[edest];

1732

/* Merge the epsilon closure of `edest'. */

1733

err = re_node_set_merge (&eclosure, &eclosure_elem);

1734

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1735

return err;

1736

/* If the epsilon closure of `edest' is incomplete,

1737

the epsilon closure of this node is also incomplete. */

1738

if (dfa->eclosures[edest].nelem == 0)

1739

{

1740

incomplete = 1;

1741

re_node_set_free (&eclosure_elem)free ((&eclosure_elem)->elems);

1742

}

1743

}

1744

1745

/* An epsilon closure includes itself. */

1746

ret = re_node_set_insert (&eclosure, node);

1747

if (BE (ret < 0, 0)__builtin_expect (ret < 0, 0))

1748

return REG_ESPACE;

1749

if (incomplete && !root)

1750

dfa->eclosures[node].nelem = 0;

1751

else

1752

dfa->eclosures[node] = eclosure;

1753

*new_set = eclosure;

1754

return REG_NOERROR;

1755

}

1756

1757

/* Functions for token which are used in the parser. */

1758

1759

/* Fetch a token from INPUT.

1760

We must not use this function inside bracket expressions. */

1761

1762

static void

1763

internal_function

1764

fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)

1765

{

1766

re_string_skip_bytes (input, peek_token (result, input, syntax))((input)->cur_idx += (peek_token (result, input, syntax)));

1767

}

1768

1769

/* Peek a token from INPUT, and return the length of the token.

1770

We must not use this function inside bracket expressions. */

1771

1772

static int

1773

internal_function

1774

peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)

1775

{

1776

unsigned char c;

1777

1778

if (re_string_eoi (input)((input)->stop <= (input)->cur_idx))

1779

{

1780

token->type = END_OF_RE;

1781

return 0;

1782

}

1783

1784

c = re_string_peek_byte (input, 0)((input)->mbs[(input)->cur_idx + 0]);

1785

token->opr.c = c;

1786

1787

token->word_char = 0;

1788

#ifdef RE_ENABLE_I18N

1789

token->mb_partial = 0;

1790

if (input->mb_cur_max > 1 &&

1791

!re_string_first_byte (input, re_string_cur_idx (input))((((input)->cur_idx)) == (input)->valid_len || (input)->
wcs[((input)->cur_idx)] != WEOF))

1792

{

1793

token->type = CHARACTER;

1794

token->mb_partial = 1;

1795

return 1;

1796

}

1797

#endif

1798

if (c == '\\')

1799

{

1800

unsigned char c2;

1801

if (re_string_cur_idx (input)((input)->cur_idx) + 1 >= re_string_length (input)((input)->len))

1802

{

1803

token->type = BACK_SLASH;

1804

return 1;

1805

}

1806

1807

c2 = re_string_peek_byte_case (input, 1);

1808

token->opr.c = c2;

1809

token->type = CHARACTER;

1810

#ifdef RE_ENABLE_I18N

1811

if (input->mb_cur_max > 1)

1812

{

1813

wint_t wc = re_string_wchar_at (input,

1814

re_string_cur_idx (input)((input)->cur_idx) + 1);

1815

token->word_char = IS_WIDE_WORD_CHAR (wc)(iswalnum (wc) || (wc) == L'_') != 0;

1816

}

1817

else

1818

#endif

1819

token->word_char = IS_WORD_CHAR (c2)(isalnum (c2) || (c2) == '_') != 0;

1820

1821

switch (c2)

1822

{

1823

case '|':

1824

if (!(syntax & RE_LIMITED_OPS((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1)) && !(syntax & RE_NO_BK_VBAR(((((((((((((((((unsigned long int) 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1)))

1825

token->type = OP_ALT;

1826

break;

1827

case '1': case '2': case '3': case '4': case '5':

1828

case '6': case '7': case '8': case '9':

1829

if (!(syntax & RE_NO_BK_REFS((((((((((((((((unsigned long int) 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1)))

1830

{

1831

token->type = OP_BACK_REF;

1832

token->opr.idx = c2 - '1';

1833

}

1834

break;

1835

case '<':

1836

if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)))

1837

{

1838

token->type = ANCHOR;

1839

token->opr.ctx_type = WORD_FIRST;

1840

}

1841

break;

1842

case '>':

1843

if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)))

1844

{

1845

token->type = ANCHOR;

1846

token->opr.ctx_type = WORD_LAST;

1847

}

1848

break;

1849

case 'b':

1850

if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)))

1851

{

1852

token->type = ANCHOR;

1853

token->opr.ctx_type = WORD_DELIM;

1854

}

1855

break;

1856

case 'B':

1857

if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)))

1858

{

1859

token->type = ANCHOR;

1860

token->opr.ctx_type = NOT_WORD_DELIM;

1861

}

1862

break;

1863

case 'w':

1864

if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)))

1865

token->type = OP_WORD;

1866

break;

1867

case 'W':

1868

if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)))

1869

token->type = OP_NOTWORD;

1870

break;

1871

case 's':

1872

if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)))

1873

token->type = OP_SPACE;

1874

break;

1875

case 'S':

1876

if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)))

1877

token->type = OP_NOTSPACE;

1878

break;

1879

case '`':

1880

if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)))

1881

{

1882

token->type = ANCHOR;

1883

token->opr.ctx_type = BUF_FIRST;

1884

}

1885

break;

1886

case '\'':

1887

if (!(syntax & RE_NO_GNU_OPS(((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1)))

1888

{

1889

token->type = ANCHOR;

1890

token->opr.ctx_type = BUF_LAST;

1891

}

1892

break;

1893

case '(':

1894

if (!(syntax & RE_NO_BK_PARENS(((((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)))

1895

token->type = OP_OPEN_SUBEXP;

1896

break;

1897

case ')':

1898

if (!(syntax & RE_NO_BK_PARENS(((((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)))

1899

token->type = OP_CLOSE_SUBEXP;

1900

break;

1901

case '+':

1902

if (!(syntax & RE_LIMITED_OPS((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1)) && (syntax & RE_BK_PLUS_QM(((unsigned long int) 1) << 1)))

1903

token->type = OP_DUP_PLUS;

1904

break;

1905

case '?':

1906

if (!(syntax & RE_LIMITED_OPS((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1)) && (syntax & RE_BK_PLUS_QM(((unsigned long int) 1) << 1)))

1907

token->type = OP_DUP_QUESTION;

1908

break;

1909

case '{':

1910

if ((syntax & RE_INTERVALS(((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1)) && (!(syntax & RE_NO_BK_BRACES((((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1))))

1911

token->type = OP_OPEN_DUP_NUM;

1912

break;

1913

case '}':

1914

1915

token->type = OP_CLOSE_DUP_NUM;

1916

break;

1917

default:

1918

break;

1919

}

1920

return 2;

1921

}

1922

1923

token->type = CHARACTER;

1924

#ifdef RE_ENABLE_I18N

1925

if (input->mb_cur_max > 1)

1926

{

1927

wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)((input)->cur_idx));

1928

token->word_char = IS_WIDE_WORD_CHAR (wc)(iswalnum (wc) || (wc) == L'_') != 0;

1929

}

1930

else

1931

#endif

1932

token->word_char = IS_WORD_CHAR (token->opr.c)(isalnum (token->opr.c) || (token->opr.c) == '_');

1933

1934

switch (c)

1935

{

1936

case '\n':

1937

if (syntax & RE_NEWLINE_ALT(((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1))

1938

token->type = OP_ALT;

1939

break;

1940

case '|':

1941

if (!(syntax & RE_LIMITED_OPS((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1)) && (syntax & RE_NO_BK_VBAR(((((((((((((((((unsigned long int) 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1)))

1942

token->type = OP_ALT;

1943

break;

1944

case '*':

1945

token->type = OP_DUP_ASTERISK;

1946

break;

1947

case '+':

1948

if (!(syntax & RE_LIMITED_OPS((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1)) && !(syntax & RE_BK_PLUS_QM(((unsigned long int) 1) << 1)))

1949

token->type = OP_DUP_PLUS;

1950

break;

1951

case '?':

1952

if (!(syntax & RE_LIMITED_OPS((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1)) && !(syntax & RE_BK_PLUS_QM(((unsigned long int) 1) << 1)))

1953

token->type = OP_DUP_QUESTION;

1954

break;

1955

case '{':

1956

if ((syntax & RE_INTERVALS(((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1)) && (syntax & RE_NO_BK_BRACES((((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1)))

1957

token->type = OP_OPEN_DUP_NUM;

1958

break;

1959

case '}':

1960

1961

token->type = OP_CLOSE_DUP_NUM;

1962

break;

1963

case '(':

1964

if (syntax & RE_NO_BK_PARENS(((((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1))

1965

token->type = OP_OPEN_SUBEXP;

1966

break;

1967

case ')':

1968

if (syntax & RE_NO_BK_PARENS(((((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1))

1969

token->type = OP_CLOSE_SUBEXP;

1970

break;

1971

case '[':

1972

token->type = OP_OPEN_BRACKET;

1973

break;

1974

case '.':

1975

token->type = OP_PERIOD;

1976

break;

1977

case '^':

1978

if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS(((((unsigned long int) 1) << 1) << 1) << 1
) | RE_CARET_ANCHORS_HERE((((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1))) &&

1979

re_string_cur_idx (input)((input)->cur_idx) != 0)

1980

{

1981

char prev = re_string_peek_byte (input, -1)((input)->mbs[(input)->cur_idx + -1]);

1982

if (!(syntax & RE_NEWLINE_ALT(((((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1)) || prev != '\n')

1983

break;

1984

}

1985

token->type = ANCHOR;

1986

token->opr.ctx_type = LINE_FIRST;

1987

break;

1988

case '$':

1989

if (!(syntax & RE_CONTEXT_INDEP_ANCHORS(((((unsigned long int) 1) << 1) << 1) << 1
)) &&

1990

re_string_cur_idx (input)((input)->cur_idx) + 1 != re_string_length (input)((input)->len))

1991

{

1992

re_token_t next;

1993

re_string_skip_bytes (input, 1)((input)->cur_idx += (1));

1994

peek_token (&next, input, syntax);

1995

re_string_skip_bytes (input, -1)((input)->cur_idx += (-1));

1996

if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)

1997

break;

1998

}

1999

token->type = ANCHOR;

2000

token->opr.ctx_type = LINE_LAST;

2001

break;

2002

default:

2003

break;

2004

}

2005

return 1;

2006

}

2007

2008

/* Peek a token from INPUT, and return the length of the token.

2009

We must not use this function out of bracket expressions. */

2010

2011

static int

2012

internal_function

2013

peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)

2014

{

2015

unsigned char c;

2016

if (re_string_eoi (input)((input)->stop <= (input)->cur_idx))

2017

{

2018

token->type = END_OF_RE;

2019

return 0;

2020

}

2021

c = re_string_peek_byte (input, 0)((input)->mbs[(input)->cur_idx + 0]);

2022

token->opr.c = c;

2023

2024

#ifdef RE_ENABLE_I18N

2025

if (input->mb_cur_max > 1 &&

2026

!re_string_first_byte (input, re_string_cur_idx (input))((((input)->cur_idx)) == (input)->valid_len || (input)->
wcs[((input)->cur_idx)] != WEOF))

2027

{

2028

token->type = CHARACTER;

2029

return 1;

2030

}

2031

#endif /* RE_ENABLE_I18N */

2032

2033

if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS((unsigned long int) 1))

2034

&& re_string_cur_idx (input)((input)->cur_idx) + 1 < re_string_length (input)((input)->len))

2035

{

2036

/* In this case, '\' escape a character. */

2037

unsigned char c2;

2038

re_string_skip_bytes (input, 1)((input)->cur_idx += (1));

2039

c2 = re_string_peek_byte (input, 0)((input)->mbs[(input)->cur_idx + 0]);

2040

token->opr.c = c2;

2041

token->type = CHARACTER;

2042

return 1;

2043

}

2044

if (c == '[') /* '[' is a special char in a bracket exps. */

2045

{

2046

unsigned char c2;

2047

int token_len;

2048

if (re_string_cur_idx (input)((input)->cur_idx) + 1 < re_string_length (input)((input)->len))

2049

c2 = re_string_peek_byte (input, 1)((input)->mbs[(input)->cur_idx + 1]);

2050

else

2051

c2 = 0;

2052

token->opr.c = c2;

2053

token_len = 2;

2054

switch (c2)

2055

{

2056

case '.':

2057

token->type = OP_OPEN_COLL_ELEM;

2058

break;

2059

case '=':

2060

token->type = OP_OPEN_EQUIV_CLASS;

2061

break;

2062

case ':':

2063

if (syntax & RE_CHAR_CLASSES((((unsigned long int) 1) << 1) << 1))

2064

{

2065

token->type = OP_OPEN_CHAR_CLASS;

2066

break;

2067

}

2068

/* else fall through. */

2069

default:

2070

token->type = CHARACTER;

2071

token->opr.c = c;

2072

token_len = 1;

2073

break;

2074

}

2075

return token_len;

2076

}

2077

switch (c)

2078

{

2079

case '-':

2080

token->type = OP_CHARSET_RANGE;

2081

break;

2082

case ']':

2083

token->type = OP_CLOSE_BRACKET;

2084

break;

2085

case '^':

2086

token->type = OP_NON_MATCH_LIST;

2087

break;

2088

default:

2089

token->type = CHARACTER;

2090

}

2091

return 1;

2092

}

2093

2094

/* Functions for parser. */

2095

2096

/* Entry point of the parser.

2097

Parse the regular expression REGEXP and return the structure tree.

2098

If an error has occurred, ERR is set by error code, and return NULL.

2099

This function build the following tree, from regular expression <reg_exp>:

2100

CAT

2101

/ \

2102

/ \

2103

<reg_exp> EOR

2104

2105

CAT means concatenation.

2106

EOR means end of regular expression. */

2107

2108

static bin_tree_t *

2109

parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,

2110

reg_errcode_t *err)

2111

{

2112

re_dfa_t *dfa = (re_dfa_t *) preg->buffer;

2113

bin_tree_t *tree, *eor, *root;

2114

re_token_t current_token;

2115

dfa->syntax = syntax;

2116

fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE((((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1));

2117

tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);

2118

if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0))

←

Taking false branch

→

2119

return NULL((void*)0);

2120

eor = create_tree (dfa, NULL((void*)0), NULL((void*)0), END_OF_RE);

←

Calling 'create_tree'

→

←

Returning from 'create_tree'

→

2121

if (tree != NULL((void*)0))

←

Assuming 'tree' is equal to null

→

←

Taking false branch

→

2122

root = create_tree (dfa, tree, eor, CONCAT);

2123

else

2124

root = eor;

2125

if (BE (eor == NULL || root == NULL, 0)__builtin_expect (eor == ((void*)0) || root == ((void*)0), 0))

←

Taking false branch

→

2126

{

2127

*err = REG_ESPACE;

2128

return NULL((void*)0);

2129

}

2130

return root;

2131

}

2132

2133

/* This function build the following tree, from regular expression

2134

<branch1>|<branch2>:

2135

ALT

2136

/ \

2137

/ \

2138

2139

2140

ALT means alternative, which represents the operator `|'. */

2141

2142

static bin_tree_t *

2143

parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,

2144

reg_syntax_t syntax, int nest, reg_errcode_t *err)

2145

{

2146

re_dfa_t *dfa = (re_dfa_t *) preg->buffer;

2147

bin_tree_t *tree, *branch = NULL((void*)0);

2148

tree = parse_branch (regexp, preg, token, syntax, nest, err);

2149

if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0))

2150

return NULL((void*)0);

2151

2152

while (token->type == OP_ALT)

2153

{

2154

fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE((((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1));

2155

if (token->type != OP_ALT && token->type != END_OF_RE

2156

&& (nest == 0 || token->type != OP_CLOSE_SUBEXP))

2157

{

2158

branch = parse_branch (regexp, preg, token, syntax, nest, err);

2159

if (BE (*err != REG_NOERROR && branch == NULL, 0)__builtin_expect (*err != REG_NOERROR && branch == ((
void*)0), 0))

2160

return NULL((void*)0);

2161

}

2162

else

2163

branch = NULL((void*)0);

2164

tree = create_tree (dfa, tree, branch, OP_ALT);

2165

if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))

2166

{

2167

*err = REG_ESPACE;

2168

return NULL((void*)0);

2169

}

2170

}

2171

return tree;

2172

}

2173

2174

/* This function build the following tree, from regular expression

2175

<exp1><exp2>:

2176

CAT

2177

/ \

2178

/ \

2179

2180

2181

CAT means concatenation. */

2182

2183

static bin_tree_t *

2184

parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,

2185

reg_syntax_t syntax, int nest, reg_errcode_t *err)

2186

{

2187

bin_tree_t *tree, *exp;

2188

re_dfa_t *dfa = (re_dfa_t *) preg->buffer;

2189

tree = parse_expression (regexp, preg, token, syntax, nest, err);

2190

if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0))

2191

return NULL((void*)0);

2192

2193

while (token->type != OP_ALT && token->type != END_OF_RE

2194

&& (nest == 0 || token->type != OP_CLOSE_SUBEXP))

2195

{

2196

exp = parse_expression (regexp, preg, token, syntax, nest, err);

2197

if (BE (*err != REG_NOERROR && exp == NULL, 0)__builtin_expect (*err != REG_NOERROR && exp == ((void
*)0), 0))

2198

{

2199

return NULL((void*)0);

2200

}

2201

if (tree != NULL((void*)0) && exp != NULL((void*)0))

2202

{

2203

tree = create_tree (dfa, tree, exp, CONCAT);

2204

if (tree == NULL((void*)0))

2205

{

2206

*err = REG_ESPACE;

2207

return NULL((void*)0);

2208

}

2209

}

2210

else if (tree == NULL((void*)0))

2211

tree = exp;

2212

/* Otherwise exp == NULL, we don't need to create new tree. */

2213

}

2214

return tree;

2215

}

2216

2217

/* This function build the following tree, from regular expression a*:

2218

2219

2220

2221

2222

2223

static bin_tree_t *

2224

parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,

2225

reg_syntax_t syntax, int nest, reg_errcode_t *err)

2226

{

2227

re_dfa_t *dfa = (re_dfa_t *) preg->buffer;

2228

bin_tree_t *tree;

2229

switch (token->type)

2230

{

2231

case CHARACTER:

2232

tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);

2233

if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))

2234

{

2235

*err = REG_ESPACE;

2236

return NULL((void*)0);

2237

}

2238

#ifdef RE_ENABLE_I18N

2239

if (dfa->mb_cur_max > 1)

2240

{

2241

while (!re_string_eoi (regexp)((regexp)->stop <= (regexp)->cur_idx)

2242

&& !re_string_first_byte (regexp, re_string_cur_idx (regexp))((((regexp)->cur_idx)) == (regexp)->valid_len || (regexp
)->wcs[((regexp)->cur_idx)] != WEOF))

2243

{

2244

bin_tree_t *mbc_remain;

2245

fetch_token (token, regexp, syntax);

2246

mbc_remain = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);

2247

tree = create_tree (dfa, tree, mbc_remain, CONCAT);

2248

if (BE (mbc_remain == NULL || tree == NULL, 0)__builtin_expect (mbc_remain == ((void*)0) || tree == ((void*
)0), 0))

2249

{

2250

*err = REG_ESPACE;

2251

return NULL((void*)0);

2252

}

2253

}

2254

}

2255

#endif

2256

break;

2257

case OP_OPEN_SUBEXP:

2258

tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);

2259

if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0))

2260

return NULL((void*)0);

2261

break;

2262

case OP_OPEN_BRACKET:

2263

tree = parse_bracket_exp (regexp, dfa, token, syntax, err);

2264

if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0))

2265

return NULL((void*)0);

2266

break;

2267

case OP_BACK_REF:

2268

if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)__builtin_expect (dfa->completed_bkref_map & (1 <<
token->opr.idx), 1))

2269

{

2270

*err = REG_ESUBREG;

2271

return NULL((void*)0);

2272

}

2273

dfa->used_bkref_map |= 1 << token->opr.idx;

2274

tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);

2275

if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))

2276

{

2277

*err = REG_ESPACE;

2278

return NULL((void*)0);

2279

}

2280

++dfa->nbackref;

2281

dfa->has_mb_node = 1;

2282

break;

2283

case OP_OPEN_DUP_NUM:

2284

if (syntax & RE_CONTEXT_INVALID_DUP(((((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
))

2285

{

2286

*err = REG_BADRPT;

2287

return NULL((void*)0);

2288

}

2289

/* FALLTHROUGH */

2290

case OP_DUP_ASTERISK:

2291

case OP_DUP_PLUS:

2292

case OP_DUP_QUESTION:

2293

if (syntax & RE_CONTEXT_INVALID_OPS(((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1))

2294

{

2295

*err = REG_BADRPT;

2296

return NULL((void*)0);

2297

}

2298

else if (syntax & RE_CONTEXT_INDEP_OPS((((((unsigned long int) 1) << 1) << 1) << 1
) << 1))

2299

{

2300

fetch_token (token, regexp, syntax);

2301

return parse_expression (regexp, preg, token, syntax, nest, err);

2302

}

2303

/* else fall through */

2304

case OP_CLOSE_SUBEXP:

2305

if ((token->type == OP_CLOSE_SUBEXP) &&

2306

!(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD(((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1)))

2307

{

2308

*err = REG_ERPAREN;

2309

return NULL((void*)0);

2310

}

2311

/* else fall through */

2312

case OP_CLOSE_DUP_NUM:

2313

/* We treat it as a normal character. */

2314

2315

/* Then we can these characters as normal characters. */

2316

token->type = CHARACTER;

2317

/* mb_partial and word_char bits should be initialized already

2318

by peek_token. */

2319

tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);

2320

if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))

2321

{

2322

*err = REG_ESPACE;

2323

return NULL((void*)0);

2324

}

2325

break;

2326

case ANCHOR:

2327

if ((token->opr.ctx_type

2328

& (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))

2329

&& dfa->word_ops_used == 0)

2330

init_word_char (dfa);

2331

if (token->opr.ctx_type == WORD_DELIM

2332

|| token->opr.ctx_type == NOT_WORD_DELIM)

2333

{

2334

bin_tree_t *tree_first, *tree_last;

2335

if (token->opr.ctx_type == WORD_DELIM)

2336

{

2337

token->opr.ctx_type = WORD_FIRST;

2338

tree_first = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);

2339

token->opr.ctx_type = WORD_LAST;

2340

}

2341

else

2342

{

2343

token->opr.ctx_type = INSIDE_WORD;

2344

tree_first = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);

2345

token->opr.ctx_type = INSIDE_NOTWORD;

2346

}

2347

tree_last = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);

2348

tree = create_tree (dfa, tree_first, tree_last, OP_ALT);

2349

if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)__builtin_expect (tree_first == ((void*)0) || tree_last == ((
void*)0) || tree == ((void*)0), 0))

2350

{

2351

*err = REG_ESPACE;

2352

return NULL((void*)0);

2353

}

2354

}

2355

else

2356

{

2357

tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);

2358

if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))

2359

{

2360

*err = REG_ESPACE;

2361

return NULL((void*)0);

2362

}

2363

}

2364

/* We must return here, since ANCHORs can't be followed

2365

by repetition operators.

2366

eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",

2367

it must not be "<ANCHOR(^)><REPEAT(*)>". */

2368

fetch_token (token, regexp, syntax);

2369

return tree;

2370

case OP_PERIOD:

2371

tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), token);

2372

if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))

2373

{

2374

*err = REG_ESPACE;

2375

return NULL((void*)0);

2376

}

2377

if (dfa->mb_cur_max > 1)

2378

dfa->has_mb_node = 1;

2379

break;

2380

case OP_WORD:

2381

case OP_NOTWORD:

2382

tree = build_charclass_op (dfa, regexp->trans,

2383

"alnum",

2384

"_",

2385

token->type == OP_NOTWORD, err);

2386

if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0))

2387

return NULL((void*)0);

2388

break;

2389

case OP_SPACE:

2390

case OP_NOTSPACE:

2391

tree = build_charclass_op (dfa, regexp->trans,

2392

"space",

2393

"",

2394

token->type == OP_NOTSPACE, err);

2395

if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0))

2396

return NULL((void*)0);

2397

break;

2398

case OP_ALT:

2399

case END_OF_RE:

2400

return NULL((void*)0);

2401

case BACK_SLASH:

2402

*err = REG_EESCAPE;

2403

return NULL((void*)0);

2404

default:

2405

/* Must not happen? */

2406

#ifdef DEBUG

2407

assert (0)(__builtin_expect(!(0), 0) ? __assert_rtn(__func__, "compat/regex/regcomp.c"
, 2407, "0") : (void)0);

2408

#endif

2409

return NULL((void*)0);

2410

}

2411

fetch_token (token, regexp, syntax);

2412

2413

while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS

2414

|| token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)

2415

{

2416

tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);

2417

if (BE (*err != REG_NOERROR && tree == NULL, 0)__builtin_expect (*err != REG_NOERROR && tree == ((void
*)0), 0))

2418

return NULL((void*)0);

2419

/* In BRE consecutive duplications are not allowed. */

2420

if ((syntax & RE_CONTEXT_INVALID_DUP(((((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
))

2421

&& (token->type == OP_DUP_ASTERISK

2422

|| token->type == OP_OPEN_DUP_NUM))

2423

{

2424

*err = REG_BADRPT;

2425

return NULL((void*)0);

2426

}

2427

}

2428

2429

return tree;

2430

}

2431

2432

/* This function build the following tree, from regular expression

2433

(<reg_exp>):

2434

SUBEXP

2435

2436

<reg_exp>

2437

2438

2439

static bin_tree_t *

2440

parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,

2441

reg_syntax_t syntax, int nest, reg_errcode_t *err)

2442

{

2443

re_dfa_t *dfa = (re_dfa_t *) preg->buffer;

2444

bin_tree_t *tree;

2445

size_t cur_nsub;

2446

cur_nsub = preg->re_nsub++;

2447

2448

2449

2450

/* The subexpression may be a null string. */

2451

if (token->type == OP_CLOSE_SUBEXP)

2452

tree = NULL((void*)0);

2453

else

2454

{

2455

tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);

2456

if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)__builtin_expect (*err == REG_NOERROR && token->type
!= OP_CLOSE_SUBEXP, 0))

2457

*err = REG_EPAREN;

2458

if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))

2459

return NULL((void*)0);

2460

}

2461

2462

if (cur_nsub <= '9' - '1')

2463

dfa->completed_bkref_map |= 1 << cur_nsub;

2464

2465

tree = create_tree (dfa, tree, NULL((void*)0), SUBEXP);

2466

if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))

2467

{

2468

*err = REG_ESPACE;

2469

return NULL((void*)0);

2470

}

2471

tree->token.opr.idx = cur_nsub;

2472

return tree;

2473

}

2474

2475

/* This function parse repetition operators like "*", "+", "{1,3}" etc. */

2476

2477

static bin_tree_t *

2478

parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,

2479

re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)

2480

{

2481

bin_tree_t *tree = NULL((void*)0), *old_tree = NULL((void*)0);

2482

int i, start, end, start_idx = re_string_cur_idx (regexp)((regexp)->cur_idx);

2483

#ifndef RE_TOKEN_INIT_BUG

2484

re_token_t start_token = *token;

2485

#else

2486

re_token_t start_token;

2487

2488

memcpy ((void *) &start_token, (void *) token, sizeof start_token)__builtin___memcpy_chk ((void *) &start_token, (void *) token
, sizeof start_token, __builtin_object_size ((void *) &start_token
, 0));

2489

#endif

2490

2491

if (token->type == OP_OPEN_DUP_NUM)

2492

{

2493

end = 0;

2494

start = fetch_number (regexp, token, syntax);

2495

if (start == -1)

2496

{

2497

if (token->type == CHARACTER && token->opr.c == ',')

2498

start = 0; /* We treat "{,m}" as "{0,m}". */

2499

else

2500

{

2501

*err = REG_BADBR; /* <re>{} is invalid. */

2502

return NULL((void*)0);

2503

}

2504

}

2505

if (BE (start != -2, 1)__builtin_expect (start != -2, 1))

2506

{

2507

/* We treat "{n}" as "{n,n}". */

2508

end = ((token->type == OP_CLOSE_DUP_NUM) ? start

2509

: ((token->type == CHARACTER && token->opr.c == ',')

2510

? fetch_number (regexp, token, syntax) : -2));

2511

}

2512

if (BE (start == -2 || end == -2, 0)__builtin_expect (start == -2 || end == -2, 0))

2513

{

2514

/* Invalid sequence. */

2515

if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)__builtin_expect (!(syntax & ((((((((((((((((((((((unsigned
long int) 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
), 0))

2516

{

2517

if (token->type == END_OF_RE)

2518

*err = REG_EBRACE;

2519

else

2520

*err = REG_BADBR;

2521

2522

return NULL((void*)0);

2523

}

2524

2525

/* If the syntax bit is set, rollback. */

2526

re_string_set_index (regexp, start_idx)((regexp)->cur_idx = (start_idx));

2527

*token = start_token;

2528

token->type = CHARACTER;

2529

/* mb_partial and word_char bits should be already initialized by

2530

peek_token. */

2531

return elem;

2532

}

2533

2534

if (BE ((end != -1 && start > end) || token->type != OP_CLOSE_DUP_NUM, 0)__builtin_expect ((end != -1 && start > end) || token
->type != OP_CLOSE_DUP_NUM, 0))

2535

{

2536

/* First number greater than second. */

2537

*err = REG_BADBR;

2538

return NULL((void*)0);

2539

}

2540

}

2541

else

2542

{

2543

start = (token->type == OP_DUP_PLUS) ? 1 : 0;

2544

end = (token->type == OP_DUP_QUESTION) ? 1 : -1;

2545

}

2546

2547

fetch_token (token, regexp, syntax);

2548

2549

if (BE (elem == NULL, 0)__builtin_expect (elem == ((void*)0), 0))

2550

return NULL((void*)0);

2551

if (BE (start == 0 && end == 0, 0)__builtin_expect (start == 0 && end == 0, 0))

2552

{

2553

postorder (elem, free_tree, NULL((void*)0));

2554

return NULL((void*)0);

2555

}

2556

2557

/* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */

2558

if (BE (start > 0, 0)__builtin_expect (start > 0, 0))

2559

{

2560

tree = elem;

2561

for (i = 2; i <= start; ++i)

2562

{

2563

elem = duplicate_tree (elem, dfa);

2564

tree = create_tree (dfa, tree, elem, CONCAT);

2565

if (BE (elem == NULL || tree == NULL, 0)__builtin_expect (elem == ((void*)0) || tree == ((void*)0), 0
))

2566

goto parse_dup_op_espace;

2567

}

2568

2569

if (start == end)

2570

return tree;

2571

2572

/* Duplicate ELEM before it is marked optional. */

2573

elem = duplicate_tree (elem, dfa);

2574

old_tree = tree;

2575

}

2576

else

2577

old_tree = NULL((void*)0);

2578

2579

if (elem->token.type == SUBEXP)

2580

postorder (elem, mark_opt_subexp, (void *) (intptr_t) elem->token.opr.idx);

2581

2582

tree = create_tree (dfa, elem, NULL((void*)0), (end == -1 ? OP_DUP_ASTERISK : OP_ALT));

2583

if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))

2584

goto parse_dup_op_espace;

2585

2586

/* This loop is actually executed only when end != -1,

2587

to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have

2588

already created the start+1-th copy. */

2589

for (i = start + 2; i <= end; ++i)

2590

{

2591

elem = duplicate_tree (elem, dfa);

2592

tree = create_tree (dfa, tree, elem, CONCAT);

2593

if (BE (elem == NULL || tree == NULL, 0)__builtin_expect (elem == ((void*)0) || tree == ((void*)0), 0
))

2594

goto parse_dup_op_espace;

2595

2596

tree = create_tree (dfa, tree, NULL((void*)0), OP_ALT);

2597

if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))

2598

goto parse_dup_op_espace;

2599

}

2600

2601

if (old_tree)

2602

tree = create_tree (dfa, old_tree, tree, CONCAT);

2603

2604

return tree;

2605

2606

parse_dup_op_espace:

2607

*err = REG_ESPACE;

2608

return NULL((void*)0);

2609

}

2610

2611

/* Size of the names for collating symbol/equivalence_class/character_class.

2612

I'm not sure, but maybe enough. */

2613

#define BRACKET_NAME_BUF_SIZE32 32

2614

2615

#ifndef _LIBC

2616

/* Local function for parse_bracket_exp only used in case of NOT _LIBC.

2617

Build the range expression which starts from START_ELEM, and ends

2618

at END_ELEM. The result are written to MBCSET and SBCSET.

2619

RANGE_ALLOC is the allocated size of mbcset->range_starts, and

2620

mbcset->range_ends, is a pointer argument since we may

2621

update it. */

2622

2623

static reg_errcode_t

2624

internal_function

2625

# ifdef RE_ENABLE_I18N

2626

build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,

2627

bracket_elem_t *start_elem, bracket_elem_t *end_elem)

2628

# else /* not RE_ENABLE_I18N */

2629

build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,

2630

bracket_elem_t *end_elem)

2631

# endif /* not RE_ENABLE_I18N */

2632

{

2633

unsigned int start_ch, end_ch;

2634

/* Equivalence Classes and Character Classes can't be a range start/end. */

2635

if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS__builtin_expect (start_elem->type == EQUIV_CLASS || start_elem
->type == CHAR_CLASS || end_elem->type == EQUIV_CLASS ||
end_elem->type == CHAR_CLASS, 0)

2636

|| end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,__builtin_expect (start_elem->type == EQUIV_CLASS || start_elem
->type == CHAR_CLASS || end_elem->type == EQUIV_CLASS ||
end_elem->type == CHAR_CLASS, 0)

2637

0)__builtin_expect (start_elem->type == EQUIV_CLASS || start_elem
->type == CHAR_CLASS || end_elem->type == EQUIV_CLASS ||
end_elem->type == CHAR_CLASS, 0))

2638

return REG_ERANGE;

2639

2640

/* We can handle no multi character collating elements without libc

2641

support. */

2642

if (BE ((start_elem->type == COLL_SYM__builtin_expect ((start_elem->type == COLL_SYM &&
strlen ((char *) start_elem->opr.name) > 1) || (end_elem
->type == COLL_SYM && strlen ((char *) end_elem->
opr.name) > 1), 0)

2643

&& strlen ((char *) start_elem->opr.name) > 1)__builtin_expect ((start_elem->type == COLL_SYM &&
strlen ((char *) start_elem->opr.name) > 1) || (end_elem
->type == COLL_SYM && strlen ((char *) end_elem->
opr.name) > 1), 0)

2644

|| (end_elem->type == COLL_SYM__builtin_expect ((start_elem->type == COLL_SYM &&
strlen ((char *) start_elem->opr.name) > 1) || (end_elem
->type == COLL_SYM && strlen ((char *) end_elem->
opr.name) > 1), 0)

2645

&& strlen ((char *) end_elem->opr.name) > 1), 0)__builtin_expect ((start_elem->type == COLL_SYM &&
strlen ((char *) start_elem->opr.name) > 1) || (end_elem
->type == COLL_SYM && strlen ((char *) end_elem->
opr.name) > 1), 0))

2646

return REG_ECOLLATE;

2647

2648

# ifdef RE_ENABLE_I18N

2649

{

2650

wchar_t wc;

2651

wint_t start_wc;

2652

wint_t end_wc;

2653

wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};

2654

2655

start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch

2656

: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]

2657

: 0));

2658

end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch

2659

: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]

2660

: 0));

2661

#ifdef GAWK1

2662

2663

* Fedora Core 2, maybe others, have broken `btowc' that returns -1

2664

* for any value > 127. Sigh. Note that `start_ch' and `end_ch' are

2665

* unsigned, so we don't have sign extension problems.

2666

2667

start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)

2668

? start_ch : start_elem->opr.wch);

2669

end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)

2670

? end_ch : end_elem->opr.wch);

2671

#else

2672

start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)

2673

? __btowcbtowc (start_ch) : start_elem->opr.wch);

2674

end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)

2675

? __btowcbtowc (end_ch) : end_elem->opr.wch);

2676

#endif

2677

if (start_wc == WEOF || end_wc == WEOF)

2678

return REG_ECOLLATE;

2679

cmp_buf[0] = start_wc;

2680

cmp_buf[4] = end_wc;

2681

if (wcscoll (cmp_buf, cmp_buf + 4) > 0)

2682

return REG_ERANGE;

2683

2684

/* Got valid collation sequence values, add them as a new entry.

2685

However, for !_LIBC we have no collation elements: if the

2686

character set is single byte, the single byte character set

2687

that we build below suffices. parse_bracket_exp passes

2688

no MBCSET if dfa->mb_cur_max == 1. */

2689

if (mbcset)

2690

{

2691

/* Check the space of the arrays. */

2692

if (BE (*range_alloc == mbcset->nranges, 0)__builtin_expect (*range_alloc == mbcset->nranges, 0))

2693

{

2694

/* There is not enough space, need realloc. */

2695

wchar_t *new_array_start, *new_array_end;

2696

int new_nranges;

2697

2698

/* +1 in case of mbcset->nranges is 0. */

2699

new_nranges = 2 * mbcset->nranges + 1;

2700

/* Use realloc since mbcset->range_starts and mbcset->range_ends

2701

are NULL if *range_alloc == 0. */

2702

new_array_start = re_realloc (mbcset->range_starts, wchar_t,((mbcset->range_starts != ((void*)0)) ? (wchar_t *) realloc
(mbcset->range_starts,(new_nranges)*sizeof(wchar_t)) : (wchar_t
*) calloc(new_nranges,sizeof(wchar_t)))

2703

new_nranges)((mbcset->range_starts != ((void*)0)) ? (wchar_t *) realloc
(mbcset->range_starts,(new_nranges)*sizeof(wchar_t)) : (wchar_t
*) calloc(new_nranges,sizeof(wchar_t)));

2704

new_array_end = re_realloc (mbcset->range_ends, wchar_t,((mbcset->range_ends != ((void*)0)) ? (wchar_t *) realloc (
mbcset->range_ends,(new_nranges)*sizeof(wchar_t)) : (wchar_t
*) calloc(new_nranges,sizeof(wchar_t)))

2705

new_nranges)((mbcset->range_ends != ((void*)0)) ? (wchar_t *) realloc (
mbcset->range_ends,(new_nranges)*sizeof(wchar_t)) : (wchar_t
*) calloc(new_nranges,sizeof(wchar_t)));

2706

2707

if (BE (new_array_start == NULL || new_array_end == NULL, 0)__builtin_expect (new_array_start == ((void*)0) || new_array_end
== ((void*)0), 0))

2708

return REG_ESPACE;

2709

2710

mbcset->range_starts = new_array_start;

2711

mbcset->range_ends = new_array_end;

2712

*range_alloc = new_nranges;

2713

}

2714

2715

mbcset->range_starts[mbcset->nranges] = start_wc;

2716

mbcset->range_ends[mbcset->nranges++] = end_wc;

2717

}

2718

2719

/* Build the table for single byte characters. */

2720

for (wc = 0; wc < SBC_MAX256; ++wc)

2721

{

2722

cmp_buf[2] = wc;

2723

if (wcscoll (cmp_buf, cmp_buf + 2) <= 0

2724

&& wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)

2725

bitset_set (sbcset, wc)(sbcset[wc / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t)
1 << wc % (sizeof (bitset_word_t) * 8));

2726

}

2727

}

2728

# else /* not RE_ENABLE_I18N */

2729

{

2730

unsigned int ch;

2731

start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch

2732

: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]

2733

: 0));

2734

end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch

2735

: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]

2736

: 0));

2737

if (start_ch > end_ch)

2738

return REG_ERANGE;

2739

/* Build the table for single byte characters. */

2740

for (ch = 0; ch < SBC_MAX256; ++ch)

2741

if (start_ch <= ch && ch <= end_ch)

2742

bitset_set (sbcset, ch)(sbcset[ch / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t)
1 << ch % (sizeof (bitset_word_t) * 8));

2743

}

2744

# endif /* not RE_ENABLE_I18N */

2745

return REG_NOERROR;

2746

}

2747

#endif /* not _LIBC */

2748

2749

#ifndef _LIBC

2750

/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..

2751

Build the collating element which is represented by NAME.

2752

The result are written to MBCSET and SBCSET.

2753

COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a

2754

pointer argument since we may update it. */

2755

2756

static reg_errcode_t

2757

internal_function

2758

# ifdef RE_ENABLE_I18N

2759

build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,

2760

int *coll_sym_alloc, const unsigned char *name)

2761

# else /* not RE_ENABLE_I18N */

2762

build_collating_symbol (bitset_t sbcset, const unsigned char *name)

2763

# endif /* not RE_ENABLE_I18N */

2764

{

2765

size_t name_len = strlen ((const char *) name);

2766

if (BE (name_len != 1, 0)__builtin_expect (name_len != 1, 0))

2767

return REG_ECOLLATE;

2768

else

2769

{

2770

bitset_set (sbcset, name[0])(sbcset[name[0] / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << name[0] % (sizeof (bitset_word_t) * 8));

2771

return REG_NOERROR;

2772

}

2773

}

2774

#endif /* not _LIBC */

2775

2776

/* This function parse bracket expression like "[abc]", "[a-c]",

2777

"[[.a-a.]]" etc. */

2778

2779

static bin_tree_t *

2780

parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,

2781

reg_syntax_t syntax, reg_errcode_t *err)

2782

{

2783

#ifdef _LIBC

2784

const unsigned char *collseqmb;

2785

const char *collseqwc;

2786

uint32_t nrules;

2787

int32_t table_size;

2788

const int32_t *symb_table;

2789

const unsigned char *extra;

2790

2791

/* Local function for parse_bracket_exp used in _LIBC environment.

2792

Seek the collating symbol entry correspondings to NAME.

2793

Return the index of the symbol in the SYMB_TABLE. */

2794

2795

auto inline int32_t

2796

__attribute ((always_inline))__attribute__ ((always_inline))

2797

seek_collating_symbol_entry (name, name_len)

2798

const unsigned char *name;

2799

size_t name_len;

2800

{

2801

int32_t hash = elem_hash ((const char *) name, name_len);

2802

int32_t elem = hash % table_size;

2803

if (symb_table[2 * elem] != 0)

2804

{

2805

int32_t second = hash % (table_size - 2) + 1;

2806

2807

2808

{

2809

/* First compare the hashing value. */

2810

if (symb_table[2 * elem] == hash

2811

/* Compare the length of the name. */

2812

&& name_len == extra[symb_table[2 * elem + 1]]

2813

/* Compare the name. */

2814

&& memcmp (name, &extra[symb_table[2 * elem + 1] + 1],

2815

name_len) == 0)

2816

{

2817

/* Yep, this is the entry. */

2818

break;

2819

}

2820

2821

/* Next entry. */

2822

elem += second;

2823

}

2824

while (symb_table[2 * elem] != 0);

2825

}

2826

return elem;

2827

}

2828

2829

/* Local function for parse_bracket_exp used in _LIBC environment.

2830

Look up the collation sequence value of BR_ELEM.

2831

Return the value if succeeded, UINT_MAX otherwise. */

2832

2833

auto inline unsigned int

2834

__attribute ((always_inline))__attribute__ ((always_inline))

2835

lookup_collation_sequence_value (br_elem)

2836

bracket_elem_t *br_elem;

2837

{

2838

if (br_elem->type == SB_CHAR)

2839

{

2840

2841

if (MB_CUR_MAX == 1)

2842

2843

if (nrules == 0)

2844

return collseqmb[br_elem->opr.ch];

2845

else

2846

{

2847

wint_t wc = __btowcbtowc (br_elem->opr.ch);

2848

return __collseq_table_lookup (collseqwc, wc);

2849

}

2850

}

2851

else if (br_elem->type == MB_CHAR)

2852

{

2853

if (nrules != 0)

2854

return __collseq_table_lookup (collseqwc, br_elem->opr.wch);

2855

}

2856

else if (br_elem->type == COLL_SYM)

2857

{

2858

size_t sym_name_len = strlen ((char *) br_elem->opr.name);

2859

if (nrules != 0)

2860

{

2861

int32_t elem, idx;

2862

elem = seek_collating_symbol_entry (br_elem->opr.name,

2863

sym_name_len);

2864

if (symb_table[2 * elem] != 0)

2865

{

2866

/* We found the entry. */

2867

idx = symb_table[2 * elem + 1];

2868

/* Skip the name of collating element name. */

2869

idx += 1 + extra[idx];

2870

/* Skip the byte sequence of the collating element. */

2871

idx += 1 + extra[idx];

2872

/* Adjust for the alignment. */

2873

idx = (idx + 3) & ~3;

2874

/* Skip the multibyte collation sequence value. */

2875

idx += sizeof (unsigned int);

2876

/* Skip the wide char sequence of the collating element. */

2877

idx += sizeof (unsigned int) *

2878

(1 + *(unsigned int *) (extra + idx));

2879

/* Return the collation sequence value. */

2880

return *(unsigned int *) (extra + idx);

2881

}

2882

else if (symb_table[2 * elem] == 0 && sym_name_len == 1)

2883

{

2884

/* No valid character. Match it as a single byte

2885

character. */

2886

return collseqmb[br_elem->opr.name[0]];

2887

}

2888

}

2889

else if (sym_name_len == 1)

2890

return collseqmb[br_elem->opr.name[0]];

2891

}

2892

return UINT_MAX(2147483647 *2U +1U);

2893

}

2894

2895

/* Local function for parse_bracket_exp used in _LIBC environment.

2896

Build the range expression which starts from START_ELEM, and ends

2897

at END_ELEM. The result are written to MBCSET and SBCSET.

2898

RANGE_ALLOC is the allocated size of mbcset->range_starts, and

2899

mbcset->range_ends, is a pointer argument since we may

2900

update it. */

2901

2902

auto inline reg_errcode_t

2903

__attribute ((always_inline))__attribute__ ((always_inline))

2904

build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)

2905

re_charset_t *mbcset;

2906

int *range_alloc;

2907

bitset_t sbcset;

2908

bracket_elem_t *start_elem, *end_elem;

2909

{

2910

unsigned int ch;

2911

uint32_t start_collseq;

2912

uint32_t end_collseq;

2913

2914

/* Equivalence Classes and Character Classes can't be a range

2915

start/end. */

2916

2917

2918

0)__builtin_expect (start_elem->type == EQUIV_CLASS || start_elem
->type == CHAR_CLASS || end_elem->type == EQUIV_CLASS ||
end_elem->type == CHAR_CLASS, 0))

2919

return REG_ERANGE;

2920

2921

start_collseq = lookup_collation_sequence_value (start_elem);

2922

end_collseq = lookup_collation_sequence_value (end_elem);

2923

/* Check start/end collation sequence values. */

2924

if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)__builtin_expect (start_collseq == (2147483647 *2U +1U) || end_collseq
== (2147483647 *2U +1U), 0))

2925

return REG_ECOLLATE;

2926

if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)__builtin_expect ((syntax & ((((((((((((((((((unsigned long
int) 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1)) && start_collseq > end_collseq, 0))

2927

return REG_ERANGE;

2928

2929

/* Got valid collation sequence values, add them as a new entry.

2930

However, if we have no collation elements, and the character set

2931

is single byte, the single byte character set that we

2932

build below suffices. */

2933

if (nrules > 0 || dfa->mb_cur_max > 1)

2934

{

2935

/* Check the space of the arrays. */

2936

if (BE (*range_alloc == mbcset->nranges, 0)__builtin_expect (*range_alloc == mbcset->nranges, 0))

2937

{

2938

/* There is not enough space, need realloc. */

2939

uint32_t *new_array_start;

2940

uint32_t *new_array_end;

2941

int new_nranges;

2942

2943

/* +1 in case of mbcset->nranges is 0. */

2944

new_nranges = 2 * mbcset->nranges + 1;

2945

new_array_start = re_realloc (mbcset->range_starts, uint32_t,((mbcset->range_starts != ((void*)0)) ? (uint32_t *) realloc
(mbcset->range_starts,(new_nranges)*sizeof(uint32_t)) : (
uint32_t *) calloc(new_nranges,sizeof(uint32_t)))

2946

new_nranges)((mbcset->range_starts != ((void*)0)) ? (uint32_t *) realloc
(mbcset->range_starts,(new_nranges)*sizeof(uint32_t)) : (
uint32_t *) calloc(new_nranges,sizeof(uint32_t)));

2947

new_array_end = re_realloc (mbcset->range_ends, uint32_t,((mbcset->range_ends != ((void*)0)) ? (uint32_t *) realloc
(mbcset->range_ends,(new_nranges)*sizeof(uint32_t)) : (uint32_t
*) calloc(new_nranges,sizeof(uint32_t)))

2948

new_nranges)((mbcset->range_ends != ((void*)0)) ? (uint32_t *) realloc
(mbcset->range_ends,(new_nranges)*sizeof(uint32_t)) : (uint32_t
*) calloc(new_nranges,sizeof(uint32_t)));

2949

2950

if (BE (new_array_start == NULL || new_array_end == NULL, 0)__builtin_expect (new_array_start == ((void*)0) || new_array_end
== ((void*)0), 0))

2951

return REG_ESPACE;

2952

2953

mbcset->range_starts = new_array_start;

2954

mbcset->range_ends = new_array_end;

2955

*range_alloc = new_nranges;

2956

}

2957

2958

mbcset->range_starts[mbcset->nranges] = start_collseq;

2959

mbcset->range_ends[mbcset->nranges++] = end_collseq;

2960

}

2961

2962

/* Build the table for single byte characters. */

2963

for (ch = 0; ch < SBC_MAX256; ch++)

2964

{

2965

uint32_t ch_collseq;

2966

2967

if (MB_CUR_MAX == 1)

2968

2969

if (nrules == 0)

2970

ch_collseq = collseqmb[ch];

2971

else

2972

ch_collseq = __collseq_table_lookup (collseqwc, __btowcbtowc (ch));

2973

if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)

2974

bitset_set (sbcset, ch)(sbcset[ch / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t)
1 << ch % (sizeof (bitset_word_t) * 8));

2975

}

2976

return REG_NOERROR;

2977

}

2978

2979

/* Local function for parse_bracket_exp used in _LIBC environment.

2980

Build the collating element which is represented by NAME.

2981

The result are written to MBCSET and SBCSET.

2982

COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a

2983

pointer argument since we may update it. */

2984

2985

auto inline reg_errcode_t

2986

__attribute ((always_inline))__attribute__ ((always_inline))

2987

build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)

2988

re_charset_t *mbcset;

2989

int *coll_sym_alloc;

2990

bitset_t sbcset;

2991

const unsigned char *name;

2992

{

2993

int32_t elem, idx;

2994

size_t name_len = strlen ((const char *) name);

2995

if (nrules != 0)

2996

{

2997

elem = seek_collating_symbol_entry (name, name_len);

2998

if (symb_table[2 * elem] != 0)

2999

{

3000

/* We found the entry. */

3001

idx = symb_table[2 * elem + 1];

3002

/* Skip the name of collating element name. */

3003

idx += 1 + extra[idx];

3004

}

3005

else if (symb_table[2 * elem] == 0 && name_len == 1)

3006

{

3007

/* No valid character, treat it as a normal

3008

character. */

3009

bitset_set (sbcset, name[0])(sbcset[name[0] / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << name[0] % (sizeof (bitset_word_t) * 8));

3010

return REG_NOERROR;

3011

}

3012

else

3013

return REG_ECOLLATE;

3014

3015

/* Got valid collation sequence, add it as a new entry. */

3016

/* Check the space of the arrays. */

3017

if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)__builtin_expect (*coll_sym_alloc == mbcset->ncoll_syms, 0
))

3018

{

3019

/* Not enough, realloc it. */

3020

/* +1 in case of mbcset->ncoll_syms is 0. */

3021

int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;

3022

/* Use realloc since mbcset->coll_syms is NULL

3023

if *alloc == 0. */

3024

int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,((mbcset->coll_syms != ((void*)0)) ? (int32_t *) realloc (
mbcset->coll_syms,(new_coll_sym_alloc)*sizeof(int32_t)) : (
int32_t *) calloc(new_coll_sym_alloc,sizeof(int32_t)))

3025

new_coll_sym_alloc)((mbcset->coll_syms != ((void*)0)) ? (int32_t *) realloc (
mbcset->coll_syms,(new_coll_sym_alloc)*sizeof(int32_t)) : (
int32_t *) calloc(new_coll_sym_alloc,sizeof(int32_t)));

3026

if (BE (new_coll_syms == NULL, 0)__builtin_expect (new_coll_syms == ((void*)0), 0))

3027

return REG_ESPACE;

3028

mbcset->coll_syms = new_coll_syms;

3029

*coll_sym_alloc = new_coll_sym_alloc;

3030

}

3031

mbcset->coll_syms[mbcset->ncoll_syms++] = idx;

3032

return REG_NOERROR;

3033

}

3034

else

3035

{

3036

if (BE (name_len != 1, 0)__builtin_expect (name_len != 1, 0))

3037

return REG_ECOLLATE;

3038

else

3039

{

3040

bitset_set (sbcset, name[0])(sbcset[name[0] / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << name[0] % (sizeof (bitset_word_t) * 8));

3041

return REG_NOERROR;

3042

}

3043

}

3044

}

3045

#endif

3046

3047

re_token_t br_token;

3048

re_bitset_ptr_t sbcset;

3049

#ifdef RE_ENABLE_I18N

3050

re_charset_t *mbcset;

3051

int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;

3052

int equiv_class_alloc = 0, char_class_alloc = 0;

3053

#endif /* not RE_ENABLE_I18N */

3054

int non_match = 0;

3055

bin_tree_t *work_tree;

3056

int token_len;

3057

int first_round = 1;

3058

#ifdef _LIBC

3059

collseqmb = (const unsigned char *)

3060

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);

3061

nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);

3062

if (nrules)

3063

{

3064

3065

if (MB_CUR_MAX > 1)

3066

3067

collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);

3068

table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);

3069

symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,

3070

_NL_COLLATE_SYMB_TABLEMB);

3071

extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,

3072

_NL_COLLATE_SYMB_EXTRAMB);

3073

}

3074

#endif

3075

sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);

3076

#ifdef RE_ENABLE_I18N

3077

mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);

3078

#endif /* RE_ENABLE_I18N */

3079

#ifdef RE_ENABLE_I18N

3080

if (BE (sbcset == NULL || mbcset == NULL, 0)__builtin_expect (sbcset == ((void*)0) || mbcset == ((void*)0
), 0))

3081

#else

3082

if (BE (sbcset == NULL, 0)__builtin_expect (sbcset == ((void*)0), 0))

3083

#endif /* RE_ENABLE_I18N */

3084

{

3085

*err = REG_ESPACE;

3086

return NULL((void*)0);

3087

}

3088

3089

token_len = peek_token_bracket (token, regexp, syntax);

3090

if (BE (token->type == END_OF_RE, 0)__builtin_expect (token->type == END_OF_RE, 0))

3091

{

3092

*err = REG_BADPAT;

3093

goto parse_bracket_exp_free_return;

3094

}

3095

if (token->type == OP_NON_MATCH_LIST)

3096

{

3097

#ifdef RE_ENABLE_I18N

3098

mbcset->non_match = 1;

3099

#endif /* not RE_ENABLE_I18N */

3100

non_match = 1;

3101

if (syntax & RE_HAT_LISTS_NOT_NEWLINE((((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
))

3102

bitset_set (sbcset, '\n')(sbcset['\n' / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << '\n' % (sizeof (bitset_word_t) * 8));

3103

re_string_skip_bytes (regexp, token_len)((regexp)->cur_idx += (token_len)); /* Skip a token. */

3104

token_len = peek_token_bracket (token, regexp, syntax);

3105

if (BE (token->type == END_OF_RE, 0)__builtin_expect (token->type == END_OF_RE, 0))

3106

{

3107

*err = REG_BADPAT;

3108

goto parse_bracket_exp_free_return;

3109

}

3110

}

3111

3112

/* We treat the first ']' as a normal character. */

3113

if (token->type == OP_CLOSE_BRACKET)

3114

token->type = CHARACTER;

3115

3116

while (1)

3117

{

3118

bracket_elem_t start_elem, end_elem;

3119

unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE32];

3120

unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE32];

3121

reg_errcode_t ret;

3122

int token_len2 = 0, is_range_exp = 0;

3123

re_token_t token2;

3124

3125

start_elem.opr.name = start_name_buf;

3126

ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,

3127

syntax, first_round);

3128

if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))

3129

{

3130

*err = ret;

3131

goto parse_bracket_exp_free_return;

3132

}

3133

first_round = 0;

3134

3135

/* Get information about the next token. We need it in any case. */

3136

token_len = peek_token_bracket (token, regexp, syntax);

3137

3138

/* Do not check for ranges if we know they are not allowed. */

3139

if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)

3140

{

3141

if (BE (token->type == END_OF_RE, 0)__builtin_expect (token->type == END_OF_RE, 0))

3142

{

3143

*err = REG_EBRACK;

3144

goto parse_bracket_exp_free_return;

3145

}

3146

if (token->type == OP_CHARSET_RANGE)

3147

{

3148

re_string_skip_bytes (regexp, token_len)((regexp)->cur_idx += (token_len)); /* Skip '-'. */

3149

token_len2 = peek_token_bracket (&token2, regexp, syntax);

3150

if (BE (token2.type == END_OF_RE, 0)__builtin_expect (token2.type == END_OF_RE, 0))

3151

{

3152

*err = REG_EBRACK;

3153

goto parse_bracket_exp_free_return;

3154

}

3155

if (token2.type == OP_CLOSE_BRACKET)

3156

{

3157

/* We treat the last '-' as a normal character. */

3158

re_string_skip_bytes (regexp, -token_len)((regexp)->cur_idx += (-token_len));

3159

token->type = CHARACTER;

3160

}

3161

else

3162

is_range_exp = 1;

3163

}

3164

}

3165

3166

if (is_range_exp == 1)

3167

{

3168

end_elem.opr.name = end_name_buf;

3169

ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,

3170

dfa, syntax, 1);

3171

if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))

3172

{

3173

*err = ret;

3174

goto parse_bracket_exp_free_return;

3175

}

3176

3177

token_len = peek_token_bracket (token, regexp, syntax);

3178

3179

#ifdef _LIBC

3180

*err = build_range_exp (sbcset, mbcset, &range_alloc,

3181

&start_elem, &end_elem);

3182

#else

3183

# ifdef RE_ENABLE_I18N

3184

*err = build_range_exp (sbcset,

3185

dfa->mb_cur_max > 1 ? mbcset : NULL((void*)0),

3186

&range_alloc, &start_elem, &end_elem);

3187

# else

3188

*err = build_range_exp (sbcset, &start_elem, &end_elem);

3189

# endif

3190

#endif /* RE_ENABLE_I18N */

3191

if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))

3192

goto parse_bracket_exp_free_return;

3193

}

3194

else

3195

{

3196

switch (start_elem.type)

3197

{

3198

case SB_CHAR:

3199

bitset_set (sbcset, start_elem.opr.ch)(sbcset[start_elem.opr.ch / (sizeof (bitset_word_t) * 8)] |= (
bitset_word_t) 1 << start_elem.opr.ch % (sizeof (bitset_word_t
) * 8));

3200

break;

3201

#ifdef RE_ENABLE_I18N

3202

case MB_CHAR:

3203

/* Check whether the array has enough space. */

3204

if (BE (mbchar_alloc == mbcset->nmbchars, 0)__builtin_expect (mbchar_alloc == mbcset->nmbchars, 0))

3205

{

3206

wchar_t *new_mbchars;

3207

/* Not enough, realloc it. */

3208

/* +1 in case of mbcset->nmbchars is 0. */

3209

mbchar_alloc = 2 * mbcset->nmbchars + 1;

3210

/* Use realloc since array is NULL if *alloc == 0. */

3211

new_mbchars = re_realloc (mbcset->mbchars, wchar_t,((mbcset->mbchars != ((void*)0)) ? (wchar_t *) realloc (mbcset
->mbchars,(mbchar_alloc)*sizeof(wchar_t)) : (wchar_t *) calloc
(mbchar_alloc,sizeof(wchar_t)))

3212

mbchar_alloc)((mbcset->mbchars != ((void*)0)) ? (wchar_t *) realloc (mbcset
->mbchars,(mbchar_alloc)*sizeof(wchar_t)) : (wchar_t *) calloc
(mbchar_alloc,sizeof(wchar_t)));

3213

if (BE (new_mbchars == NULL, 0)__builtin_expect (new_mbchars == ((void*)0), 0))

3214

goto parse_bracket_exp_espace;

3215

mbcset->mbchars = new_mbchars;

3216

}

3217

mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;

3218

break;

3219

#endif /* RE_ENABLE_I18N */

3220

case EQUIV_CLASS:

3221

*err = build_equiv_class (sbcset,

3222

#ifdef RE_ENABLE_I18N

3223

mbcset, &equiv_class_alloc,

3224

#endif /* RE_ENABLE_I18N */

3225

start_elem.opr.name);

3226

if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))

3227

goto parse_bracket_exp_free_return;

3228

break;

3229

case COLL_SYM:

3230

*err = build_collating_symbol (sbcset,

3231

#ifdef RE_ENABLE_I18N

3232

mbcset, &coll_sym_alloc,

3233

#endif /* RE_ENABLE_I18N */

3234

start_elem.opr.name);

3235

if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))

3236

goto parse_bracket_exp_free_return;

3237

break;

3238

case CHAR_CLASS:

3239

*err = build_charclass (regexp->trans, sbcset,

3240

#ifdef RE_ENABLE_I18N

3241

mbcset, &char_class_alloc,

3242

#endif /* RE_ENABLE_I18N */

3243

(const char *) start_elem.opr.name, syntax);

3244

if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))

3245

goto parse_bracket_exp_free_return;

3246

break;

3247

default:

3248

assert (0)(__builtin_expect(!(0), 0) ? __assert_rtn(__func__, "compat/regex/regcomp.c"
, 3248, "0") : (void)0);

3249

break;

3250

}

3251

}

3252

if (BE (token->type == END_OF_RE, 0)__builtin_expect (token->type == END_OF_RE, 0))

3253

{

3254

*err = REG_EBRACK;

3255

goto parse_bracket_exp_free_return;

3256

}

3257

if (token->type == OP_CLOSE_BRACKET)

3258

break;

3259

}

3260

3261

re_string_skip_bytes (regexp, token_len)((regexp)->cur_idx += (token_len)); /* Skip a token. */

3262

3263

/* If it is non-matching list. */

3264

if (non_match)

3265

bitset_not (sbcset);

3266

3267

#ifdef RE_ENABLE_I18N

3268

/* Ensure only single byte characters are set. */

3269

if (dfa->mb_cur_max > 1)

3270

bitset_mask (sbcset, dfa->sb_char);

3271

3272

if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes

3273

|| mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes

3274

|| mbcset->non_match)))

3275

{

3276

bin_tree_t *mbc_tree;

3277

int sbc_idx;

3278

/* Build a tree for complex bracket. */

3279

dfa->has_mb_node = 1;

3280

br_token.type = COMPLEX_BRACKET;

3281

br_token.opr.mbcset = mbcset;

3282

mbc_tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), &br_token);

3283

if (BE (mbc_tree == NULL, 0)__builtin_expect (mbc_tree == ((void*)0), 0))

3284

goto parse_bracket_exp_espace;

3285

for (sbc_idx = 0; sbc_idx < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++sbc_idx)

3286

if (sbcset[sbc_idx])

3287

break;

3288

/* If there are no bits set in sbcset, there is no point

3289

of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */

3290

if (sbc_idx < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)))

3291

{

3292

/* Build a tree for simple bracket. */

3293

br_token.type = SIMPLE_BRACKET;

3294

br_token.opr.sbcset = sbcset;

3295

work_tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), &br_token);

3296

if (BE (work_tree == NULL, 0)__builtin_expect (work_tree == ((void*)0), 0))

3297

goto parse_bracket_exp_espace;

3298

3299

/* Then join them by ALT node. */

3300

work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);

3301

if (BE (work_tree == NULL, 0)__builtin_expect (work_tree == ((void*)0), 0))

3302

goto parse_bracket_exp_espace;

3303

}

3304

else

3305

{

3306

re_free (sbcset)free (sbcset);

3307

work_tree = mbc_tree;

3308

}

3309

}

3310

else

3311

#endif /* not RE_ENABLE_I18N */

3312

{

3313

#ifdef RE_ENABLE_I18N

3314

free_charset (mbcset);

3315

#endif

3316

/* Build a tree for simple bracket. */

3317

br_token.type = SIMPLE_BRACKET;

3318

br_token.opr.sbcset = sbcset;

3319

work_tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), &br_token);

3320

if (BE (work_tree == NULL, 0)__builtin_expect (work_tree == ((void*)0), 0))

3321

goto parse_bracket_exp_espace;

3322

}

3323

return work_tree;

3324

3325

parse_bracket_exp_espace:

3326

*err = REG_ESPACE;

3327

parse_bracket_exp_free_return:

3328

re_free (sbcset)free (sbcset);

3329

#ifdef RE_ENABLE_I18N

3330

free_charset (mbcset);

3331

#endif /* RE_ENABLE_I18N */

3332

return NULL((void*)0);

3333

}

3334

3335

/* Parse an element in the bracket expression. */

3336

3337

static reg_errcode_t

3338

parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,

3339

re_token_t *token, int token_len, re_dfa_t *dfa,

3340

reg_syntax_t syntax, int accept_hyphen)

3341

{

3342

#ifdef RE_ENABLE_I18N

3343

int cur_char_size;

3344

cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)((regexp)->cur_idx));

3345

if (cur_char_size > 1)

3346

{

3347

elem->type = MB_CHAR;

3348

elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp)((regexp)->cur_idx));

3349

re_string_skip_bytes (regexp, cur_char_size)((regexp)->cur_idx += (cur_char_size));

3350

return REG_NOERROR;

3351

}

3352

#endif /* RE_ENABLE_I18N */

3353

re_string_skip_bytes (regexp, token_len)((regexp)->cur_idx += (token_len)); /* Skip a token. */

3354

if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS

3355

|| token->type == OP_OPEN_EQUIV_CLASS)

3356

return parse_bracket_symbol (elem, regexp, token);

3357

if (BE (token->type == OP_CHARSET_RANGE, 0)__builtin_expect (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)

3358

{

3359

/* A '-' must only appear as anything but a range indicator before

3360

the closing bracket. Everything else is an error. */

3361

re_token_t token2;

3362

(void) peek_token_bracket (&token2, regexp, syntax);

3363

if (token2.type != OP_CLOSE_BRACKET)

3364

/* The actual error value is not standardized since this whole

3365

case is undefined. But ERANGE makes good sense. */

3366

return REG_ERANGE;

3367

}

3368

elem->type = SB_CHAR;

3369

elem->opr.ch = token->opr.c;

3370

return REG_NOERROR;

3371

}

3372

3373

/* Parse a bracket symbol in the bracket expression. Bracket symbols are

3374

such as [:<character_class>:], [.<collating_element>.], and

3375

[=<equivalent_class>=]. */

3376

3377

static reg_errcode_t

3378

parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,

3379

re_token_t *token)

3380

{

3381

unsigned char ch, delim = token->opr.c;

3382

int i = 0;

3383

if (re_string_eoi(regexp)((regexp)->stop <= (regexp)->cur_idx))

3384

return REG_EBRACK;

3385

for (;; ++i)

3386

{

3387

if (i >= BRACKET_NAME_BUF_SIZE32)

3388

return REG_EBRACK;

3389

if (token->type == OP_OPEN_CHAR_CLASS)

3390

ch = re_string_fetch_byte_case (regexp);

3391

else

3392

ch = re_string_fetch_byte (regexp)((regexp)->mbs[(regexp)->cur_idx++]);

3393

if (re_string_eoi(regexp)((regexp)->stop <= (regexp)->cur_idx))

3394

return REG_EBRACK;

3395

if (ch == delim && re_string_peek_byte (regexp, 0)((regexp)->mbs[(regexp)->cur_idx + 0]) == ']')

3396

break;

3397

elem->opr.name[i] = ch;

3398

}

3399

re_string_skip_bytes (regexp, 1)((regexp)->cur_idx += (1));

3400

elem->opr.name[i] = '\0';

3401

switch (token->type)

3402

{

3403

case OP_OPEN_COLL_ELEM:

3404

elem->type = COLL_SYM;

3405

break;

3406

case OP_OPEN_EQUIV_CLASS:

3407

elem->type = EQUIV_CLASS;

3408

break;

3409

case OP_OPEN_CHAR_CLASS:

3410

elem->type = CHAR_CLASS;

3411

break;

3412

default:

3413

break;

3414

}

3415

return REG_NOERROR;

3416

}

3417

3418

/* Helper function for parse_bracket_exp.

3419

Build the equivalence class which is represented by NAME.

3420

The result are written to MBCSET and SBCSET.

3421

EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,

3422

is a pointer argument since we may update it. */

3423

3424

static reg_errcode_t

3425

#ifdef RE_ENABLE_I18N

3426

build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,

3427

int *equiv_class_alloc, const unsigned char *name)

3428

#else /* not RE_ENABLE_I18N */

3429

build_equiv_class (bitset_t sbcset, const unsigned char *name)

3430

#endif /* not RE_ENABLE_I18N */

3431

{

3432

#ifdef _LIBC

3433

uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);

3434

if (nrules != 0)

3435

{

3436

const int32_t *table, *indirect;

3437

const unsigned char *weights, *extra, *cp;

3438

unsigned char char_buf[2];

3439

int32_t idx1, idx2;

3440

unsigned int ch;

3441

size_t len;

3442

/* This #include defines a local function! */

3443

# include <locale/weight.h>

3444

/* Calculate the index for equivalence class. */

3445

cp = name;

3446

table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);

3447

weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,

3448

_NL_COLLATE_WEIGHTMB);

3449

extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,

3450

_NL_COLLATE_EXTRAMB);

3451

indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,

3452

_NL_COLLATE_INDIRECTMB);

3453

idx1 = findidx (&cp);

3454

if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)__builtin_expect (idx1 == 0 || cp < name + strlen ((const char
*) name), 0))

3455

/* This isn't a valid character. */

3456

return REG_ECOLLATE;

3457

3458

/* Build single byte matcing table for this equivalence class. */

3459

char_buf[1] = (unsigned char) '\0';

3460

len = weights[idx1 & 0xffffff];

3461

for (ch = 0; ch < SBC_MAX256; ++ch)

3462

{

3463

char_buf[0] = ch;

3464

cp = char_buf;

3465

idx2 = findidx (&cp);

3466

3467

idx2 = table[ch];

3468

3469

if (idx2 == 0)

3470

/* This isn't a valid character. */

3471

continue;

3472

/* Compare only if the length matches and the collation rule

3473

index is the same. */

3474

if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))

3475

{

3476

int cnt = 0;

3477

3478

while (cnt <= len &&

3479

weights[(idx1 & 0xffffff) + 1 + cnt]

3480

== weights[(idx2 & 0xffffff) + 1 + cnt])

3481

++cnt;

3482

3483

if (cnt > len)

3484

bitset_set (sbcset, ch)(sbcset[ch / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t)
1 << ch % (sizeof (bitset_word_t) * 8));

3485

}

3486

}

3487

/* Check whether the array has enough space. */

3488

if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)__builtin_expect (*equiv_class_alloc == mbcset->nequiv_classes
, 0))

3489

{

3490

/* Not enough, realloc it. */

3491

/* +1 in case of mbcset->nequiv_classes is 0. */

3492

int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;

3493

/* Use realloc since the array is NULL if *alloc == 0. */

3494

int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,((mbcset->equiv_classes != ((void*)0)) ? (int32_t *) realloc
(mbcset->equiv_classes,(new_equiv_class_alloc)*sizeof(int32_t
)) : (int32_t *) calloc(new_equiv_class_alloc,sizeof(int32_t)
))

3495

int32_t,((mbcset->equiv_classes != ((void*)0)) ? (int32_t *) realloc
(mbcset->equiv_classes,(new_equiv_class_alloc)*sizeof(int32_t
)) : (int32_t *) calloc(new_equiv_class_alloc,sizeof(int32_t)
))

3496

new_equiv_class_alloc)((mbcset->equiv_classes != ((void*)0)) ? (int32_t *) realloc
(mbcset->equiv_classes,(new_equiv_class_alloc)*sizeof(int32_t
)) : (int32_t *) calloc(new_equiv_class_alloc,sizeof(int32_t)
));

3497

if (BE (new_equiv_classes == NULL, 0)__builtin_expect (new_equiv_classes == ((void*)0), 0))

3498

return REG_ESPACE;

3499

mbcset->equiv_classes = new_equiv_classes;

3500

*equiv_class_alloc = new_equiv_class_alloc;

3501

}

3502

mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;

3503

}

3504

else

3505

#endif /* _LIBC */

3506

{

3507

if (BE (strlen ((const char *) name) != 1, 0)__builtin_expect (strlen ((const char *) name) != 1, 0))

3508

return REG_ECOLLATE;

3509

bitset_set (sbcset, *name)(sbcset[*name / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << *name % (sizeof (bitset_word_t) * 8));

3510

}

3511

return REG_NOERROR;

3512

}

3513

3514

/* Helper function for parse_bracket_exp.

3515

Build the character class which is represented by NAME.

3516

The result are written to MBCSET and SBCSET.

3517

CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,

3518

is a pointer argument since we may update it. */

3519

3520

static reg_errcode_t

3521

#ifdef RE_ENABLE_I18N

3522

build_charclass (RE_TRANSLATE_TYPEunsigned char * trans, bitset_t sbcset,

3523

re_charset_t *mbcset, int *char_class_alloc,

3524

const char *class_name, reg_syntax_t syntax)

3525

#else /* not RE_ENABLE_I18N */

3526

build_charclass (RE_TRANSLATE_TYPEunsigned char * trans, bitset_t sbcset,

3527

const char *class_name, reg_syntax_t syntax)

3528

#endif /* not RE_ENABLE_I18N */

3529

{

3530

int i;

3531

3532

/* In case of REG_ICASE "upper" and "lower" match the both of

3533

upper and lower cases. */

3534

if ((syntax & RE_ICASE(((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1))

3535

&& (strcmp (class_name, "upper") == 0 || strcmp (class_name, "lower") == 0))

3536

class_name = "alpha";

3537

3538

#ifdef RE_ENABLE_I18N

3539

/* Check the space of the arrays. */

3540

if (BE (*char_class_alloc == mbcset->nchar_classes, 0)__builtin_expect (*char_class_alloc == mbcset->nchar_classes
, 0))

3541

{

3542

/* Not enough, realloc it. */

3543

/* +1 in case of mbcset->nchar_classes is 0. */

3544

int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;

3545

/* Use realloc since array is NULL if *alloc == 0. */

3546

wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,((mbcset->char_classes != ((void*)0)) ? (wctype_t *) realloc
(mbcset->char_classes,(new_char_class_alloc)*sizeof(wctype_t
)) : (wctype_t *) calloc(new_char_class_alloc,sizeof(wctype_t
)))

3547

new_char_class_alloc)((mbcset->char_classes != ((void*)0)) ? (wctype_t *) realloc
(mbcset->char_classes,(new_char_class_alloc)*sizeof(wctype_t
)) : (wctype_t *) calloc(new_char_class_alloc,sizeof(wctype_t
)));

3548

if (BE (new_char_classes == NULL, 0)__builtin_expect (new_char_classes == ((void*)0), 0))

3549

return REG_ESPACE;

3550

mbcset->char_classes = new_char_classes;

3551

*char_class_alloc = new_char_class_alloc;

3552

}

3553

mbcset->char_classes[mbcset->nchar_classes++] = __wctypewctype (class_name);

3554

#endif /* RE_ENABLE_I18N */

3555

3556

#define BUILD_CHARCLASS_LOOP(ctype_func)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (ctype_func (i)) (sbcset[trans[i] / (
sizeof (bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans
[i] % (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (ctype_func (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0) \

3557

do { \

3558

if (BE (trans != NULL, 0)__builtin_expect (trans != ((void*)0), 0)) \

3559

{ \

3560

for (i = 0; i < SBC_MAX256; ++i) \

3561

if (ctype_func (i)) \

3562

bitset_set (sbcset, trans[i])(sbcset[trans[i] / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << trans[i] % (sizeof (bitset_word_t) * 8)); \

3563

} \

3564

else \

3565

{ \

3566

for (i = 0; i < SBC_MAX256; ++i) \

3567

if (ctype_func (i)) \

3568

bitset_set (sbcset, i)(sbcset[i / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t) 1
<< i % (sizeof (bitset_word_t) * 8)); \

3569

} \

3570

} while (0)

3571

3572

if (strcmp (class_name, "alnum") == 0)

3573

BUILD_CHARCLASS_LOOP (isalnum)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isalnum (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isalnum (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0);

3574

else if (strcmp (class_name, "cntrl") == 0)

3575

BUILD_CHARCLASS_LOOP (iscntrl)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (iscntrl (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (iscntrl (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0);

3576

else if (strcmp (class_name, "lower") == 0)

3577

BUILD_CHARCLASS_LOOP (islower)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (islower (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (islower (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0);

3578

else if (strcmp (class_name, "space") == 0)

3579

BUILD_CHARCLASS_LOOP (isspace)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isspace (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isspace (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0);

3580

else if (strcmp (class_name, "alpha") == 0)

3581

BUILD_CHARCLASS_LOOP (isalpha)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isalpha (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isalpha (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0);

3582

else if (strcmp (class_name, "digit") == 0)

3583

BUILD_CHARCLASS_LOOP (isdigit)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isdigit (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isdigit (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0);

3584

else if (strcmp (class_name, "print") == 0)

3585

BUILD_CHARCLASS_LOOP (isprint)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isprint (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isprint (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0);

3586

else if (strcmp (class_name, "upper") == 0)

3587

BUILD_CHARCLASS_LOOP (isupper)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isupper (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isupper (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0);

3588

else if (strcmp (class_name, "blank") == 0)

3589

#ifndef GAWK1

3590

BUILD_CHARCLASS_LOOP (isblank)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isblank (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isblank (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0);

3591

#else

3592

/* see comments above */

3593

BUILD_CHARCLASS_LOOP (is_blank)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (is_blank (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (is_blank (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0);

3594

#endif

3595

else if (strcmp (class_name, "graph") == 0)

3596

BUILD_CHARCLASS_LOOP (isgraph)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isgraph (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isgraph (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0);

3597

else if (strcmp (class_name, "punct") == 0)

3598

BUILD_CHARCLASS_LOOP (ispunct)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (ispunct (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (ispunct (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0);

3599

else if (strcmp (class_name, "xdigit") == 0)

3600

BUILD_CHARCLASS_LOOP (isxdigit)do { if (__builtin_expect (trans != ((void*)0), 0)) { for (i =
0; i < 256; ++i) if (isxdigit (i)) (sbcset[trans[i] / (sizeof
(bitset_word_t) * 8)] |= (bitset_word_t) 1 << trans[i]
% (sizeof (bitset_word_t) * 8)); } else { for (i = 0; i <
256; ++i) if (isxdigit (i)) (sbcset[i / (sizeof (bitset_word_t
) * 8)] |= (bitset_word_t) 1 << i % (sizeof (bitset_word_t
) * 8)); } } while (0);

3601

else

3602

return REG_ECTYPE;

3603

3604

return REG_NOERROR;

3605

}

3606

3607

static bin_tree_t *

3608

build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPEunsigned char * trans,

3609

const char *class_name,

3610

const char *extra, int non_match,

3611

reg_errcode_t *err)

3612

{

3613

re_bitset_ptr_t sbcset;

3614

#ifdef RE_ENABLE_I18N

3615

re_charset_t *mbcset;

3616

int alloc = 0;

3617

#endif /* not RE_ENABLE_I18N */

3618

reg_errcode_t ret;

3619

re_token_t br_token;

3620

bin_tree_t *tree;

3621

3622

sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);

3623

#ifdef RE_ENABLE_I18N

3624

mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);

3625

#endif /* RE_ENABLE_I18N */

3626

3627

#ifdef RE_ENABLE_I18N

3628

if (BE (sbcset == NULL || mbcset == NULL, 0)__builtin_expect (sbcset == ((void*)0) || mbcset == ((void*)0
), 0))

3629

#else /* not RE_ENABLE_I18N */

3630

if (BE (sbcset == NULL, 0)__builtin_expect (sbcset == ((void*)0), 0))

3631

#endif /* not RE_ENABLE_I18N */

3632

{

3633

*err = REG_ESPACE;

3634

return NULL((void*)0);

3635

}

3636

3637

if (non_match)

3638

{

3639

#ifdef RE_ENABLE_I18N

3640

mbcset->non_match = 1;

3641

#endif /* not RE_ENABLE_I18N */

3642

}

3643

3644

/* We don't care the syntax in this case. */

3645

ret = build_charclass (trans, sbcset,

3646

#ifdef RE_ENABLE_I18N

3647

mbcset, &alloc,

3648

#endif /* RE_ENABLE_I18N */

3649

class_name, 0);

3650

3651

if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))

3652

{

3653

re_free (sbcset)free (sbcset);

3654

#ifdef RE_ENABLE_I18N

3655

free_charset (mbcset);

3656

#endif /* RE_ENABLE_I18N */

3657

*err = ret;

3658

return NULL((void*)0);

3659

}

3660

/* \w match '_' also. */

3661

for (; *extra; extra++)

3662

bitset_set (sbcset, *extra)(sbcset[*extra / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << *extra % (sizeof (bitset_word_t) * 8));

3663

3664

/* If it is non-matching list. */

3665

if (non_match)

3666

bitset_not (sbcset);

3667

3668

#ifdef RE_ENABLE_I18N

3669

/* Ensure only single byte characters are set. */

3670

if (dfa->mb_cur_max > 1)

3671

bitset_mask (sbcset, dfa->sb_char);

3672

#endif

3673

3674

/* Build a tree for simple bracket. */

3675

br_token.type = SIMPLE_BRACKET;

3676

br_token.opr.sbcset = sbcset;

3677

tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), &br_token);

3678

if (BE (tree == NULL, 0)__builtin_expect (tree == ((void*)0), 0))

3679

goto build_word_op_espace;

3680

3681

#ifdef RE_ENABLE_I18N

3682

if (dfa->mb_cur_max > 1)

3683

{

3684

bin_tree_t *mbc_tree;

3685

/* Build a tree for complex bracket. */

3686

br_token.type = COMPLEX_BRACKET;

3687

br_token.opr.mbcset = mbcset;

3688

dfa->has_mb_node = 1;

3689

mbc_tree = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), &br_token);

3690

if (BE (mbc_tree == NULL, 0)__builtin_expect (mbc_tree == ((void*)0), 0))

3691

goto build_word_op_espace;

3692

/* Then join them by ALT node. */

3693

tree = create_tree (dfa, tree, mbc_tree, OP_ALT);

3694

if (BE (mbc_tree != NULL, 1)__builtin_expect (mbc_tree != ((void*)0), 1))

3695

return tree;

3696

}

3697

else

3698

{

3699

free_charset (mbcset);

3700

return tree;

3701

}

3702

#else /* not RE_ENABLE_I18N */

3703

return tree;

3704

#endif /* not RE_ENABLE_I18N */

3705

3706

build_word_op_espace:

3707

re_free (sbcset)free (sbcset);

3708

#ifdef RE_ENABLE_I18N

3709

free_charset (mbcset);

3710

#endif /* RE_ENABLE_I18N */

3711

*err = REG_ESPACE;

3712

return NULL((void*)0);

3713

}

3714

3715

/* This is intended for the expressions like "a{1,3}".

3716

Fetch a number from `input', and return the number.

3717

Return -1, if the number field is empty like "{,1}".

3718

Return -2, if an error has occurred. */

3719

3720

static int

3721

fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)

3722

{

3723

int num = -1;

3724

unsigned char c;

3725

while (1)

3726

{

3727

fetch_token (token, input, syntax);

3728

c = token->opr.c;

3729

if (BE (token->type == END_OF_RE, 0)__builtin_expect (token->type == END_OF_RE, 0))

3730

return -2;

3731

if (token->type == OP_CLOSE_DUP_NUM || c == ',')

3732

break;

3733

num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)

3734

? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));

3735

num = (num > RE_DUP_MAX(0x7fff)) ? -2 : num;

3736

}

3737

return num;

3738

}

3739

3740

#ifdef RE_ENABLE_I18N

3741

static void

3742

free_charset (re_charset_t *cset)

3743

{

3744

re_free (cset->mbchars)free (cset->mbchars);

3745

# ifdef _LIBC

3746

re_free (cset->coll_syms)free (cset->coll_syms);

3747

re_free (cset->equiv_classes)free (cset->equiv_classes);

3748

re_free (cset->range_starts)free (cset->range_starts);

3749

re_free (cset->range_ends)free (cset->range_ends);

3750

# endif

3751

re_free (cset->char_classes)free (cset->char_classes);

3752

re_free (cset)free (cset);

3753

}

3754

#endif /* RE_ENABLE_I18N */

3755

3756

/* Functions for binary tree operation. */

3757

3758

/* Create a tree node. */

3759

3760

static bin_tree_t *

3761

create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,

3762

re_token_type_t type)

3763

{

3764

re_token_t t;

3765

t.type = type;

3766

return create_token_tree (dfa, left, right, &t);

←

Calling 'create_token_tree'

→

←

Returning from 'create_token_tree'

→

3767

}

3768

3769

static bin_tree_t *

3770

create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,

3771

const re_token_t *token)

3772

{

3773

bin_tree_t *tree;

3774

if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0)__builtin_expect (dfa->str_tree_storage_idx == ((1024 - sizeof
(void *)) / sizeof (bin_tree_t)), 0))

←

Taking false branch

→

3775

{

3776

bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1)((bin_tree_storage_t *) malloc ((1) * sizeof (bin_tree_storage_t
)));

3777

3778

if (storage == NULL((void*)0))

3779

return NULL((void*)0);

3780

storage->next = dfa->str_tree_storage;

3781

dfa->str_tree_storage = storage;

3782

dfa->str_tree_storage_idx = 0;

3783

}

3784

tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];

3785

3786

tree->parent = NULL((void*)0);

3787

tree->left = left;

3788

tree->right = right;

3789

tree->token = *token;

3790

tree->token.duplicated = 0;

3791

tree->token.opt_subexp = 0;

3792

tree->first = NULL((void*)0);

←

Null pointer value stored to field 'first'

→

3793

tree->next = NULL((void*)0);

3794

tree->node_idx = -1;

3795

3796

if (left != NULL((void*)0))

←

Taking false branch

→

3797

left->parent = tree;

3798

if (right != NULL((void*)0))

←

Taking false branch

→

3799

right->parent = tree;

3800

return tree;

3801

}

3802

3803

/* Mark the tree SRC as an optional subexpression.

3804

To be called from preorder or postorder. */

3805

3806

static reg_errcode_t

3807

mark_opt_subexp (void *extra, bin_tree_t *node)

3808

{

3809

int idx = (int) (intptr_t) extra;

3810

if (node->token.type == SUBEXP && node->token.opr.idx == idx)

3811

node->token.opt_subexp = 1;

3812

3813

return REG_NOERROR;

3814

}

3815

3816

/* Free the allocated memory inside NODE. */

3817

3818

static void

3819

free_token (re_token_t *node)

3820

{

3821

#ifdef RE_ENABLE_I18N

3822

if (node->type == COMPLEX_BRACKET && node->duplicated == 0)

3823

free_charset (node->opr.mbcset);

3824

else

3825

#endif /* RE_ENABLE_I18N */

3826

if (node->type == SIMPLE_BRACKET && node->duplicated == 0)

3827

re_free (node->opr.sbcset)free (node->opr.sbcset);

3828

}

3829

3830

/* Worker function for tree walking. Free the allocated memory inside NODE

3831

and its children. */

3832

3833

static reg_errcode_t

3834

free_tree (void *extra, bin_tree_t *node)

3835

{

3836

free_token (&node->token);

3837

return REG_NOERROR;

3838

}

3839

3840

3841

/* Duplicate the node SRC, and return new node. This is a preorder

3842

visit similar to the one implemented by the generic visitor, but

3843

we need more infrastructure to maintain two parallel trees --- so,

3844

it's easier to duplicate. */

3845

3846

static bin_tree_t *

3847

duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)

3848

{

3849

const bin_tree_t *node;

3850

bin_tree_t *dup_root;

3851

bin_tree_t **p_new = &dup_root, *dup_node = root->parent;

3852

3853

for (node = root; ; )

3854

{

3855

/* Create a new tree and link it back to the current parent. */

3856

*p_new = create_token_tree (dfa, NULL((void*)0), NULL((void*)0), &node->token);

3857

if (*p_new == NULL((void*)0))

3858

return NULL((void*)0);

3859

(*p_new)->parent = dup_node;

3860

(*p_new)->token.duplicated = 1;

3861

dup_node = *p_new;

3862

3863

/* Go to the left node, or up and to the right. */

3864

if (node->left)

3865

{

3866

node = node->left;

3867

p_new = &dup_node->left;

3868

}

3869

else

3870

{

3871

const bin_tree_t *prev = NULL((void*)0);

3872

while (node->right == prev || node->right == NULL((void*)0))

3873

{

3874

prev = node;

3875

node = node->parent;

3876

dup_node = dup_node->parent;

3877

if (!node)

3878

return dup_root;

3879

}

3880

node = node->right;

3881

p_new = &dup_node->right;

3882

}

3883

}

3884

}