compat/regex/regexec.c

Bug Summary

File:	compat/regex/regexec.c
Location:	line 3481, column 15
Description:	Assigned value is garbage or undefined

Annotated Source Code

/* Extended regular expression matching and search library.

This file is part of the GNU C Library.

Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.

The GNU C Library is free software; you can redistribute it and/or

modify it under the terms of the GNU Lesser General Public

License as published by the Free Software Foundation; either

version 2.1 of the License, or (at your option) any later version.

The GNU C Library is distributed in the hope that it will be useful,

but WITHOUT ANY WARRANTY; without even the implied warranty of

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public

License along with the GNU C Library; if not, write to the Free

Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA

02110-1301 USA. */

static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,

int n) internal_function;

static void match_ctx_clean (re_match_context_t *mctx) internal_function;

static void match_ctx_free (re_match_context_t *cache) internal_function;

static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,

int str_idx, int from, int to)

internal_function;

static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)

internal_function;

static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,

int str_idx) internal_function;

static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,

int node, int str_idx)

internal_function;

static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,

re_dfastate_t **limited_sts, int last_node,

int last_str_idx)

internal_function;

static reg_errcode_t re_search_internal (const regex_t *preg,

const char *string, int length,

int start, int range, int stop,

size_t nmatch, regmatch_t pmatch[],

int eflags);

static int re_search_2_stub (struct re_pattern_buffer *bufp,

const char *string1, int length1,

const char *string2, int length2,

int start, int range, struct re_registers *regs,

int stop, int ret_len);

static int re_search_stub (struct re_pattern_buffer *bufp,

const char *string, int length, int start,

int range, int stop, struct re_registers *regs,

int ret_len);

static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,

int nregs, int regs_allocated);

static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);

static int check_matching (re_match_context_t *mctx, int fl_longest_match,

int *p_match_first) internal_function;

static int check_halt_state_context (const re_match_context_t *mctx,

const re_dfastate_t *state, int idx)

internal_function;

static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,

regmatch_t *prev_idx_match, int cur_node,

int cur_idx, int nmatch) internal_function;

static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,

int str_idx, int dest_node, int nregs,

regmatch_t *regs,

re_node_set *eps_via_nodes)

internal_function;

static reg_errcode_t set_regs (const regex_t *preg,

const re_match_context_t *mctx,

size_t nmatch, regmatch_t *pmatch,

int fl_backtrack) internal_function;

static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)

internal_function;

#ifdef RE_ENABLE_I18N

static int sift_states_iter_mb (const re_match_context_t *mctx,

re_sift_context_t *sctx,

int node_idx, int str_idx, int max_str_idx)

internal_function;

#endif /* RE_ENABLE_I18N */

static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,

re_sift_context_t *sctx)

internal_function;

static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,

re_sift_context_t *sctx, int str_idx,

re_node_set *cur_dest)

internal_function;

static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,

re_sift_context_t *sctx,

int str_idx,

re_node_set *dest_nodes)

internal_function;

static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,

re_node_set *dest_nodes,

const re_node_set *candidates)

internal_function;

static int check_dst_limits (const re_match_context_t *mctx,

re_node_set *limits,

100

int dst_node, int dst_idx, int src_node,

101

int src_idx) internal_function;

102

static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,

103

int boundaries, int subexp_idx,

104

int from_node, int bkref_idx)

105

internal_function;

106

static int check_dst_limits_calc_pos (const re_match_context_t *mctx,

107

int limit, int subexp_idx,

108

int node, int str_idx,

109

int bkref_idx) internal_function;

110

static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,

111

re_node_set *dest_nodes,

112

const re_node_set *candidates,

113

re_node_set *limits,

114

struct re_backref_cache_entry *bkref_ents,

115

int str_idx) internal_function;

116

static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,

117

re_sift_context_t *sctx,

118

int str_idx, const re_node_set *candidates)

119

internal_function;

120

static reg_errcode_t merge_state_array (const re_dfa_t *dfa,

121

re_dfastate_t **dst,

122

re_dfastate_t **src, int num)

123

internal_function;

124

static re_dfastate_t *find_recover_state (reg_errcode_t *err,

125

re_match_context_t *mctx) internal_function;

126

static re_dfastate_t *transit_state (reg_errcode_t *err,

127

re_match_context_t *mctx,

128

re_dfastate_t *state) internal_function;

129

static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,

130

re_match_context_t *mctx,

131

re_dfastate_t *next_state)

132

internal_function;

133

static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,

134

re_node_set *cur_nodes,

135

int str_idx) internal_function;

136

#if 0

137

static re_dfastate_t *transit_state_sb (reg_errcode_t *err,

138

re_match_context_t *mctx,

139

re_dfastate_t *pstate)

140

internal_function;

141

#endif

142

#ifdef RE_ENABLE_I18N

143

static reg_errcode_t transit_state_mb (re_match_context_t *mctx,

144

re_dfastate_t *pstate)

145

internal_function;

146

#endif /* RE_ENABLE_I18N */

147

static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,

148

const re_node_set *nodes)

149

internal_function;

150

static reg_errcode_t get_subexp (re_match_context_t *mctx,

151

int bkref_node, int bkref_str_idx)

152

internal_function;

153

static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,

154

const re_sub_match_top_t *sub_top,

155

re_sub_match_last_t *sub_last,

156

int bkref_node, int bkref_str)

157

internal_function;

158

static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,

159

int subexp_idx, int type) internal_function;

160

static reg_errcode_t check_arrival (re_match_context_t *mctx,

161

state_array_t *path, int top_node,

162

int top_str, int last_node, int last_str,

163

int type) internal_function;

164

static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,

165

int str_idx,

166

re_node_set *cur_nodes,

167

re_node_set *next_nodes)

168

internal_function;

169

static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,

170

re_node_set *cur_nodes,

171

int ex_subexp, int type)

172

internal_function;

173

static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,

174

re_node_set *dst_nodes,

175

int target, int ex_subexp,

176

int type) internal_function;

177

static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,

178

re_node_set *cur_nodes, int cur_str,

179

int subexp_num, int type)

180

internal_function;

181

static int build_trtable (const re_dfa_t *dfa,

182

re_dfastate_t *state) internal_function;

183

#ifdef RE_ENABLE_I18N

184

static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,

185

const re_string_t *input, int idx)

186

internal_function;

187

# ifdef _LIBC

188

static unsigned int find_collation_sequence_value (const unsigned char *mbs,

189

size_t name_len)

190

internal_function;

191

# endif /* _LIBC */

192

#endif /* RE_ENABLE_I18N */

193

static int group_nodes_into_DFAstates (const re_dfa_t *dfa,

194

const re_dfastate_t *state,

195

re_node_set *states_node,

196

bitset_t *states_ch) internal_function;

197

static int check_node_accept (const re_match_context_t *mctx,

198

const re_token_t *node, int idx)

199

internal_function;

200

static reg_errcode_t extend_buffers (re_match_context_t *mctx)

201

internal_function;

202

203

/* Entry point for POSIX code. */

204

205

/* regexec searches for a given pattern, specified by PREG, in the

206

string STRING.

207

208

If NMATCH is zero or REG_NOSUB was set in the cflags argument to

209

`regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at

210

least NMATCH elements, and we set them to the offsets of the

211

corresponding matched substrings.

212

213

EFLAGS specifies `execution flags' which affect matching: if

214

REG_NOTBOL is set, then ^ does not match at the beginning of the

215

string; if REG_NOTEOL is set, then $ does not match at the end.

216

217

We return 0 if we find a match and REG_NOMATCH if not. */

218

219

int

220

regexec (

221

const regex_t *__restrictrestrict preg,

222

const char *__restrictrestrict string,

223

size_t nmatch,

224

regmatch_t pmatch[],

225

int eflags)

226

{

227

reg_errcode_t err;

228

int start, length;

229

230

if (eflags & ~(REG_NOTBOL1 | REG_NOTEOL(1 << 1) | REG_STARTEND(1 << 2)))

231

return REG_BADPAT;

232

233

if (eflags & REG_STARTEND(1 << 2))

234

{

235

start = pmatch[0].rm_so;

236

length = pmatch[0].rm_eo;

237

}

238

else

239

{

240

start = 0;

241

length = strlen (string);

242

}

243

244

__libc_lock_lock (dfa->lock)do { } while (0);

245

if (preg->no_sub)

246

err = re_search_internal (preg, string, length, start, length - start,

247

length, 0, NULL((void*)0), eflags);

248

else

249

err = re_search_internal (preg, string, length, start, length - start,

250

length, nmatch, pmatch, eflags);

251

__libc_lock_unlock (dfa->lock)do { } while (0);

252

return err != REG_NOERROR;

253

}

254

255

#ifdef _LIBC

256

# include <shlib-compat.h>

257

versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);

258

259

# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)

260

__typeof__ (__regexec) __compat_regexec;

261

262

int

263

attribute_compat_text_section

264

__compat_regexec (const regex_t *__restrictrestrict preg,

265

const char *__restrictrestrict string, size_t nmatch,

266

regmatch_t pmatch[], int eflags)

267

{

268

return regexec (preg, string, nmatch, pmatch,

269

eflags & (REG_NOTBOL1 | REG_NOTEOL(1 << 1)));

270

}

271

compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);

272

# endif

273

#endif

274

275

/* Entry points for GNU code. */

276

277

/* re_match, re_search, re_match_2, re_search_2

278

279

The former two functions operate on STRING with length LENGTH,

280

while the later two operate on concatenation of STRING1 and STRING2

281

with lengths LENGTH1 and LENGTH2, respectively.

282

283

re_match() matches the compiled pattern in BUFP against the string,

284

starting at index START.

285

286

re_search() first tries matching at index START, then it tries to match

287

starting from index START + 1, and so on. The last start position tried

288

is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same

289

way as re_match().)

290

291

The parameter STOP of re_{match,search}_2 specifies that no match exceeding

292

the first STOP characters of the concatenation of the strings should be

293

concerned.

294

295

If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match

296

and all groups is stroed in REGS. (For the "_2" variants, the offsets are

297

computed relative to the concatenation, not relative to the individual

298

strings.)

299

300

On success, re_match* functions return the length of the match, re_search*

301

return the position of the start of the match. Return value -1 means no

302

match was found and -2 indicates an internal error. */

303

304

int

305

re_match (struct re_pattern_buffer *bufp,

306

const char *string,

307

int length,

308

int start,

309

struct re_registers *regs)

310

{

311

return re_search_stub (bufp, string, length, start, 0, length, regs, 1);

312

}

313

#ifdef _LIBC

314

weak_alias (__re_match, re_match)

315

#endif

316

317

int

318

re_search (struct re_pattern_buffer *bufp,

319

const char *string,

320

int length, int start, int range,

321

struct re_registers *regs)

322

{

323

return re_search_stub (bufp, string, length, start, range, length, regs, 0);

324

}

325

#ifdef _LIBC

326

weak_alias (__re_search, re_search)

327

#endif

328

329

int

330

re_match_2 (struct re_pattern_buffer *bufp,

331

const char *string1, int length1,

332

const char *string2, int length2, int start,

333

struct re_registers *regs, int stop)

334

{

335

return re_search_2_stub (bufp, string1, length1, string2, length2,

336

start, 0, regs, stop, 1);

337

}

338

#ifdef _LIBC

339

weak_alias (__re_match_2, re_match_2)

340

#endif

341

342

int

343

re_search_2 (struct re_pattern_buffer *bufp,

344

const char *string1, int length1,

345

const char *string2, int length2, int start,

346

int range, struct re_registers *regs, int stop)

347

{

348

return re_search_2_stub (bufp, string1, length1, string2, length2,

349

start, range, regs, stop, 0);

350

}

351

#ifdef _LIBC

352

weak_alias (__re_search_2, re_search_2)

353

#endif

354

355

static int

356

re_search_2_stub (struct re_pattern_buffer *bufp,

357

const char *string1, int length1,

358

const char *string2, int length2, int start,

359

int range, struct re_registers *regs,

360

int stop, int ret_len)

361

{

362

const char *str;

363

int rval;

364

int len = length1 + length2;

365

int free_str = 0;

366

367

if (BE (length1 < 0 || length2 < 0 || stop < 0, 0)__builtin_expect (length1 < 0 || length2 < 0 || stop <
0, 0))

368

return -2;

369

370

/* Concatenate the strings. */

371

if (length2 > 0)

372

if (length1 > 0)

373

{

374

char *s = re_malloc (char, len)((char *) malloc ((len) * sizeof (char)));

375

376

if (BE (s == NULL, 0)__builtin_expect (s == ((void*)0), 0))

377

return -2;

378

memcpy (s, string1, length1)__builtin___memcpy_chk (s, string1, length1, __builtin_object_size
(s, 0));

379

memcpy (s + length1, string2, length2)__builtin___memcpy_chk (s + length1, string2, length2, __builtin_object_size
(s + length1, 0));

380

str = s;

381

free_str = 1;

382

}

383

else

384

str = string2;

385

else

386

str = string1;

387

388

rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len);

389

if (free_str)

390

re_free ((char *) str)free ((char *) str);

391

return rval;

392

}

393

394

/* The parameters have the same meaning as those of re_search.

395

Additional parameters:

396

If RET_LEN is nonzero the length of the match is returned (re_match style);

397

otherwise the position of the match is returned. */

398

399

static int

400

re_search_stub (struct re_pattern_buffer *bufp,

401

const char *string, int length, int start,

402

int range, int stop,

403

struct re_registers *regs, int ret_len)

404

{

405

reg_errcode_t result;

406

regmatch_t *pmatch;

407

int nregs, rval;

408

int eflags = 0;

409

410

/* Check for out-of-range. */

411

if (BE (start < 0 || start > length, 0)__builtin_expect (start < 0 || start > length, 0))

412

return -1;

413

if (BE (start + range > length, 0)__builtin_expect (start + range > length, 0))

414

range = length - start;

415

else if (BE (start + range < 0, 0)__builtin_expect (start + range < 0, 0))

416

range = -start;

417

418

__libc_lock_lock (dfa->lock)do { } while (0);

419

420

eflags |= (bufp->not_bol) ? REG_NOTBOL1 : 0;

421

eflags |= (bufp->not_eol) ? REG_NOTEOL(1 << 1) : 0;

422

423

/* Compile fastmap if we haven't yet. */

424

if (range > 0 && bufp->fastmap != NULL((void*)0) && !bufp->fastmap_accurate)

425

re_compile_fastmap (bufp);

426

427

if (BE (bufp->no_sub, 0)__builtin_expect (bufp->no_sub, 0))

428

regs = NULL((void*)0);

429

430

/* We need at least 1 register. */

431

if (regs == NULL((void*)0))

432

nregs = 1;

433

else if (BE (bufp->regs_allocated == REGS_FIXED &&__builtin_expect (bufp->regs_allocated == 2 && regs
->num_regs < bufp->re_nsub + 1, 0)

434

regs->num_regs < bufp->re_nsub + 1, 0)__builtin_expect (bufp->regs_allocated == 2 && regs
->num_regs < bufp->re_nsub + 1, 0))

435

{

436

nregs = regs->num_regs;

437

if (BE (nregs < 1, 0)__builtin_expect (nregs < 1, 0))

438

{

439

/* Nothing can be copied to regs. */

440

regs = NULL((void*)0);

441

nregs = 1;

442

}

443

}

444

else

445

nregs = bufp->re_nsub + 1;

446

pmatch = re_malloc (regmatch_t, nregs)((regmatch_t *) malloc ((nregs) * sizeof (regmatch_t)));

447

if (BE (pmatch == NULL, 0)__builtin_expect (pmatch == ((void*)0), 0))

448

{

449

rval = -2;

450

goto out;

451

}

452

453

result = re_search_internal (bufp, string, length, start, range, stop,

454

nregs, pmatch, eflags);

455

456

rval = 0;

457

458

/* I hope we needn't fill their regs with -1's when no match was found. */

459

if (result != REG_NOERROR)

460

rval = -1;

461

else if (regs != NULL((void*)0))

462

{

463

/* If caller wants register contents data back, copy them. */

464

bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,

465

bufp->regs_allocated);

466

if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)__builtin_expect (bufp->regs_allocated == 0, 0))

467

rval = -2;

468

}

469

470

if (BE (rval == 0, 1)__builtin_expect (rval == 0, 1))

471

{

472

if (ret_len)

473

{

474

assert (pmatch[0].rm_so == start)(__builtin_expect(!(pmatch[0].rm_so == start), 0) ? __assert_rtn
(__func__, "compat/regex/regexec.c", 474, "pmatch[0].rm_so == start"
) : (void)0);

475

rval = pmatch[0].rm_eo - start;

476

}

477

else

478

rval = pmatch[0].rm_so;

479

}

480

re_free (pmatch)free (pmatch);

481

out:

482

__libc_lock_unlock (dfa->lock)do { } while (0);

483

return rval;

484

}

485

486

static unsigned

487

re_copy_regs (struct re_registers *regs,

488

regmatch_t *pmatch,

489

int nregs, int regs_allocated)

490

{

491

int rval = REGS_REALLOCATE1;

492

int i;

493

int need_regs = nregs + 1;

494

/* We need one extra element beyond `num_regs' for the `-1' marker GNU code

495

uses. */

496

497

/* Have the register data arrays been allocated? */

498

if (regs_allocated == REGS_UNALLOCATED0)

499

{ /* No. So allocate them with malloc. */

500

regs->start = re_malloc (regoff_t, need_regs)((regoff_t *) malloc ((need_regs) * sizeof (regoff_t)));

501

if (BE (regs->start == NULL, 0)__builtin_expect (regs->start == ((void*)0), 0))

502

return REGS_UNALLOCATED0;

503

regs->end = re_malloc (regoff_t, need_regs)((regoff_t *) malloc ((need_regs) * sizeof (regoff_t)));

504

if (BE (regs->end == NULL, 0)__builtin_expect (regs->end == ((void*)0), 0))

505

{

506

re_free (regs->start)free (regs->start);

507

return REGS_UNALLOCATED0;

508

}

509

regs->num_regs = need_regs;

510

}

511

else if (regs_allocated == REGS_REALLOCATE1)

512

{ /* Yes. If we need more elements than were already

513

allocated, reallocate them. If we need fewer, just

514

leave it alone. */

515

if (BE (need_regs > regs->num_regs, 0)__builtin_expect (need_regs > regs->num_regs, 0))

516

{

517

regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs)((regs->start != ((void*)0)) ? (regoff_t *) realloc (regs->
start,(need_regs)*sizeof(regoff_t)) : (regoff_t *) calloc(need_regs
,sizeof(regoff_t)));

518

regoff_t *new_end;

519

if (BE (new_start == NULL, 0)__builtin_expect (new_start == ((void*)0), 0))

520

return REGS_UNALLOCATED0;

521

new_end = re_realloc (regs->end, regoff_t, need_regs)((regs->end != ((void*)0)) ? (regoff_t *) realloc (regs->
end,(need_regs)*sizeof(regoff_t)) : (regoff_t *) calloc(need_regs
,sizeof(regoff_t)));

522

if (BE (new_end == NULL, 0)__builtin_expect (new_end == ((void*)0), 0))

523

{

524

re_free (new_start)free (new_start);

525

return REGS_UNALLOCATED0;

526

}

527

regs->start = new_start;

528

regs->end = new_end;

529

regs->num_regs = need_regs;

530

}

531

}

532

else

533

{

534

assert (regs_allocated == REGS_FIXED)(__builtin_expect(!(regs_allocated == 2), 0) ? __assert_rtn(__func__
, "compat/regex/regexec.c", 534, "regs_allocated == REGS_FIXED"
) : (void)0);

535

/* This function may not be called with REGS_FIXED and nregs too big. */

536

assert (regs->num_regs >= nregs)(__builtin_expect(!(regs->num_regs >= nregs), 0) ? __assert_rtn
(__func__, "compat/regex/regexec.c", 536, "regs->num_regs >= nregs"
) : (void)0);

537

rval = REGS_FIXED2;

538

}

539

540

/* Copy the regs. */

541

for (i = 0; i < nregs; ++i)

542

{

543

regs->start[i] = pmatch[i].rm_so;

544

regs->end[i] = pmatch[i].rm_eo;

545

}

546

for ( ; i < regs->num_regs; ++i)

547

regs->start[i] = regs->end[i] = -1;

548

549

return rval;

550

}

551

552

/* Set REGS to hold NUM_REGS registers, storing them in STARTS and

553

ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use

554

this memory for recording register information. STARTS and ENDS

555

must be allocated using the malloc library routine, and must each

556

be at least NUM_REGS * sizeof (regoff_t) bytes long.

557

558

If NUM_REGS == 0, then subsequent matches should allocate their own

559

560

561

Unless this function is called, the first search or match using

562

PATTERN_BUFFER will allocate its own register data, without

563

freeing the old data. */

564

565

void

566

re_set_registers (struct re_pattern_buffer *bufp,

567

struct re_registers *regs,

568

unsigned num_regs,

569

regoff_t *starts,

570

regoff_t *ends)

571

{

572

if (num_regs)

573

{

574

bufp->regs_allocated = REGS_REALLOCATE1;

575

regs->num_regs = num_regs;

576

regs->start = starts;

577

regs->end = ends;

578

}

579

else

580

{

581

bufp->regs_allocated = REGS_UNALLOCATED0;

582

regs->num_regs = 0;

583

regs->start = regs->end = (regoff_t *) 0;

584

}

585

}

586

#ifdef _LIBC

587

weak_alias (__re_set_registers, re_set_registers)

588

#endif

589

590

/* Entry points compatible with 4.2 BSD regex library. We don't define

591

them unless specifically requested. */

592

593

#if defined _REGEX_RE_COMP || defined _LIBC

594

int

595

# ifdef _LIBC

596

weak_function

597

# endif

598

re_exec (s)

599

const char *s;

600

{

601

return 0 == regexec (&re_comp_buf, s, 0, NULL((void*)0), 0);

602

}

603

#endif /* _REGEX_RE_COMP */

604

605

/* Internal entry point. */

606

607

/* Searches for a compiled pattern PREG in the string STRING, whose

608

length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same

609

mingings with regexec. START, and RANGE have the same meanings

610

with re_search.

611

Return REG_NOERROR if we find a match, and REG_NOMATCH if not,

612

otherwise return the error code.

613

Note: We assume front end functions already check ranges.

614

(START + RANGE >= 0 && START + RANGE <= LENGTH) */

615

616

static reg_errcode_t

617

re_search_internal (const regex_t *preg,

618

const char *string,

619

int length, int start, int range, int stop,

620

size_t nmatch, regmatch_t pmatch[],

621

int eflags)

622

{

623

reg_errcode_t err;

624

const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;

625

int left_lim, right_lim, incr;

626

int fl_longest_match, match_first, match_kind, match_last = -1;

627

int extra_nmatch;

628

int sb, ch;

629

#if defined _LIBC || (defined __STDC_VERSION__201112L && __STDC_VERSION__201112L >= 199901L)

630

re_match_context_t mctx = { .dfa = dfa };

631

#else

632

re_match_context_t mctx;

633

#endif

634

char *fastmap = (preg->fastmap != NULL((void*)0) && preg->fastmap_accurate

635

&& range && !preg->can_be_null) ? preg->fastmap : NULL((void*)0);

636

RE_TRANSLATE_TYPEunsigned char * t = preg->translate;

637

638

#if !(defined _LIBC || (defined __STDC_VERSION__201112L && __STDC_VERSION__201112L >= 199901L))

639

memset (&mctx, '\0', sizeof (re_match_context_t))__builtin___memset_chk (&mctx, '\0', sizeof (re_match_context_t
), __builtin_object_size (&mctx, 0));

640

mctx.dfa = dfa;

641

#endif

642

643

extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;

644

nmatch -= extra_nmatch;

645

646

/* Check if the DFA haven't been compiled. */

647

if (BE (preg->used == 0 || dfa->init_state == NULL__builtin_expect (preg->used == 0 || dfa->init_state ==
((void*)0) || dfa->init_state_word == ((void*)0) || dfa->
init_state_nl == ((void*)0) || dfa->init_state_begbuf == (
(void*)0), 0)

648

|| dfa->init_state_word == NULL || dfa->init_state_nl == NULL__builtin_expect (preg->used == 0 || dfa->init_state ==
((void*)0) || dfa->init_state_word == ((void*)0) || dfa->
init_state_nl == ((void*)0) || dfa->init_state_begbuf == (
(void*)0), 0)

649

|| dfa->init_state_begbuf == NULL, 0)__builtin_expect (preg->used == 0 || dfa->init_state ==
((void*)0) || dfa->init_state_word == ((void*)0) || dfa->
init_state_nl == ((void*)0) || dfa->init_state_begbuf == (
(void*)0), 0))

650

return REG_NOMATCH;

651

652

#ifdef DEBUG

653

/* We assume front-end functions already check them. */

654

assert (start + range >= 0 && start + range <= length)(__builtin_expect(!(start + range >= 0 && start + range
<= length), 0) ? __assert_rtn(__func__, "compat/regex/regexec.c"
, 654, "start + range >= 0 && start + range <= length"
) : (void)0);

655

#endif

656

657

/* If initial states with non-begbuf contexts have no elements,

658

the regex must be anchored. If preg->newline_anchor is set,

659

we'll never use init_state_nl, so do not check it. */

660

if (dfa->init_state->nodes.nelem == 0

661

&& dfa->init_state_word->nodes.nelem == 0

662

&& (dfa->init_state_nl->nodes.nelem == 0

663

|| !preg->newline_anchor))

664

{

665

if (start != 0 && start + range != 0)

666

return REG_NOMATCH;

667

start = range = 0;

668

}

669

670

/* We must check the longest matching, if nmatch > 0. */

671

fl_longest_match = (nmatch != 0 || dfa->nbackref);

672

673

err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,

674

preg->translate, preg->syntax & RE_ICASE(((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1), dfa);

675

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

676

goto free_return;

677

mctx.input.stop = stop;

678

mctx.input.raw_stop = stop;

679

mctx.input.newline_anchor = preg->newline_anchor;

680

681

err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);

682

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

683

goto free_return;

684

685

/* We will log all the DFA states through which the dfa pass,

686

if nmatch > 1, or this dfa has "multibyte node", which is a

687

back-reference or a node which can accept multibyte character or

688

multi character collating element. */

689

if (nmatch > 1 || dfa->has_mb_node)

690

{

691

/* Avoid overflow. */

692

if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0)__builtin_expect (18446744073709551615ULL / sizeof (re_dfastate_t
*) <= mctx.input.bufs_len, 0))

693

{

694

err = REG_ESPACE;

695

goto free_return;

696

}

697

698

mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1)((re_dfastate_t * *) malloc ((mctx.input.bufs_len + 1) * sizeof
(re_dfastate_t *)));

699

if (BE (mctx.state_log == NULL, 0)__builtin_expect (mctx.state_log == ((void*)0), 0))

700

{

701

err = REG_ESPACE;

702

goto free_return;

703

}

704

}

705

else

706

mctx.state_log = NULL((void*)0);

707

708

match_first = start;

709

mctx.input.tip_context = (eflags & REG_NOTBOL1) ? CONTEXT_BEGBUF((1 << 1) << 1)

710

: CONTEXT_NEWLINE(1 << 1) | CONTEXT_BEGBUF((1 << 1) << 1);

711

712

/* Check incrementally whether of not the input string match. */

713

incr = (range < 0) ? -1 : 1;

714

left_lim = (range < 0) ? start + range : start;

715

right_lim = (range < 0) ? start : start + range;

716

sb = dfa->mb_cur_max == 1;

717

match_kind =

718

(fastmap

719

? ((sb || !(preg->syntax & RE_ICASE(((((((((((((((((((((((unsigned long int) 1) << 1) <<
1) << 1) << 1) << 1) << 1) << 1
) << 1) << 1) << 1) << 1) << 1)
<< 1) << 1) << 1) << 1) << 1) <<
1) << 1) << 1) << 1) || t) ? 4 : 0)

720

| (range >= 0 ? 2 : 0)

721

| (t != NULL((void*)0) ? 1 : 0))

722

: 8);

723

724

for (;; match_first += incr)

725

{

726

err = REG_NOMATCH;

727

if (match_first < left_lim || right_lim < match_first)

728

goto free_return;

729

730

/* Advance as rapidly as possible through the string, until we

731

find a plausible place to start matching. This may be done

732

with varying efficiency, so there are various possibilities:

733

only the most common of them are specialized, in order to

734

save on code size. We use a switch statement for speed. */

735

switch (match_kind)

736

{

737

case 8:

738

/* No fastmap. */

739

break;

740

741

case 7:

742

/* Fastmap with single-byte translation, match forward. */

743

while (BE (match_first < right_lim, 1)__builtin_expect (match_first < right_lim, 1)

744

&& !fastmap[t[(unsigned char) string[match_first]]])

745

++match_first;

746

goto forward_match_found_start_or_reached_end;

747

748

case 6:

749

/* Fastmap without translation, match forward. */

750

while (BE (match_first < right_lim, 1)__builtin_expect (match_first < right_lim, 1)

751

&& !fastmap[(unsigned char) string[match_first]])

752

++match_first;

753

754

forward_match_found_start_or_reached_end:

755

if (BE (match_first == right_lim, 0)__builtin_expect (match_first == right_lim, 0))

756

{

757

ch = match_first >= length

758

? 0 : (unsigned char) string[match_first];

759

if (!fastmap[t ? t[ch] : ch])

760

goto free_return;

761

}

762

break;

763

764

case 4:

765

case 5:

766

/* Fastmap without multi-byte translation, match backwards. */

767

while (match_first >= left_lim)

768

{

769

ch = match_first >= length

770

? 0 : (unsigned char) string[match_first];

771

if (fastmap[t ? t[ch] : ch])

772

break;

773

--match_first;

774

}

775

if (match_first < left_lim)

776

goto free_return;

777

break;

778

779

default:

780

/* In this case, we can't determine easily the current byte,

781

since it might be a component byte of a multibyte

782

character. Then we use the constructed buffer instead. */

783

for (;;)

784

{

785

/* If MATCH_FIRST is out of the valid range, reconstruct the

786

buffers. */

787

unsigned int offset = match_first - mctx.input.raw_mbs_idx;

788

if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0)__builtin_expect (offset >= (unsigned int) mctx.input.valid_raw_len
, 0))

789

{

790

err = re_string_reconstruct (&mctx.input, match_first,

791

eflags);

792

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

793

goto free_return;

794

795

offset = match_first - mctx.input.raw_mbs_idx;

796

}

797

/* If MATCH_FIRST is out of the buffer, leave it as '\0'.

798

Note that MATCH_FIRST must not be smaller than 0. */

799

ch = (match_first >= length

800

? 0 : re_string_byte_at (&mctx.input, offset)((&mctx.input)->mbs[offset]));

801

if (fastmap[ch])

802

break;

803

match_first += incr;

804

if (match_first < left_lim || match_first > right_lim)

805

{

806

err = REG_NOMATCH;

807

goto free_return;

808

}

809

}

810

break;

811

}

812

813

/* Reconstruct the buffers so that the matcher can assume that

814

the matching starts from the beginning of the buffer. */

815

err = re_string_reconstruct (&mctx.input, match_first, eflags);

816

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

817

goto free_return;

818

819

#ifdef RE_ENABLE_I18N

820

/* Don't consider this char as a possible match start if it part,

821

yet isn't the head, of a multibyte character. */

822

if (!sb && !re_string_first_byte (&mctx.input, 0)((0) == (&mctx.input)->valid_len || (&mctx.input)->
wcs[0] != WEOF))

823

continue;

824

#endif

825

826

/* It seems to be appropriate one, then use the matcher. */

827

/* We assume that the matching starts from 0. */

828

mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;

829

match_last = check_matching (&mctx, fl_longest_match,

830

range >= 0 ? &match_first : NULL((void*)0));

831

if (match_last != -1)

832

{

833

if (BE (match_last == -2, 0)__builtin_expect (match_last == -2, 0))

834

{

835

err = REG_ESPACE;

836

goto free_return;

837

}

838

else

839

{

840

mctx.match_last = match_last;

841

if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)

842

{

843

re_dfastate_t *pstate = mctx.state_log[match_last];

844

mctx.last_node = check_halt_state_context (&mctx, pstate,

845

match_last);

846

}

847

if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)

848

|| dfa->nbackref)

849

{

850

err = prune_impossible_nodes (&mctx);

851

if (err == REG_NOERROR)

852

break;

853

if (BE (err != REG_NOMATCH, 0)__builtin_expect (err != REG_NOMATCH, 0))

854

goto free_return;

855

match_last = -1;

856

}

857

else

858

break; /* We found a match. */

859

}

860

}

861

862

match_ctx_clean (&mctx);

863

}

864

865

#ifdef DEBUG

866

assert (match_last != -1)(__builtin_expect(!(match_last != -1), 0) ? __assert_rtn(__func__
, "compat/regex/regexec.c", 866, "match_last != -1") : (void)
0);

867

assert (err == REG_NOERROR)(__builtin_expect(!(err == REG_NOERROR), 0) ? __assert_rtn(__func__
, "compat/regex/regexec.c", 867, "err == REG_NOERROR") : (void
)0);

868

#endif

869

870

/* Set pmatch[] if we need. */

871

if (nmatch > 0)

872

{

873

int reg_idx;

874

875

/* Initialize registers. */

876

for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)

877

pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;

878

879

/* Set the points where matching start/end. */

880

pmatch[0].rm_so = 0;

881

pmatch[0].rm_eo = mctx.match_last;

882

883

if (!preg->no_sub && nmatch > 1)

884

{

885

err = set_regs (preg, &mctx, nmatch, pmatch,

886

dfa->has_plural_match && dfa->nbackref > 0);

887

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

888

goto free_return;

889

}

890

891

/* At last, add the offset to the each registers, since we slided

892

the buffers so that we could assume that the matching starts

893

from 0. */

894

for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)

895

if (pmatch[reg_idx].rm_so != -1)

896

{

897

#ifdef RE_ENABLE_I18N

898

if (BE (mctx.input.offsets_needed != 0, 0)__builtin_expect (mctx.input.offsets_needed != 0, 0))

899

{

900

pmatch[reg_idx].rm_so =

901

(pmatch[reg_idx].rm_so == mctx.input.valid_len

902

? mctx.input.valid_raw_len

903

: mctx.input.offsets[pmatch[reg_idx].rm_so]);

904

pmatch[reg_idx].rm_eo =

905

(pmatch[reg_idx].rm_eo == mctx.input.valid_len

906

? mctx.input.valid_raw_len

907

: mctx.input.offsets[pmatch[reg_idx].rm_eo]);

908

}

909

#else

910

assert (mctx.input.offsets_needed == 0)(__builtin_expect(!(mctx.input.offsets_needed == 0), 0) ? __assert_rtn
(__func__, "compat/regex/regexec.c", 910, "mctx.input.offsets_needed == 0"
) : (void)0);

911

#endif

912

pmatch[reg_idx].rm_so += match_first;

913

pmatch[reg_idx].rm_eo += match_first;

914

}

915

for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)

916

{

917

pmatch[nmatch + reg_idx].rm_so = -1;

918

pmatch[nmatch + reg_idx].rm_eo = -1;

919

}

920

921

if (dfa->subexp_map)

922

for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)

923

if (dfa->subexp_map[reg_idx] != reg_idx)

924

{

925

pmatch[reg_idx + 1].rm_so

926

= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;

927

pmatch[reg_idx + 1].rm_eo

928

= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;

929

}

930

}

931

932

free_return:

933

re_free (mctx.state_log)free (mctx.state_log);

934

if (dfa->nbackref)

935

match_ctx_free (&mctx);

936

re_string_destruct (&mctx.input);

937

return err;

938

}

939

940

static reg_errcode_t

941

prune_impossible_nodes (re_match_context_t *mctx)

942

{

943

const re_dfa_t *const dfa = mctx->dfa;

944

int halt_node, match_last;

945

reg_errcode_t ret;

946

re_dfastate_t **sifted_states;

947

re_dfastate_t **lim_states = NULL((void*)0);

948

re_sift_context_t sctx;

949

#ifdef DEBUG

950

assert (mctx->state_log != NULL)(__builtin_expect(!(mctx->state_log != ((void*)0)), 0) ? __assert_rtn
(__func__, "compat/regex/regexec.c", 950, "mctx->state_log != NULL"
) : (void)0);

951

#endif

952

match_last = mctx->match_last;

953

halt_node = mctx->last_node;

954

955

/* Avoid overflow. */

956

if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0)__builtin_expect (18446744073709551615ULL / sizeof (re_dfastate_t
*) <= match_last, 0))

957

return REG_ESPACE;

958

959

sifted_states = re_malloc (re_dfastate_t *, match_last + 1)((re_dfastate_t * *) malloc ((match_last + 1) * sizeof (re_dfastate_t
*)));

960

if (BE (sifted_states == NULL, 0)__builtin_expect (sifted_states == ((void*)0), 0))

961

{

962

ret = REG_ESPACE;

963

goto free_return;

964

}

965

if (dfa->nbackref)

966

{

967

lim_states = re_malloc (re_dfastate_t *, match_last + 1)((re_dfastate_t * *) malloc ((match_last + 1) * sizeof (re_dfastate_t
*)));

968

if (BE (lim_states == NULL, 0)__builtin_expect (lim_states == ((void*)0), 0))

969

{

970

ret = REG_ESPACE;

971

goto free_return;

972

}

973

while (1)

974

{

975

memset (lim_states, '\0',__builtin___memset_chk (lim_states, '\0', sizeof (re_dfastate_t
*) * (match_last + 1), __builtin_object_size (lim_states, 0)
)

976

sizeof (re_dfastate_t *) * (match_last + 1))__builtin___memset_chk (lim_states, '\0', sizeof (re_dfastate_t
*) * (match_last + 1), __builtin_object_size (lim_states, 0)
);

977

sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,

978

match_last);

979

ret = sift_states_backward (mctx, &sctx);

980

re_node_set_free (&sctx.limits)free ((&sctx.limits)->elems);

981

if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))

982

goto free_return;

983

if (sifted_states[0] != NULL((void*)0) || lim_states[0] != NULL((void*)0))

984

break;

985

986

{

987

--match_last;

988

if (match_last < 0)

989

{

990

ret = REG_NOMATCH;

991

goto free_return;

992

}

993

} while (mctx->state_log[match_last] == NULL((void*)0)

994

|| !mctx->state_log[match_last]->halt);

995

halt_node = check_halt_state_context (mctx,

996

mctx->state_log[match_last],

997

match_last);

998

}

999

ret = merge_state_array (dfa, sifted_states, lim_states,

1000

match_last + 1);

1001

re_free (lim_states)free (lim_states);

1002

lim_states = NULL((void*)0);

1003

if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))

1004

goto free_return;

1005

}

1006

else

1007

{

1008

sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);

1009

ret = sift_states_backward (mctx, &sctx);

1010

re_node_set_free (&sctx.limits)free ((&sctx.limits)->elems);

1011

if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))

1012

goto free_return;

1013

if (sifted_states[0] == NULL((void*)0))

1014

{

1015

ret = REG_NOMATCH;

1016

goto free_return;

1017

}

1018

}

1019

re_free (mctx->state_log)free (mctx->state_log);

1020

mctx->state_log = sifted_states;

1021

sifted_states = NULL((void*)0);

1022

mctx->last_node = halt_node;

1023

mctx->match_last = match_last;

1024

ret = REG_NOERROR;

1025

free_return:

1026

re_free (sifted_states)free (sifted_states);

1027

re_free (lim_states)free (lim_states);

1028

return ret;

1029

}

1030

1031

/* Acquire an initial state and return it.

1032

We must select appropriate initial state depending on the context,

1033

since initial states may have constraints like "\<", "^", etc.. */

1034

1035

static inline re_dfastate_t *

1036

__attribute ((always_inline))__attribute__ ((always_inline)) internal_function

1037

acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,

1038

int idx)

1039

{

1040

const re_dfa_t *const dfa = mctx->dfa;

1041

if (dfa->init_state->has_constraint)

1042

{

1043

unsigned int context;

1044

context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);

1045

if (IS_WORD_CONTEXT (context)((context) & 1))

1046

return dfa->init_state_word;

1047

else if (IS_ORDINARY_CONTEXT (context)((context) == 0))

1048

return dfa->init_state;

1049

else if (IS_BEGBUF_CONTEXT (context)((context) & ((1 << 1) << 1)) && IS_NEWLINE_CONTEXT (context)((context) & (1 << 1)))

1050

return dfa->init_state_begbuf;

1051

else if (IS_NEWLINE_CONTEXT (context)((context) & (1 << 1)))

1052

return dfa->init_state_nl;

1053

else if (IS_BEGBUF_CONTEXT (context)((context) & ((1 << 1) << 1)))

1054

{

1055

/* It is relatively rare case, then calculate on demand. */

1056

return re_acquire_state_context (err, dfa,

1057

dfa->init_state->entrance_nodes,

1058

context);

1059

}

1060

else

1061

/* Must not happen? */

1062

return dfa->init_state;

1063

}

1064

else

1065

return dfa->init_state;

1066

}

1067

1068

/* Check whether the regular expression match input string INPUT or not,

1069

and return the index where the matching end, return -1 if not match,

1070

or return -2 in case of an error.

1071

FL_LONGEST_MATCH means we want the POSIX longest matching.

1072

If P_MATCH_FIRST is not NULL, and the match fails, it is set to the

1073

next place where we may want to try matching.

1074

Note that the matcher assume that the matching starts from the current

1075

index of the buffer. */

1076

1077

static int

1078

internal_function

1079

check_matching (re_match_context_t *mctx, int fl_longest_match,

1080

int *p_match_first)

1081

{

1082

const re_dfa_t *const dfa = mctx->dfa;

1083

reg_errcode_t err;

1084

int match = 0;

1085

int match_last = -1;

1086

int cur_str_idx = re_string_cur_idx (&mctx->input)((&mctx->input)->cur_idx);

1087

re_dfastate_t *cur_state;

1088

int at_init_state = p_match_first != NULL((void*)0);

1089

int next_start_idx = cur_str_idx;

1090

1091

err = REG_NOERROR;

1092

cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);

1093

/* An initial state must not be NULL (invalid). */

1094

if (BE (cur_state == NULL, 0)__builtin_expect (cur_state == ((void*)0), 0))

1095

{

1096

assert (err == REG_ESPACE)(__builtin_expect(!(err == REG_ESPACE), 0) ? __assert_rtn(__func__
, "compat/regex/regexec.c", 1096, "err == REG_ESPACE") : (void
)0);

1097

return -2;

1098

}

1099

1100

if (mctx->state_log != NULL((void*)0))

1101

{

1102

mctx->state_log[cur_str_idx] = cur_state;

1103

1104

/* Check OP_OPEN_SUBEXP in the initial state in case that we use them

1105

later. E.g. Processing back references. */

1106

if (BE (dfa->nbackref, 0)__builtin_expect (dfa->nbackref, 0))

1107

{

1108

at_init_state = 0;

1109

err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);

1110

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1111

return err;

1112

1113

if (cur_state->has_backref)

1114

{

1115

err = transit_state_bkref (mctx, &cur_state->nodes);

1116

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1117

return err;

1118

}

1119

}

1120

}

1121

1122

/* If the RE accepts NULL string. */

1123

if (BE (cur_state->halt, 0)__builtin_expect (cur_state->halt, 0))

1124

{

1125

if (!cur_state->has_constraint

1126

|| check_halt_state_context (mctx, cur_state, cur_str_idx))

1127

{

1128

if (!fl_longest_match)

1129

return cur_str_idx;

1130

else

1131

{

1132

match_last = cur_str_idx;

1133

match = 1;

1134

}

1135

}

1136

}

1137

1138

while (!re_string_eoi (&mctx->input)((&mctx->input)->stop <= (&mctx->input)->
cur_idx))

1139

{

1140

re_dfastate_t *old_state = cur_state;

1141

int next_char_idx = re_string_cur_idx (&mctx->input)((&mctx->input)->cur_idx) + 1;

1142

1143

if (BE (next_char_idx >= mctx->input.bufs_len, 0)__builtin_expect (next_char_idx >= mctx->input.bufs_len
, 0)

1144

|| (BE (next_char_idx >= mctx->input.valid_len, 0)__builtin_expect (next_char_idx >= mctx->input.valid_len
, 0)

1145

&& mctx->input.valid_len < mctx->input.len))

1146

{

1147

err = extend_buffers (mctx);

1148

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1149

{

1150

assert (err == REG_ESPACE)(__builtin_expect(!(err == REG_ESPACE), 0) ? __assert_rtn(__func__
, "compat/regex/regexec.c", 1150, "err == REG_ESPACE") : (void
)0);

1151

return -2;

1152

}

1153

}

1154

1155

cur_state = transit_state (&err, mctx, cur_state);

1156

if (mctx->state_log != NULL((void*)0))

1157

cur_state = merge_state_with_log (&err, mctx, cur_state);

1158

1159

if (cur_state == NULL((void*)0))

1160

{

1161

/* Reached the invalid state or an error. Try to recover a valid

1162

state using the state log, if available and if we have not

1163

already found a valid (even if not the longest) match. */

1164

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1165

return -2;

1166

1167

if (mctx->state_log == NULL((void*)0)

1168

|| (match && !fl_longest_match)

1169

|| (cur_state = find_recover_state (&err, mctx)) == NULL((void*)0))

1170

break;

1171

}

1172

1173

if (BE (at_init_state, 0)__builtin_expect (at_init_state, 0))

1174

{

1175

if (old_state == cur_state)

1176

next_start_idx = next_char_idx;

1177

else

1178

at_init_state = 0;

1179

}

1180

1181

if (cur_state->halt)

1182

{

1183

/* Reached a halt state.

1184

Check the halt state can satisfy the current context. */

1185

if (!cur_state->has_constraint

1186

|| check_halt_state_context (mctx, cur_state,

1187

re_string_cur_idx (&mctx->input)((&mctx->input)->cur_idx)))

1188

{

1189

/* We found an appropriate halt state. */

1190

match_last = re_string_cur_idx (&mctx->input)((&mctx->input)->cur_idx);

1191

match = 1;

1192

1193

/* We found a match, do not modify match_first below. */

1194

p_match_first = NULL((void*)0);

1195

if (!fl_longest_match)

1196

break;

1197

}

1198

}

1199

}

1200

1201

if (p_match_first)

1202

*p_match_first += next_start_idx;

1203

1204

return match_last;

1205

}

1206

1207

/* Check NODE match the current context. */

1208

1209

static int

1210

internal_function

1211

check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)

1212

{

1213

re_token_type_t type = dfa->nodes[node].type;

1214

unsigned int constraint = dfa->nodes[node].constraint;

1215

if (type != END_OF_RE)

1216

return 0;

1217

if (!constraint)

1218

return 1;

1219

if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)((((constraint) & 0x0004) && !((context) & 1)
) || (((constraint) & 0x0008) && ((context) &
1)) || (((constraint) & 0x0020) && !((context) &
(1 << 1))) || (((constraint) & 0x0080) && !
((context) & (((1 << 1) << 1) << 1)))))

1220

return 0;

1221

return 1;

1222

}

1223

1224

/* Check the halt state STATE match the current context.

1225

Return 0 if not match, if the node, STATE has, is a halt node and

1226

match the context, return the node. */

1227

1228

static int

1229

internal_function

1230

check_halt_state_context (const re_match_context_t *mctx,

1231

const re_dfastate_t *state, int idx)

1232

{

1233

int i;

1234

unsigned int context;

1235

#ifdef DEBUG

1236

assert (state->halt)(__builtin_expect(!(state->halt), 0) ? __assert_rtn(__func__
, "compat/regex/regexec.c", 1236, "state->halt") : (void)0
);

1237

#endif

1238

context = re_string_context_at (&mctx->input, idx, mctx->eflags);

1239

for (i = 0; i < state->nodes.nelem; ++i)

1240

if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))

1241

return state->nodes.elems[i];

1242

return 0;

1243

}

1244

1245

/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA

1246

corresponding to the DFA).

1247

Return the destination node, and update EPS_VIA_NODES, return -1 in case

1248

of errors. */

1249

1250

static int

1251

internal_function

1252

proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,

1253

int *pidx, int node, re_node_set *eps_via_nodes,

1254

struct re_fail_stack_t *fs)

1255

{

1256

const re_dfa_t *const dfa = mctx->dfa;

1257

int i, err;

1258

if (IS_EPSILON_NODE (dfa->nodes[node].type)((dfa->nodes[node].type) & 8))

1259

{

1260

re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;

1261

re_node_set *edests = &dfa->edests[node];

1262

int dest_node;

1263

err = re_node_set_insert (eps_via_nodes, node);

1264

if (BE (err < 0, 0)__builtin_expect (err < 0, 0))

1265

return -2;

1266

/* Pick up a valid destination, or return -1 if none is found. */

1267

for (dest_node = -1, i = 0; i < edests->nelem; ++i)

1268

{

1269

int candidate = edests->elems[i];

1270

if (!re_node_set_contains (cur_nodes, candidate))

1271

continue;

1272

if (dest_node == -1)

1273

dest_node = candidate;

1274

1275

else

1276

{

1277

/* In order to avoid infinite loop like "(a*)*", return the second

1278

epsilon-transition if the first was already considered. */

1279

if (re_node_set_contains (eps_via_nodes, dest_node))

1280

return candidate;

1281

1282

/* Otherwise, push the second epsilon-transition on the fail stack. */

1283

else if (fs != NULL((void*)0)

1284

&& push_fail_stack (fs, *pidx, candidate, nregs, regs,

1285

eps_via_nodes))

1286

return -2;

1287

1288

/* We know we are going to exit. */

1289

break;

1290

}

1291

}

1292

return dest_node;

1293

}

1294

else

1295

{

1296

int naccepted = 0;

1297

re_token_type_t type = dfa->nodes[node].type;

1298

1299

#ifdef RE_ENABLE_I18N

1300

if (dfa->nodes[node].accept_mb)

1301

naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);

1302

else

1303

#endif /* RE_ENABLE_I18N */

1304

if (type == OP_BACK_REF)

1305

{

1306

int subexp_idx = dfa->nodes[node].opr.idx + 1;

1307

naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;

1308

if (fs != NULL((void*)0))

1309

{

1310

if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)

1311

return -1;

1312

else if (naccepted)

1313

{

1314

char *buf = (char *) re_string_get_buffer (&mctx->input)((&mctx->input)->mbs);

1315

if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,

1316

naccepted) != 0)

1317

return -1;

1318

}

1319

}

1320

1321

if (naccepted == 0)

1322

{

1323

int dest_node;

1324

err = re_node_set_insert (eps_via_nodes, node);

1325

if (BE (err < 0, 0)__builtin_expect (err < 0, 0))

1326

return -2;

1327

dest_node = dfa->edests[node].elems[0];

1328

if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,

1329

dest_node))

1330

return dest_node;

1331

}

1332

}

1333

1334

if (naccepted != 0

1335

|| check_node_accept (mctx, dfa->nodes + node, *pidx))

1336

{

1337

int dest_node = dfa->nexts[node];

1338

*pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;

1339

if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL((void*)0)

1340

|| !re_node_set_contains (&mctx->state_log[*pidx]->nodes,

1341

dest_node)))

1342

return -1;

1343

re_node_set_empty (eps_via_nodes)((eps_via_nodes)->nelem = 0);

1344

return dest_node;

1345

}

1346

}

1347

return -1;

1348

}

1349

1350

static reg_errcode_t

1351

internal_function

1352

push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,

1353

int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)

1354

{

1355

reg_errcode_t err;

1356

int num = fs->num++;

1357

if (fs->num == fs->alloc)

1358

{

1359

struct re_fail_stack_ent_t *new_array;

1360

new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)

1361

* fs->alloc * 2));

1362

if (new_array == NULL((void*)0))

1363

return REG_ESPACE;

1364

fs->alloc *= 2;

1365

fs->stack = new_array;

1366

}

1367

fs->stack[num].idx = str_idx;

1368

fs->stack[num].node = dest_node;

1369

fs->stack[num].regs = re_malloc (regmatch_t, nregs)((regmatch_t *) malloc ((nregs) * sizeof (regmatch_t)));

1370

if (fs->stack[num].regs == NULL((void*)0))

1371

return REG_ESPACE;

1372

memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs)__builtin___memcpy_chk (fs->stack[num].regs, regs, sizeof (
regmatch_t) * nregs, __builtin_object_size (fs->stack[num]
.regs, 0));

1373

err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);

1374

return err;

1375

}

1376

1377

static int

1378

internal_function

1379

pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,

1380

regmatch_t *regs, re_node_set *eps_via_nodes)

1381

{

1382

int num = --fs->num;

1383

assert (num >= 0)(__builtin_expect(!(num >= 0), 0) ? __assert_rtn(__func__,
"compat/regex/regexec.c", 1383, "num >= 0") : (void)0);

1384

*pidx = fs->stack[num].idx;

1385

memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs)__builtin___memcpy_chk (regs, fs->stack[num].regs, sizeof (
regmatch_t) * nregs, __builtin_object_size (regs, 0));

1386

re_node_set_free (eps_via_nodes)free ((eps_via_nodes)->elems);

1387

re_free (fs->stack[num].regs)free (fs->stack[num].regs);

1388

*eps_via_nodes = fs->stack[num].eps_via_nodes;

1389

return fs->stack[num].node;

1390

}

1391

1392

/* Set the positions where the subexpressions are starts/ends to registers

1393

PMATCH.

1394

Note: We assume that pmatch[0] is already set, and

1395

pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */

1396

1397

static reg_errcode_t

1398

internal_function

1399

set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,

1400

regmatch_t *pmatch, int fl_backtrack)

1401

{

1402

const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;

1403

int idx, cur_node;

1404

re_node_set eps_via_nodes;

1405

struct re_fail_stack_t *fs;

1406

struct re_fail_stack_t fs_body = { 0, 2, NULL((void*)0) };

1407

regmatch_t *prev_idx_match;

1408

int prev_idx_match_malloced = 0;

1409

1410

#ifdef DEBUG

1411

assert (nmatch > 1)(__builtin_expect(!(nmatch > 1), 0) ? __assert_rtn(__func__
, "compat/regex/regexec.c", 1411, "nmatch > 1") : (void)0);

1412

assert (mctx->state_log != NULL)(__builtin_expect(!(mctx->state_log != ((void*)0)), 0) ? __assert_rtn
(__func__, "compat/regex/regexec.c", 1412, "mctx->state_log != NULL"
) : (void)0);

1413

#endif

1414

if (fl_backtrack)

1415

{

1416

fs = &fs_body;

1417

fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc)((struct re_fail_stack_ent_t *) malloc ((fs->alloc) * sizeof
(struct re_fail_stack_ent_t)));

1418

if (fs->stack == NULL((void*)0))

1419

return REG_ESPACE;

1420

}

1421

else

1422

fs = NULL((void*)0);

1423

1424

cur_node = dfa->init_node;

1425

re_node_set_init_empty (&eps_via_nodes)__builtin___memset_chk (&eps_via_nodes, '\0', sizeof (re_node_set
), __builtin_object_size (&eps_via_nodes, 0));

1426

1427

#ifdef HAVE_ALLOCA

1428

if (__libc_use_alloca (nmatch * sizeof (regmatch_t))0)

1429

prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t))__builtin_alloca(nmatch * sizeof (regmatch_t));

1430

else

1431

#endif

1432

{

1433

prev_idx_match = re_malloc (regmatch_t, nmatch)((regmatch_t *) malloc ((nmatch) * sizeof (regmatch_t)));

1434

if (prev_idx_match == NULL((void*)0))

1435

{

1436

free_fail_stack_return (fs);

1437

return REG_ESPACE;

1438

}

1439

prev_idx_match_malloced = 1;

1440

}

1441

memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch)__builtin___memcpy_chk (prev_idx_match, pmatch, sizeof (regmatch_t
) * nmatch, __builtin_object_size (prev_idx_match, 0));

1442

1443

for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)

1444

{

1445

update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);

1446

1447

if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)

1448

{

1449

int reg_idx;

1450

if (fs)

1451

{

1452

for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)

1453

if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)

1454

break;

1455

if (reg_idx == nmatch)

1456

{

1457

re_node_set_free (&eps_via_nodes)free ((&eps_via_nodes)->elems);

1458

if (prev_idx_match_malloced)

1459

re_free (prev_idx_match)free (prev_idx_match);

1460

return free_fail_stack_return (fs);

1461

}

1462

cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,

1463

&eps_via_nodes);

1464

}

1465

else

1466

{

1467

re_node_set_free (&eps_via_nodes)free ((&eps_via_nodes)->elems);

1468

if (prev_idx_match_malloced)

1469

re_free (prev_idx_match)free (prev_idx_match);

1470

return REG_NOERROR;

1471

}

1472

}

1473

1474

/* Proceed to next node. */

1475

cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,

1476

&eps_via_nodes, fs);

1477

1478

if (BE (cur_node < 0, 0)__builtin_expect (cur_node < 0, 0))

1479

{

1480

if (BE (cur_node == -2, 0)__builtin_expect (cur_node == -2, 0))

1481

{

1482

re_node_set_free (&eps_via_nodes)free ((&eps_via_nodes)->elems);

1483

if (prev_idx_match_malloced)

1484

re_free (prev_idx_match)free (prev_idx_match);

1485

free_fail_stack_return (fs);

1486

return REG_ESPACE;

1487

}

1488

if (fs)

1489

cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,

1490

&eps_via_nodes);

1491

else

1492

{

1493

re_node_set_free (&eps_via_nodes)free ((&eps_via_nodes)->elems);

1494

if (prev_idx_match_malloced)

1495

re_free (prev_idx_match)free (prev_idx_match);

1496

return REG_NOMATCH;

1497

}

1498

}

1499

}

1500

re_node_set_free (&eps_via_nodes)free ((&eps_via_nodes)->elems);

1501

if (prev_idx_match_malloced)

1502

re_free (prev_idx_match)free (prev_idx_match);

1503

return free_fail_stack_return (fs);

1504

}

1505

1506

static reg_errcode_t

1507

internal_function

1508

free_fail_stack_return (struct re_fail_stack_t *fs)

1509

{

1510

if (fs)

1511

{

1512

int fs_idx;

1513

for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)

1514

{

1515

re_node_set_free (&fs->stack[fs_idx].eps_via_nodes)free ((&fs->stack[fs_idx].eps_via_nodes)->elems);

1516

re_free (fs->stack[fs_idx].regs)free (fs->stack[fs_idx].regs);

1517

}

1518

re_free (fs->stack)free (fs->stack);

1519

}

1520

return REG_NOERROR;

1521

}

1522

1523

static void

1524

internal_function

1525

update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,

1526

regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)

1527

{

1528

int type = dfa->nodes[cur_node].type;

1529

if (type == OP_OPEN_SUBEXP)

1530

{

1531

int reg_num = dfa->nodes[cur_node].opr.idx + 1;

1532

1533

/* We are at the first node of this sub expression. */

1534

if (reg_num < nmatch)

1535

{

1536

pmatch[reg_num].rm_so = cur_idx;

1537

pmatch[reg_num].rm_eo = -1;

1538

}

1539

}

1540

else if (type == OP_CLOSE_SUBEXP)

1541

{

1542

int reg_num = dfa->nodes[cur_node].opr.idx + 1;

1543

if (reg_num < nmatch)

1544

{

1545

/* We are at the last node of this sub expression. */

1546

if (pmatch[reg_num].rm_so < cur_idx)

1547

{

1548

pmatch[reg_num].rm_eo = cur_idx;

1549

/* This is a non-empty match or we are not inside an optional

1550

subexpression. Accept this right away. */

1551

memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch)__builtin___memcpy_chk (prev_idx_match, pmatch, sizeof (regmatch_t
) * nmatch, __builtin_object_size (prev_idx_match, 0));

1552

}

1553

else

1554

{

1555

if (dfa->nodes[cur_node].opt_subexp

1556

&& prev_idx_match[reg_num].rm_so != -1)

1557

/* We transited through an empty match for an optional

1558

subexpression, like (a?)*, and this is not the subexp's

1559

first match. Copy back the old content of the registers

1560

so that matches of an inner subexpression are undone as

1561

well, like in ((a?))*. */

1562

memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch)__builtin___memcpy_chk (pmatch, prev_idx_match, sizeof (regmatch_t
) * nmatch, __builtin_object_size (pmatch, 0));

1563

else

1564

/* We completed a subexpression, but it may be part of

1565

an optional one, so do not update PREV_IDX_MATCH. */

1566

pmatch[reg_num].rm_eo = cur_idx;

1567

}

1568

}

1569

}

1570

}

1571

1572

/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0

1573

and sift the nodes in each states according to the following rules.

1574

Updated state_log will be wrote to STATE_LOG.

1575

1576

Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...

1577

1. When STR_IDX == MATCH_LAST(the last index in the state_log):

1578

If `a' isn't the LAST_NODE and `a' can't epsilon transit to

1579

the LAST_NODE, we throw away the node `a'.

1580

2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts

1581

string `s' and transit to `b':

1582

i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw

1583

away the node `a'.

1584

ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is

1585

thrown away, we throw away the node `a'.

1586

3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':

1587

i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the

1588

node `a'.

1589

ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,

1590

we throw away the node `a'. */

1591

1592

#define STATE_NODE_CONTAINS(state,node)((state) != ((void*)0) && re_node_set_contains (&
(state)->nodes, node)) \

1593

((state) != NULL((void*)0) && re_node_set_contains (&(state)->nodes, node))

1594

1595

static reg_errcode_t

1596

internal_function

1597

sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)

1598

{

1599

reg_errcode_t err;

1600

int null_cnt = 0;

1601

int str_idx = sctx->last_str_idx;

1602

re_node_set cur_dest;

1603

1604

#ifdef DEBUG

1605

assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL)(__builtin_expect(!(mctx->state_log != ((void*)0) &&
mctx->state_log[str_idx] != ((void*)0)), 0) ? __assert_rtn
(__func__, "compat/regex/regexec.c", 1605, "mctx->state_log != NULL && mctx->state_log[str_idx] != NULL"
) : (void)0);

1606

#endif

1607

1608

/* Build sifted state_log[str_idx]. It has the nodes which can epsilon

1609

transit to the last_node and the last_node itself. */

1610

err = re_node_set_init_1 (&cur_dest, sctx->last_node);

1611

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1612

return err;

1613

err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);

1614

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1615

goto free_return;

1616

1617

/* Then check each states in the state_log. */

1618

while (str_idx > 0)

1619

{

1620

/* Update counters. */

1621

null_cnt = (sctx->sifted_states[str_idx] == NULL((void*)0)) ? null_cnt + 1 : 0;

1622

if (null_cnt > mctx->max_mb_elem_len)

1623

{

1624

memset (sctx->sifted_states, '\0',__builtin___memset_chk (sctx->sifted_states, '\0', sizeof (
re_dfastate_t *) * str_idx, __builtin_object_size (sctx->sifted_states
, 0))

1625

sizeof (re_dfastate_t *) * str_idx)__builtin___memset_chk (sctx->sifted_states, '\0', sizeof (
re_dfastate_t *) * str_idx, __builtin_object_size (sctx->sifted_states
, 0));

1626

re_node_set_free (&cur_dest)free ((&cur_dest)->elems);

1627

return REG_NOERROR;

1628

}

1629

re_node_set_empty (&cur_dest)((&cur_dest)->nelem = 0);

1630

--str_idx;

1631

1632

if (mctx->state_log[str_idx])

1633

{

1634

err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);

1635

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1636

goto free_return;

1637

}

1638

1639

/* Add all the nodes which satisfy the following conditions:

1640

- It can epsilon transit to a node in CUR_DEST.

1641

- It is in CUR_SRC.

1642

And update state_log. */

1643

err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);

1644

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1645

goto free_return;

1646

}

1647

err = REG_NOERROR;

1648

free_return:

1649

re_node_set_free (&cur_dest)free ((&cur_dest)->elems);

1650

return err;

1651

}

1652

1653

static reg_errcode_t

1654

internal_function

1655

build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,

1656

int str_idx, re_node_set *cur_dest)

1657

{

1658

const re_dfa_t *const dfa = mctx->dfa;

1659

const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;

1660

int i;

1661

1662

/* Then build the next sifted state.

1663

We build the next sifted state on `cur_dest', and update

1664

`sifted_states[str_idx]' with `cur_dest'.

1665

Note:

1666

`cur_dest' is the sifted state from `state_log[str_idx + 1]'.

1667

`cur_src' points the node_set of the old `state_log[str_idx]'

1668

(with the epsilon nodes pre-filtered out). */

1669

for (i = 0; i < cur_src->nelem; i++)

1670

{

1671

int prev_node = cur_src->elems[i];

1672

int naccepted = 0;

1673

int ret;

1674

1675

#ifdef DEBUG

1676

re_token_type_t type = dfa->nodes[prev_node].type;

1677

assert (!IS_EPSILON_NODE (type))(__builtin_expect(!(!((type) & 8)), 0) ? __assert_rtn(__func__
, "compat/regex/regexec.c", 1677, "!IS_EPSILON_NODE (type)") :
(void)0);

1678

#endif

1679

#ifdef RE_ENABLE_I18N

1680

/* If the node may accept `multi byte'. */

1681

if (dfa->nodes[prev_node].accept_mb)

1682

naccepted = sift_states_iter_mb (mctx, sctx, prev_node,

1683

str_idx, sctx->last_str_idx);

1684

#endif /* RE_ENABLE_I18N */

1685

1686

/* We don't check backreferences here.

1687

See update_cur_sifted_state(). */

1688

if (!naccepted

1689

&& check_node_accept (mctx, dfa->nodes + prev_node, str_idx)

1690

&& STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],((sctx->sifted_states[str_idx + 1]) != ((void*)0) &&
re_node_set_contains (&(sctx->sifted_states[str_idx +
1])->nodes, dfa->nexts[prev_node]))

1691

dfa->nexts[prev_node])((sctx->sifted_states[str_idx + 1]) != ((void*)0) &&
re_node_set_contains (&(sctx->sifted_states[str_idx +
1])->nodes, dfa->nexts[prev_node])))

1692

naccepted = 1;

1693

1694

if (naccepted == 0)

1695

continue;

1696

1697

if (sctx->limits.nelem)

1698

{

1699

int to_idx = str_idx + naccepted;

1700

if (check_dst_limits (mctx, &sctx->limits,

1701

dfa->nexts[prev_node], to_idx,

1702

prev_node, str_idx))

1703

continue;

1704

}

1705

ret = re_node_set_insert (cur_dest, prev_node);

1706

if (BE (ret == -1, 0)__builtin_expect (ret == -1, 0))

1707

return REG_ESPACE;

1708

}

1709

1710

return REG_NOERROR;

1711

}

1712

1713

/* Helper functions. */

1714

1715

static reg_errcode_t

1716

internal_function

1717

clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)

1718

{

1719

int top = mctx->state_log_top;

1720

1721

if (next_state_log_idx >= mctx->input.bufs_len

1722

|| (next_state_log_idx >= mctx->input.valid_len

1723

&& mctx->input.valid_len < mctx->input.len))

1724

{

1725

reg_errcode_t err;

1726

err = extend_buffers (mctx);

1727

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1728

return err;

1729

}

1730

1731

if (top < next_state_log_idx)

1732

{

1733

memset (mctx->state_log + top + 1, '\0',__builtin___memset_chk (mctx->state_log + top + 1, '\0', sizeof
(re_dfastate_t *) * (next_state_log_idx - top), __builtin_object_size
(mctx->state_log + top + 1, 0))

1734

sizeof (re_dfastate_t *) * (next_state_log_idx - top))__builtin___memset_chk (mctx->state_log + top + 1, '\0', sizeof
(re_dfastate_t *) * (next_state_log_idx - top), __builtin_object_size
(mctx->state_log + top + 1, 0));

1735

mctx->state_log_top = next_state_log_idx;

1736

}

1737

return REG_NOERROR;

1738

}

1739

1740

static reg_errcode_t

1741

internal_function

1742

merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,

1743

re_dfastate_t **src, int num)

1744

{

1745

int st_idx;

1746

reg_errcode_t err;

1747

for (st_idx = 0; st_idx < num; ++st_idx)

1748

{

1749

if (dst[st_idx] == NULL((void*)0))

1750

dst[st_idx] = src[st_idx];

1751

else if (src[st_idx] != NULL((void*)0))

1752

{

1753

re_node_set merged_set;

1754

err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,

1755

&src[st_idx]->nodes);

1756

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1757

return err;

1758

dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);

1759

re_node_set_free (&merged_set)free ((&merged_set)->elems);

1760

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1761

return err;

1762

}

1763

}

1764

return REG_NOERROR;

1765

}

1766

1767

static reg_errcode_t

1768

internal_function

1769

update_cur_sifted_state (const re_match_context_t *mctx,

1770

re_sift_context_t *sctx, int str_idx,

1771

re_node_set *dest_nodes)

1772

{

1773

const re_dfa_t *const dfa = mctx->dfa;

1774

reg_errcode_t err = REG_NOERROR;

1775

const re_node_set *candidates;

1776

candidates = ((mctx->state_log[str_idx] == NULL((void*)0)) ? NULL((void*)0)

1777

: &mctx->state_log[str_idx]->nodes);

1778

1779

if (dest_nodes->nelem == 0)

1780

sctx->sifted_states[str_idx] = NULL((void*)0);

1781

else

1782

{

1783

if (candidates)

1784

{

1785

/* At first, add the nodes which can epsilon transit to a node in

1786

DEST_NODE. */

1787

err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);

1788

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1789

return err;

1790

1791

/* Then, check the limitations in the current sift_context. */

1792

if (sctx->limits.nelem)

1793

{

1794

err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,

1795

mctx->bkref_ents, str_idx);

1796

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1797

return err;

1798

}

1799

}

1800

1801

sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);

1802

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1803

return err;

1804

}

1805

1806

if (candidates && mctx->state_log[str_idx]->has_backref)

1807

{

1808

err = sift_states_bkref (mctx, sctx, str_idx, candidates);

1809

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1810

return err;

1811

}

1812

return REG_NOERROR;

1813

}

1814

1815

static reg_errcode_t

1816

internal_function

1817

add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,

1818

const re_node_set *candidates)

1819

{

1820

reg_errcode_t err = REG_NOERROR;

1821

int i;

1822

1823

re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);

1824

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1825

return err;

1826

1827

if (!state->inveclosure.alloc)

1828

{

1829

err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);

1830

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1831

return REG_ESPACE;

1832

for (i = 0; i < dest_nodes->nelem; i++)

1833

{

1834

err = re_node_set_merge (&state->inveclosure,

1835

dfa->inveclosures + dest_nodes->elems[i]);

1836

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1837

return REG_ESPACE;

1838

}

1839

}

1840

return re_node_set_add_intersect (dest_nodes, candidates,

1841

&state->inveclosure);

1842

}

1843

1844

static reg_errcode_t

1845

internal_function

1846

sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,

1847

const re_node_set *candidates)

1848

{

1849

int ecl_idx;

1850

reg_errcode_t err;

1851

re_node_set *inv_eclosure = dfa->inveclosures + node;

1852

re_node_set except_nodes;

1853

re_node_set_init_empty (&except_nodes)__builtin___memset_chk (&except_nodes, '\0', sizeof (re_node_set
), __builtin_object_size (&except_nodes, 0));

1854

for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)

1855

{

1856

int cur_node = inv_eclosure->elems[ecl_idx];

1857

if (cur_node == node)

1858

continue;

1859

if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)((dfa->nodes[cur_node].type) & 8))

1860

{

1861

int edst1 = dfa->edests[cur_node].elems[0];

1862

int edst2 = ((dfa->edests[cur_node].nelem > 1)

1863

? dfa->edests[cur_node].elems[1] : -1);

1864

if ((!re_node_set_contains (inv_eclosure, edst1)

1865

&& re_node_set_contains (dest_nodes, edst1))

1866

|| (edst2 > 0

1867

&& !re_node_set_contains (inv_eclosure, edst2)

1868

&& re_node_set_contains (dest_nodes, edst2)))

1869

{

1870

err = re_node_set_add_intersect (&except_nodes, candidates,

1871

dfa->inveclosures + cur_node);

1872

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

1873

{

1874

re_node_set_free (&except_nodes)free ((&except_nodes)->elems);

1875

return err;

1876

}

1877

}

1878

}

1879

}

1880

for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)

1881

{

1882

int cur_node = inv_eclosure->elems[ecl_idx];

1883

if (!re_node_set_contains (&except_nodes, cur_node))

1884

{

1885

int idx = re_node_set_contains (dest_nodes, cur_node) - 1;

1886

re_node_set_remove_at (dest_nodes, idx);

1887

}

1888

}

1889

re_node_set_free (&except_nodes)free ((&except_nodes)->elems);

1890

return REG_NOERROR;

1891

}

1892

1893

static int

1894

internal_function

1895

check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,

1896

int dst_node, int dst_idx, int src_node, int src_idx)

1897

{

1898

const re_dfa_t *const dfa = mctx->dfa;

1899

int lim_idx, src_pos, dst_pos;

1900

1901

int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);

1902

int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);

1903

for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)

1904

{

1905

int subexp_idx;

1906

struct re_backref_cache_entry *ent;

1907

ent = mctx->bkref_ents + limits->elems[lim_idx];

1908

subexp_idx = dfa->nodes[ent->node].opr.idx;

1909

1910

dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],

1911

subexp_idx, dst_node, dst_idx,

1912

dst_bkref_idx);

1913

src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],

1914

subexp_idx, src_node, src_idx,

1915

src_bkref_idx);

1916

1917

/* In case of:

1918

<src> <dst> ( <subexp> )

1919

( <subexp> ) <src> <dst>

1920

( <subexp1> <src> <subexp2> <dst> <subexp3> ) */

1921

if (src_pos == dst_pos)

1922

continue; /* This is unrelated limitation. */

1923

else

1924

return 1;

1925

}

1926

return 0;

1927

}

1928

1929

static int

1930

internal_function

1931

check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,

1932

int subexp_idx, int from_node, int bkref_idx)

1933

{

1934

const re_dfa_t *const dfa = mctx->dfa;

1935

const re_node_set *eclosures = dfa->eclosures + from_node;

1936

int node_idx;

1937

1938

/* Else, we are on the boundary: examine the nodes on the epsilon

1939

closure. */

1940

for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)

1941

{

1942

int node = eclosures->elems[node_idx];

1943

switch (dfa->nodes[node].type)

1944

{

1945

case OP_BACK_REF:

1946

if (bkref_idx != -1)

1947

{

1948

struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;

1949

1950

{

1951

int dst, cpos;

1952

1953

if (ent->node != node)

1954

continue;

1955

1956

if (subexp_idx < BITSET_WORD_BITS(sizeof (bitset_word_t) * 8)

1957

&& !(ent->eps_reachable_subexps_map

1958

& ((bitset_word_t) 1 << subexp_idx)))

1959

continue;

1960

1961

/* Recurse trying to reach the OP_OPEN_SUBEXP and

1962

OP_CLOSE_SUBEXP cases below. But, if the

1963

destination node is the same node as the source

1964

node, don't recurse because it would cause an

1965

infinite loop: a regex that exhibits this behavior

1966

is ()\1*\1* */

1967

dst = dfa->edests[node].elems[0];

1968

if (dst == from_node)

1969

{

1970

if (boundaries & 1)

1971

return -1;

1972

else /* if (boundaries & 2) */

1973

return 0;

1974

}

1975

1976

cpos =

1977

check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,

1978

dst, bkref_idx);

1979

if (cpos == -1 /* && (boundaries & 1) */)

1980

return -1;

1981

if (cpos == 0 && (boundaries & 2))

1982

return 0;

1983

1984

if (subexp_idx < BITSET_WORD_BITS(sizeof (bitset_word_t) * 8))

1985

ent->eps_reachable_subexps_map

1986

&= ~((bitset_word_t) 1 << subexp_idx);

1987

}

1988

while (ent++->more);

1989

}

1990

break;

1991

1992

case OP_OPEN_SUBEXP:

1993

if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)

1994

return -1;

1995

break;

1996

1997

case OP_CLOSE_SUBEXP:

1998

if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)

1999

return 0;

2000

break;

2001

2002

default:

2003

break;

2004

}

2005

}

2006

2007

return (boundaries & 2) ? 1 : 0;

2008

}

2009

2010

static int

2011

internal_function

2012

check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,

2013

int subexp_idx, int from_node, int str_idx,

2014

int bkref_idx)

2015

{

2016

struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;

2017

int boundaries;

2018

2019

/* If we are outside the range of the subexpression, return -1 or 1. */

2020

if (str_idx < lim->subexp_from)

2021

return -1;

2022

2023

if (lim->subexp_to < str_idx)

2024

return 1;

2025

2026

/* If we are within the subexpression, return 0. */

2027

boundaries = (str_idx == lim->subexp_from);

2028

boundaries |= (str_idx == lim->subexp_to) << 1;

2029

if (boundaries == 0)

2030

return 0;

2031

2032

/* Else, examine epsilon closure. */

2033

return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,

2034

from_node, bkref_idx);

2035

}

2036

2037

/* Check the limitations of sub expressions LIMITS, and remove the nodes

2038

which are against limitations from DEST_NODES. */

2039

2040

static reg_errcode_t

2041

internal_function

2042

check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,

2043

const re_node_set *candidates, re_node_set *limits,

2044

struct re_backref_cache_entry *bkref_ents, int str_idx)

2045

{

2046

reg_errcode_t err;

2047

int node_idx, lim_idx;

2048

2049

for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)

2050

{

2051

int subexp_idx;

2052

struct re_backref_cache_entry *ent;

2053

ent = bkref_ents + limits->elems[lim_idx];

2054

2055

if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)

2056

continue; /* This is unrelated limitation. */

2057

2058

subexp_idx = dfa->nodes[ent->node].opr.idx;

2059

if (ent->subexp_to == str_idx)

2060

{

2061

int ops_node = -1;

2062

int cls_node = -1;

2063

for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)

2064

{

2065

int node = dest_nodes->elems[node_idx];

2066

re_token_type_t type = dfa->nodes[node].type;

2067

if (type == OP_OPEN_SUBEXP

2068

&& subexp_idx == dfa->nodes[node].opr.idx)

2069

ops_node = node;

2070

else if (type == OP_CLOSE_SUBEXP

2071

&& subexp_idx == dfa->nodes[node].opr.idx)

2072

cls_node = node;

2073

}

2074

2075

/* Check the limitation of the open subexpression. */

2076

/* Note that (ent->subexp_to = str_idx != ent->subexp_from). */

2077

if (ops_node >= 0)

2078

{

2079

err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,

2080

candidates);

2081

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2082

return err;

2083

}

2084

2085

/* Check the limitation of the close subexpression. */

2086

if (cls_node >= 0)

2087

for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)

2088

{

2089

int node = dest_nodes->elems[node_idx];

2090

if (!re_node_set_contains (dfa->inveclosures + node,

2091

cls_node)

2092

&& !re_node_set_contains (dfa->eclosures + node,

2093

cls_node))

2094

{

2095

/* It is against this limitation.

2096

Remove it form the current sifted state. */

2097

err = sub_epsilon_src_nodes (dfa, node, dest_nodes,

2098

candidates);

2099

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2100

return err;

2101

--node_idx;

2102

}

2103

}

2104

}

2105

else /* (ent->subexp_to != str_idx) */

2106

{

2107

for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)

2108

{

2109

int node = dest_nodes->elems[node_idx];

2110

re_token_type_t type = dfa->nodes[node].type;

2111

if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)

2112

{

2113

if (subexp_idx != dfa->nodes[node].opr.idx)

2114

continue;

2115

/* It is against this limitation.

2116

Remove it form the current sifted state. */

2117

err = sub_epsilon_src_nodes (dfa, node, dest_nodes,

2118

candidates);

2119

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2120

return err;

2121

}

2122

}

2123

}

2124

}

2125

return REG_NOERROR;

2126

}

2127

2128

static reg_errcode_t

2129

internal_function

2130

sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,

2131

int str_idx, const re_node_set *candidates)

2132

{

2133

const re_dfa_t *const dfa = mctx->dfa;

2134

reg_errcode_t err;

2135

int node_idx, node;

2136

re_sift_context_t local_sctx;

2137

int first_idx = search_cur_bkref_entry (mctx, str_idx);

2138

2139

if (first_idx == -1)

2140

return REG_NOERROR;

2141

2142

local_sctx.sifted_states = NULL((void*)0); /* Mark that it hasn't been initialized. */

2143

2144

for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)

2145

{

2146

int enabled_idx;

2147

re_token_type_t type;

2148

struct re_backref_cache_entry *entry;

2149

node = candidates->elems[node_idx];

2150

type = dfa->nodes[node].type;

2151

/* Avoid infinite loop for the REs like "()\1+". */

2152

if (node == sctx->last_node && str_idx == sctx->last_str_idx)

2153

continue;

2154

if (type != OP_BACK_REF)

2155

continue;

2156

2157

entry = mctx->bkref_ents + first_idx;

2158

enabled_idx = first_idx;

2159

2160

{

2161

int subexp_len;

2162

int to_idx;

2163

int dst_node;

2164

int ret;

2165

re_dfastate_t *cur_state;

2166

2167

if (entry->node != node)

2168

continue;

2169

subexp_len = entry->subexp_to - entry->subexp_from;

2170

to_idx = str_idx + subexp_len;

2171

dst_node = (subexp_len ? dfa->nexts[node]

2172

: dfa->edests[node].elems[0]);

2173

2174

if (to_idx > sctx->last_str_idx

2175

|| sctx->sifted_states[to_idx] == NULL((void*)0)

2176

|| !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)((sctx->sifted_states[to_idx]) != ((void*)0) && re_node_set_contains
(&(sctx->sifted_states[to_idx])->nodes, dst_node))

2177

|| check_dst_limits (mctx, &sctx->limits, node,

2178

str_idx, dst_node, to_idx))

2179

continue;

2180

2181

if (local_sctx.sifted_states == NULL((void*)0))

2182

{

2183

local_sctx = *sctx;

2184

err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);

2185

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2186

goto free_return;

2187

}

2188

local_sctx.last_node = node;

2189

local_sctx.last_str_idx = str_idx;

2190

ret = re_node_set_insert (&local_sctx.limits, enabled_idx);

2191

if (BE (ret < 0, 0)__builtin_expect (ret < 0, 0))

2192

{

2193

err = REG_ESPACE;

2194

goto free_return;

2195

}

2196

cur_state = local_sctx.sifted_states[str_idx];

2197

err = sift_states_backward (mctx, &local_sctx);

2198

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2199

goto free_return;

2200

if (sctx->limited_states != NULL((void*)0))

2201

{

2202

err = merge_state_array (dfa, sctx->limited_states,

2203

local_sctx.sifted_states,

2204

str_idx + 1);

2205

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2206

goto free_return;

2207

}

2208

local_sctx.sifted_states[str_idx] = cur_state;

2209

re_node_set_remove (&local_sctx.limits, enabled_idx)(re_node_set_remove_at (&local_sctx.limits, re_node_set_contains
(&local_sctx.limits, enabled_idx) - 1));

2210

2211

/* mctx->bkref_ents may have changed, reload the pointer. */

2212

entry = mctx->bkref_ents + enabled_idx;

2213

}

2214

while (enabled_idx++, entry++->more);

2215

}

2216

err = REG_NOERROR;

2217

free_return:

2218

if (local_sctx.sifted_states != NULL((void*)0))

2219

{

2220

re_node_set_free (&local_sctx.limits)free ((&local_sctx.limits)->elems);

2221

}

2222

2223

return err;

2224

}

2225

2226

2227

#ifdef RE_ENABLE_I18N

2228

static int

2229

internal_function

2230

sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,

2231

int node_idx, int str_idx, int max_str_idx)

2232

{

2233

const re_dfa_t *const dfa = mctx->dfa;

2234

int naccepted;

2235

/* Check the node can accept `multi byte'. */

2236

naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);

2237

if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&

2238

!STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],((sctx->sifted_states[str_idx + naccepted]) != ((void*)0) &&
re_node_set_contains (&(sctx->sifted_states[str_idx +
naccepted])->nodes, dfa->nexts[node_idx]))

2239

dfa->nexts[node_idx])((sctx->sifted_states[str_idx + naccepted]) != ((void*)0) &&
re_node_set_contains (&(sctx->sifted_states[str_idx +
naccepted])->nodes, dfa->nexts[node_idx])))

2240

/* The node can't accept the `multi byte', or the

2241

destination was already thrown away, then the node

2242

couldn't accept the current input `multi byte'. */

2243

naccepted = 0;

2244

/* Otherwise, it is sure that the node could accept

2245

`naccepted' bytes input. */

2246

return naccepted;

2247

}

2248

#endif /* RE_ENABLE_I18N */

2249

2250

2251

/* Functions for state transition. */

2252

2253

/* Return the next state to which the current state STATE will transit by

2254

accepting the current input byte, and update STATE_LOG if necessary.

2255

If STATE can accept a multibyte char/collating element/back reference

2256

update the destination of STATE_LOG. */

2257

2258

static re_dfastate_t *

2259

internal_function

2260

transit_state (reg_errcode_t *err, re_match_context_t *mctx,

2261

re_dfastate_t *state)

2262

{

2263

re_dfastate_t **trtable;

2264

unsigned char ch;

2265

2266

#ifdef RE_ENABLE_I18N

2267

/* If the current state can accept multibyte. */

2268

if (BE (state->accept_mb, 0)__builtin_expect (state->accept_mb, 0))

2269

{

2270

*err = transit_state_mb (mctx, state);

2271

if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))

2272

return NULL((void*)0);

2273

}

2274

#endif /* RE_ENABLE_I18N */

2275

2276

/* Then decide the next state with the single byte. */

2277

#if 0

2278

if (0)

2279

/* don't use transition table */

2280

return transit_state_sb (err, mctx, state);

2281

#endif

2282

2283

/* Use transition table */

2284

ch = re_string_fetch_byte (&mctx->input)((&mctx->input)->mbs[(&mctx->input)->cur_idx
++]);

2285

for (;;)

2286

{

2287

trtable = state->trtable;

2288

if (BE (trtable != NULL, 1)__builtin_expect (trtable != ((void*)0), 1))

2289

return trtable[ch];

2290

2291

trtable = state->word_trtable;

2292

if (BE (trtable != NULL, 1)__builtin_expect (trtable != ((void*)0), 1))

2293

{

2294

unsigned int context;

2295

context

2296

= re_string_context_at (&mctx->input,

2297

re_string_cur_idx (&mctx->input)((&mctx->input)->cur_idx) - 1,

2298

mctx->eflags);

2299

if (IS_WORD_CONTEXT (context)((context) & 1))

2300

return trtable[ch + SBC_MAX256];

2301

else

2302

return trtable[ch];

2303

}

2304

2305

if (!build_trtable (mctx->dfa, state))

2306

{

2307

*err = REG_ESPACE;

2308

return NULL((void*)0);

2309

}

2310

2311

/* Retry, we now have a transition table. */

2312

}

2313

}

2314

2315

/* Update the state_log if we need */

2316

static re_dfastate_t *

2317

internal_function

2318

merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,

2319

re_dfastate_t *next_state)

2320

{

2321

const re_dfa_t *const dfa = mctx->dfa;

2322

int cur_idx = re_string_cur_idx (&mctx->input)((&mctx->input)->cur_idx);

2323

2324

if (cur_idx > mctx->state_log_top)

2325

{

2326

mctx->state_log[cur_idx] = next_state;

2327

mctx->state_log_top = cur_idx;

2328

}

2329

else if (mctx->state_log[cur_idx] == NULL((void*)0))

2330

{

2331

mctx->state_log[cur_idx] = next_state;

2332

}

2333

else

2334

{

2335

re_dfastate_t *pstate;

2336

unsigned int context;

2337

re_node_set next_nodes, *log_nodes, *table_nodes = NULL((void*)0);

2338

/* If (state_log[cur_idx] != 0), it implies that cur_idx is

2339

the destination of a multibyte char/collating element/

2340

back reference. Then the next state is the union set of

2341

these destinations and the results of the transition table. */

2342

pstate = mctx->state_log[cur_idx];

2343

log_nodes = pstate->entrance_nodes;

2344

if (next_state != NULL((void*)0))

2345

{

2346

table_nodes = next_state->entrance_nodes;

2347

*err = re_node_set_init_union (&next_nodes, table_nodes,

2348

log_nodes);

2349

if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))

2350

return NULL((void*)0);

2351

}

2352

else

2353

next_nodes = *log_nodes;

2354

/* Note: We already add the nodes of the initial state,

2355

then we don't need to add them here. */

2356

2357

context = re_string_context_at (&mctx->input,

2358

re_string_cur_idx (&mctx->input)((&mctx->input)->cur_idx) - 1,

2359

mctx->eflags);

2360

next_state = mctx->state_log[cur_idx]

2361

= re_acquire_state_context (err, dfa, &next_nodes, context);

2362

/* We don't need to check errors here, since the return value of

2363

this function is next_state and ERR is already set. */

2364

2365

if (table_nodes != NULL((void*)0))

2366

re_node_set_free (&next_nodes)free ((&next_nodes)->elems);

2367

}

2368

2369

if (BE (dfa->nbackref, 0)__builtin_expect (dfa->nbackref, 0) && next_state != NULL((void*)0))

2370

{

2371

/* Check OP_OPEN_SUBEXP in the current state in case that we use them

2372

later. We must check them here, since the back references in the

2373

next state might use them. */

2374

*err = check_subexp_matching_top (mctx, &next_state->nodes,

2375

cur_idx);

2376

if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))

2377

return NULL((void*)0);

2378

2379

/* If the next state has back references. */

2380

if (next_state->has_backref)

2381

{

2382

*err = transit_state_bkref (mctx, &next_state->nodes);

2383

if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))

2384

return NULL((void*)0);

2385

next_state = mctx->state_log[cur_idx];

2386

}

2387

}

2388

2389

return next_state;

2390

}

2391

2392

/* Skip bytes in the input that correspond to part of a

2393

multi-byte match, then look in the log for a state

2394

from which to restart matching. */

2395

static re_dfastate_t *

2396

internal_function

2397

find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)

2398

{

2399

re_dfastate_t *cur_state;

2400

2401

{

2402

int max = mctx->state_log_top;

2403

int cur_str_idx = re_string_cur_idx (&mctx->input)((&mctx->input)->cur_idx);

2404

2405

2406

{

2407

if (++cur_str_idx > max)

2408

return NULL((void*)0);

2409

re_string_skip_bytes (&mctx->input, 1)((&mctx->input)->cur_idx += (1));

2410

}

2411

while (mctx->state_log[cur_str_idx] == NULL((void*)0));

2412

2413

cur_state = merge_state_with_log (err, mctx, NULL((void*)0));

2414

}

2415

while (*err == REG_NOERROR && cur_state == NULL((void*)0));

2416

return cur_state;

2417

}

2418

2419

/* Helper functions for transit_state. */

2420

2421

/* From the node set CUR_NODES, pick up the nodes whose types are

2422

OP_OPEN_SUBEXP and which have corresponding back references in the regular

2423

expression. And register them to use them later for evaluating the

2424

correspoding back references. */

2425

2426

static reg_errcode_t

2427

internal_function

2428

check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,

2429

int str_idx)

2430

{

2431

const re_dfa_t *const dfa = mctx->dfa;

2432

int node_idx;

2433

reg_errcode_t err;

2434

2435

/* TODO: This isn't efficient.

2436

Because there might be more than one nodes whose types are

2437

OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all

2438

nodes.

2439

E.g. RE: (a){2} */

2440

for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)

2441

{

2442

int node = cur_nodes->elems[node_idx];

2443

if (dfa->nodes[node].type == OP_OPEN_SUBEXP

2444

&& dfa->nodes[node].opr.idx < BITSET_WORD_BITS(sizeof (bitset_word_t) * 8)

2445

&& (dfa->used_bkref_map

2446

& ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))

2447

{

2448

err = match_ctx_add_subtop (mctx, node, str_idx);

2449

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2450

return err;

2451

}

2452

}

2453

return REG_NOERROR;

2454

}

2455

2456

#if 0

2457

/* Return the next state to which the current state STATE will transit by

2458

accepting the current input byte. */

2459

2460

static re_dfastate_t *

2461

transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,

2462

re_dfastate_t *state)

2463

{

2464

const re_dfa_t *const dfa = mctx->dfa;

2465

re_node_set next_nodes;

2466

re_dfastate_t *next_state;

2467

int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input)((&mctx->input)->cur_idx);

2468

unsigned int context;

2469

2470

*err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);

2471

if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))

2472

return NULL((void*)0);

2473

for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)

2474

{

2475

int cur_node = state->nodes.elems[node_cnt];

2476

if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))

2477

{

2478

*err = re_node_set_merge (&next_nodes,

2479

dfa->eclosures + dfa->nexts[cur_node]);

2480

if (BE (*err != REG_NOERROR, 0)__builtin_expect (*err != REG_NOERROR, 0))

2481

{

2482

re_node_set_free (&next_nodes)free ((&next_nodes)->elems);

2483

return NULL((void*)0);

2484

}

2485

}

2486

}

2487

context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);

2488

next_state = re_acquire_state_context (err, dfa, &next_nodes, context);

2489

/* We don't need to check errors here, since the return value of

2490

this function is next_state and ERR is already set. */

2491

2492

re_node_set_free (&next_nodes)free ((&next_nodes)->elems);

2493

re_string_skip_bytes (&mctx->input, 1)((&mctx->input)->cur_idx += (1));

2494

return next_state;

2495

}

2496

#endif

2497

2498

#ifdef RE_ENABLE_I18N

2499

static reg_errcode_t

2500

internal_function

2501

transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)

2502

{

2503

const re_dfa_t *const dfa = mctx->dfa;

2504

reg_errcode_t err;

2505

int i;

2506

2507

for (i = 0; i < pstate->nodes.nelem; ++i)

2508

{

2509

re_node_set dest_nodes, *new_nodes;

2510

int cur_node_idx = pstate->nodes.elems[i];

2511

int naccepted, dest_idx;

2512

unsigned int context;

2513

re_dfastate_t *dest_state;

2514

2515

if (!dfa->nodes[cur_node_idx].accept_mb)

2516

continue;

2517

2518

if (dfa->nodes[cur_node_idx].constraint)

2519

{

2520

context = re_string_context_at (&mctx->input,

2521

re_string_cur_idx (&mctx->input)((&mctx->input)->cur_idx),

2522

mctx->eflags);

2523

if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,((((dfa->nodes[cur_node_idx].constraint) & 0x0004) &&
!((context) & 1)) || (((dfa->nodes[cur_node_idx].constraint
) & 0x0008) && ((context) & 1)) || (((dfa->
nodes[cur_node_idx].constraint) & 0x0020) && !((context
) & (1 << 1))) || (((dfa->nodes[cur_node_idx].constraint
) & 0x0080) && !((context) & (((1 << 1)
<< 1) << 1))))

2524

context)((((dfa->nodes[cur_node_idx].constraint) & 0x0004) &&
!((context) & 1)) || (((dfa->nodes[cur_node_idx].constraint
) & 0x0008) && ((context) & 1)) || (((dfa->
nodes[cur_node_idx].constraint) & 0x0020) && !((context
) & (1 << 1))) || (((dfa->nodes[cur_node_idx].constraint
) & 0x0080) && !((context) & (((1 << 1)
<< 1) << 1)))))

2525

continue;

2526

}

2527

2528

/* How many bytes the node can accept? */

2529

naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,

2530

re_string_cur_idx (&mctx->input)((&mctx->input)->cur_idx));

2531

if (naccepted == 0)

2532

continue;

2533

2534

/* The node can accepts `naccepted' bytes. */

2535

dest_idx = re_string_cur_idx (&mctx->input)((&mctx->input)->cur_idx) + naccepted;

2536

mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted

2537

: mctx->max_mb_elem_len);

2538

err = clean_state_log_if_needed (mctx, dest_idx);

2539

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2540

return err;

2541

#ifdef DEBUG

2542

assert (dfa->nexts[cur_node_idx] != -1)(__builtin_expect(!(dfa->nexts[cur_node_idx] != -1), 0) ? __assert_rtn
(__func__, "compat/regex/regexec.c", 2542, "dfa->nexts[cur_node_idx] != -1"
) : (void)0);

2543

#endif

2544

new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];

2545

2546

dest_state = mctx->state_log[dest_idx];

2547

if (dest_state == NULL((void*)0))

2548

dest_nodes = *new_nodes;

2549

else

2550

{

2551

err = re_node_set_init_union (&dest_nodes,

2552

dest_state->entrance_nodes, new_nodes);

2553

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2554

return err;

2555

}

2556

context = re_string_context_at (&mctx->input, dest_idx - 1,

2557

mctx->eflags);

2558

mctx->state_log[dest_idx]

2559

= re_acquire_state_context (&err, dfa, &dest_nodes, context);

2560

if (dest_state != NULL((void*)0))

2561

re_node_set_free (&dest_nodes)free ((&dest_nodes)->elems);

2562

if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)__builtin_expect (mctx->state_log[dest_idx] == ((void*)0) &&
err != REG_NOERROR, 0))

2563

return err;

2564

}

2565

return REG_NOERROR;

2566

}

2567

#endif /* RE_ENABLE_I18N */

2568

2569

static reg_errcode_t

2570

internal_function

2571

transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)

2572

{

2573

const re_dfa_t *const dfa = mctx->dfa;

2574

reg_errcode_t err;

2575

int i;

2576

int cur_str_idx = re_string_cur_idx (&mctx->input)((&mctx->input)->cur_idx);

2577

2578

for (i = 0; i < nodes->nelem; ++i)

2579

{

2580

int dest_str_idx, prev_nelem, bkc_idx;

2581

int node_idx = nodes->elems[i];

2582

unsigned int context;

2583

const re_token_t *node = dfa->nodes + node_idx;

2584

re_node_set *new_dest_nodes;

2585

2586

/* Check whether `node' is a backreference or not. */

2587

if (node->type != OP_BACK_REF)

2588

continue;

2589

2590

if (node->constraint)

2591

{

2592

context = re_string_context_at (&mctx->input, cur_str_idx,

2593

mctx->eflags);

2594

if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)((((node->constraint) & 0x0004) && !((context)
& 1)) || (((node->constraint) & 0x0008) &&
((context) & 1)) || (((node->constraint) & 0x0020
) && !((context) & (1 << 1))) || (((node->
constraint) & 0x0080) && !((context) & (((1 <<
1) << 1) << 1)))))

2595

continue;

2596

}

2597

2598

/* `node' is a backreference.

2599

Check the substring which the substring matched. */

2600

bkc_idx = mctx->nbkref_ents;

2601

err = get_subexp (mctx, node_idx, cur_str_idx);

2602

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2603

goto free_return;

2604

2605

/* And add the epsilon closures (which is `new_dest_nodes') of

2606

the backreference to appropriate state_log. */

2607

#ifdef DEBUG

2608

assert (dfa->nexts[node_idx] != -1)(__builtin_expect(!(dfa->nexts[node_idx] != -1), 0) ? __assert_rtn
(__func__, "compat/regex/regexec.c", 2608, "dfa->nexts[node_idx] != -1"
) : (void)0);

2609

#endif

2610

for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)

2611

{

2612

int subexp_len;

2613

re_dfastate_t *dest_state;

2614

struct re_backref_cache_entry *bkref_ent;

2615

bkref_ent = mctx->bkref_ents + bkc_idx;

2616

if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)

2617

continue;

2618

subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;

2619

new_dest_nodes = (subexp_len == 0

2620

? dfa->eclosures + dfa->edests[node_idx].elems[0]

2621

: dfa->eclosures + dfa->nexts[node_idx]);

2622

dest_str_idx = (cur_str_idx + bkref_ent->subexp_to

2623

- bkref_ent->subexp_from);

2624

context = re_string_context_at (&mctx->input, dest_str_idx - 1,

2625

mctx->eflags);

2626

dest_state = mctx->state_log[dest_str_idx];

2627

prev_nelem = ((mctx->state_log[cur_str_idx] == NULL((void*)0)) ? 0

2628

: mctx->state_log[cur_str_idx]->nodes.nelem);

2629

/* Add `new_dest_node' to state_log. */

2630

if (dest_state == NULL((void*)0))

2631

{

2632

mctx->state_log[dest_str_idx]

2633

= re_acquire_state_context (&err, dfa, new_dest_nodes,

2634

context);

2635

if (BE (mctx->state_log[dest_str_idx] == NULL__builtin_expect (mctx->state_log[dest_str_idx] == ((void*
)0) && err != REG_NOERROR, 0)

2636

&& err != REG_NOERROR, 0)__builtin_expect (mctx->state_log[dest_str_idx] == ((void*
)0) && err != REG_NOERROR, 0))

2637

goto free_return;

2638

}

2639

else

2640

{

2641

re_node_set dest_nodes;

2642

err = re_node_set_init_union (&dest_nodes,

2643

dest_state->entrance_nodes,

2644

new_dest_nodes);

2645

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2646

{

2647

re_node_set_free (&dest_nodes)free ((&dest_nodes)->elems);

2648

goto free_return;

2649

}

2650

mctx->state_log[dest_str_idx]

2651

= re_acquire_state_context (&err, dfa, &dest_nodes, context);

2652

re_node_set_free (&dest_nodes)free ((&dest_nodes)->elems);

2653

if (BE (mctx->state_log[dest_str_idx] == NULL__builtin_expect (mctx->state_log[dest_str_idx] == ((void*
)0) && err != REG_NOERROR, 0)

2654

&& err != REG_NOERROR, 0)__builtin_expect (mctx->state_log[dest_str_idx] == ((void*
)0) && err != REG_NOERROR, 0))

2655

goto free_return;

2656

}

2657

/* We need to check recursively if the backreference can epsilon

2658

transit. */

2659

if (subexp_len == 0

2660

&& mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)

2661

{

2662

err = check_subexp_matching_top (mctx, new_dest_nodes,

2663

cur_str_idx);

2664

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2665

goto free_return;

2666

err = transit_state_bkref (mctx, new_dest_nodes);

2667

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2668

goto free_return;

2669

}

2670

}

2671

}

2672

err = REG_NOERROR;

2673

free_return:

2674

return err;

2675

}

2676

2677

/* Enumerate all the candidates which the backreference BKREF_NODE can match

2678

at BKREF_STR_IDX, and register them by match_ctx_add_entry().

2679

Note that we might collect inappropriate candidates here.

2680

However, the cost of checking them strictly here is too high, then we

2681

delay these checking for prune_impossible_nodes(). */

2682

2683

static reg_errcode_t

2684

internal_function

2685

get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)

2686

{

2687

const re_dfa_t *const dfa = mctx->dfa;

2688

int subexp_num, sub_top_idx;

2689

const char *buf = (const char *) re_string_get_buffer (&mctx->input)((&mctx->input)->mbs);

2690

/* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */

2691

int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);

2692

if (cache_idx != -1)

2693

{

2694

const struct re_backref_cache_entry *entry

2695

= mctx->bkref_ents + cache_idx;

2696

2697

if (entry->node == bkref_node)

2698

return REG_NOERROR; /* We already checked it. */

2699

while (entry++->more);

2700

}

2701

2702

subexp_num = dfa->nodes[bkref_node].opr.idx;

2703

2704

/* For each sub expression */

2705

for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)

2706

{

2707

reg_errcode_t err;

2708

re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];

2709

re_sub_match_last_t *sub_last;

2710

int sub_last_idx, sl_str, bkref_str_off;

2711

2712

if (dfa->nodes[sub_top->node].opr.idx != subexp_num)

2713

continue; /* It isn't related. */

2714

2715

sl_str = sub_top->str_idx;

2716

bkref_str_off = bkref_str_idx;

2717

/* At first, check the last node of sub expressions we already

2718

evaluated. */

2719

for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)

2720

{

2721

int sl_str_diff;

2722

sub_last = sub_top->lasts[sub_last_idx];

2723

sl_str_diff = sub_last->str_idx - sl_str;

2724

/* The matched string by the sub expression match with the substring

2725

at the back reference? */

2726

if (sl_str_diff > 0)

2727

{

2728

if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0)__builtin_expect (bkref_str_off + sl_str_diff > mctx->input
.valid_len, 0))

2729

{

2730

/* Not enough chars for a successful match. */

2731

if (bkref_str_off + sl_str_diff > mctx->input.len)

2732

break;

2733

2734

err = clean_state_log_if_needed (mctx,

2735

bkref_str_off

2736

+ sl_str_diff);

2737

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2738

return err;

2739

buf = (const char *) re_string_get_buffer (&mctx->input)((&mctx->input)->mbs);

2740

}

2741

if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)

2742

/* We don't need to search this sub expression any more. */

2743

break;

2744

}

2745

bkref_str_off += sl_str_diff;

2746

sl_str += sl_str_diff;

2747

err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,

2748

bkref_str_idx);

2749

2750

/* Reload buf, since the preceding call might have reallocated

2751

the buffer. */

2752

buf = (const char *) re_string_get_buffer (&mctx->input)((&mctx->input)->mbs);

2753

2754

if (err == REG_NOMATCH)

2755

continue;

2756

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2757

return err;

2758

}

2759

2760

if (sub_last_idx < sub_top->nlasts)

2761

continue;

2762

if (sub_last_idx > 0)

2763

++sl_str;

2764

/* Then, search for the other last nodes of the sub expression. */

2765

for (; sl_str <= bkref_str_idx; ++sl_str)

2766

{

2767

int cls_node, sl_str_off;

2768

const re_node_set *nodes;

2769

sl_str_off = sl_str - sub_top->str_idx;

2770

/* The matched string by the sub expression match with the substring

2771

at the back reference? */

2772

if (sl_str_off > 0)

2773

{

2774

if (BE (bkref_str_off >= mctx->input.valid_len, 0)__builtin_expect (bkref_str_off >= mctx->input.valid_len
, 0))

2775

{

2776

/* If we are at the end of the input, we cannot match. */

2777

if (bkref_str_off >= mctx->input.len)

2778

break;

2779

2780

err = extend_buffers (mctx);

2781

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2782

return err;

2783

2784

buf = (const char *) re_string_get_buffer (&mctx->input)((&mctx->input)->mbs);

2785

}

2786

if (buf [bkref_str_off++] != buf[sl_str - 1])

2787

break; /* We don't need to search this sub expression

2788

any more. */

2789

}

2790

if (mctx->state_log[sl_str] == NULL((void*)0))

2791

continue;

2792

/* Does this state have a ')' of the sub expression? */

2793

nodes = &mctx->state_log[sl_str]->nodes;

2794

cls_node = find_subexp_node (dfa, nodes, subexp_num,

2795

OP_CLOSE_SUBEXP);

2796

if (cls_node == -1)

2797

continue; /* No. */

2798

if (sub_top->path == NULL((void*)0))

2799

{

2800

sub_top->path = calloc (sizeof (state_array_t),

2801

sl_str - sub_top->str_idx + 1);

2802

if (sub_top->path == NULL((void*)0))

2803

return REG_ESPACE;

2804

}

2805

/* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node

2806

in the current context? */

2807

err = check_arrival (mctx, sub_top->path, sub_top->node,

2808

sub_top->str_idx, cls_node, sl_str,

2809

OP_CLOSE_SUBEXP);

2810

if (err == REG_NOMATCH)

2811

continue;

2812

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2813

return err;

2814

sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);

2815

if (BE (sub_last == NULL, 0)__builtin_expect (sub_last == ((void*)0), 0))

2816

return REG_ESPACE;

2817

err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,

2818

bkref_str_idx);

2819

if (err == REG_NOMATCH)

2820

continue;

2821

}

2822

}

2823

return REG_NOERROR;

2824

}

2825

2826

/* Helper functions for get_subexp(). */

2827

2828

/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.

2829

If it can arrive, register the sub expression expressed with SUB_TOP

2830

and SUB_LAST. */

2831

2832

static reg_errcode_t

2833

internal_function

2834

get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,

2835

re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)

2836

{

2837

reg_errcode_t err;

2838

int to_idx;

2839

/* Can the subexpression arrive the back reference? */

2840

err = check_arrival (mctx, &sub_last->path, sub_last->node,

2841

sub_last->str_idx, bkref_node, bkref_str,

2842

OP_OPEN_SUBEXP);

2843

if (err != REG_NOERROR)

2844

return err;

2845

err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,

2846

sub_last->str_idx);

2847

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2848

return err;

2849

to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;

2850

return clean_state_log_if_needed (mctx, to_idx);

2851

}

2852

2853

/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.

2854

Search '(' if FL_OPEN, or search ')' otherwise.

2855

TODO: This function isn't efficient...

2856

Because there might be more than one nodes whose types are

2857

OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all

2858

nodes.

2859

E.g. RE: (a){2} */

2860

2861

static int

2862

internal_function

2863

find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,

2864

int subexp_idx, int type)

2865

{

2866

int cls_idx;

2867

for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)

2868

{

2869

int cls_node = nodes->elems[cls_idx];

2870

const re_token_t *node = dfa->nodes + cls_node;

2871

if (node->type == type

2872

&& node->opr.idx == subexp_idx)

2873

return cls_node;

2874

}

2875

return -1;

2876

}

2877

2878

/* Check whether the node TOP_NODE at TOP_STR can arrive to the node

2879

LAST_NODE at LAST_STR. We record the path onto PATH since it will be

2880

heavily reused.

2881

Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */

2882

2883

static reg_errcode_t

2884

internal_function

2885

check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,

2886

int top_str, int last_node, int last_str, int type)

2887

{

2888

const re_dfa_t *const dfa = mctx->dfa;

2889

reg_errcode_t err = REG_NOERROR;

2890

int subexp_num, backup_cur_idx, str_idx, null_cnt;

2891

re_dfastate_t *cur_state = NULL((void*)0);

2892

re_node_set *cur_nodes, next_nodes;

2893

re_dfastate_t **backup_state_log;

2894

unsigned int context;

2895

2896

subexp_num = dfa->nodes[top_node].opr.idx;

2897

/* Extend the buffer if we need. */

2898

if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0)__builtin_expect (path->alloc < last_str + mctx->max_mb_elem_len
+ 1, 0))

2899

{

2900

re_dfastate_t **new_array;

2901

int old_alloc = path->alloc;

2902

path->alloc += last_str + mctx->max_mb_elem_len + 1;

2903

new_array = re_realloc (path->array, re_dfastate_t *, path->alloc)((path->array != ((void*)0)) ? (re_dfastate_t * *) realloc
(path->array,(path->alloc)*sizeof(re_dfastate_t *)) : (
re_dfastate_t * *) calloc(path->alloc,sizeof(re_dfastate_t
*)));

2904

if (BE (new_array == NULL, 0)__builtin_expect (new_array == ((void*)0), 0))

2905

{

2906

path->alloc = old_alloc;

2907

return REG_ESPACE;

2908

}

2909

path->array = new_array;

2910

memset (new_array + old_alloc, '\0',__builtin___memset_chk (new_array + old_alloc, '\0', sizeof (
re_dfastate_t *) * (path->alloc - old_alloc), __builtin_object_size
(new_array + old_alloc, 0))

2911

sizeof (re_dfastate_t *) * (path->alloc - old_alloc))__builtin___memset_chk (new_array + old_alloc, '\0', sizeof (
re_dfastate_t *) * (path->alloc - old_alloc), __builtin_object_size
(new_array + old_alloc, 0));

2912

}

2913

2914

str_idx = path->next_idx ? path->next_idx : top_str;

2915

2916

/* Temporary modify MCTX. */

2917

backup_state_log = mctx->state_log;

2918

backup_cur_idx = mctx->input.cur_idx;

2919

mctx->state_log = path->array;

2920

mctx->input.cur_idx = str_idx;

2921

2922

/* Setup initial node set. */

2923

context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);

2924

if (str_idx == top_str)

2925

{

2926

err = re_node_set_init_1 (&next_nodes, top_node);

2927

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2928

return err;

2929

err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);

2930

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2931

{

2932

re_node_set_free (&next_nodes)free ((&next_nodes)->elems);

2933

return err;

2934

}

2935

}

2936

else

2937

{

2938

cur_state = mctx->state_log[str_idx];

2939

if (cur_state && cur_state->has_backref)

2940

{

2941

err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);

2942

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2943

return err;

2944

}

2945

else

2946

re_node_set_init_empty (&next_nodes)__builtin___memset_chk (&next_nodes, '\0', sizeof (re_node_set
), __builtin_object_size (&next_nodes, 0));

2947

}

2948

if (str_idx == top_str || (cur_state && cur_state->has_backref))

2949

{

2950

if (next_nodes.nelem)

2951

{

2952

err = expand_bkref_cache (mctx, &next_nodes, str_idx,

2953

subexp_num, type);

2954

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2955

{

2956

re_node_set_free (&next_nodes)free ((&next_nodes)->elems);

2957

return err;

2958

}

2959

}

2960

cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);

2961

if (BE (cur_state == NULL && err != REG_NOERROR, 0)__builtin_expect (cur_state == ((void*)0) && err != REG_NOERROR
, 0))

2962

{

2963

re_node_set_free (&next_nodes)free ((&next_nodes)->elems);

2964

return err;

2965

}

2966

mctx->state_log[str_idx] = cur_state;

2967

}

2968

2969

for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)

2970

{

2971

re_node_set_empty (&next_nodes)((&next_nodes)->nelem = 0);

2972

if (mctx->state_log[str_idx + 1])

2973

{

2974

err = re_node_set_merge (&next_nodes,

2975

&mctx->state_log[str_idx + 1]->nodes);

2976

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2977

{

2978

re_node_set_free (&next_nodes)free ((&next_nodes)->elems);

2979

return err;

2980

}

2981

}

2982

if (cur_state)

2983

{

2984

err = check_arrival_add_next_nodes (mctx, str_idx,

2985

&cur_state->non_eps_nodes,

2986

&next_nodes);

2987

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2988

{

2989

re_node_set_free (&next_nodes)free ((&next_nodes)->elems);

2990

return err;

2991

}

2992

}

2993

++str_idx;

2994

if (next_nodes.nelem)

2995

{

2996

err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);

2997

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

2998

{

2999

re_node_set_free (&next_nodes)free ((&next_nodes)->elems);

3000

return err;

3001

}

3002

err = expand_bkref_cache (mctx, &next_nodes, str_idx,

3003

subexp_num, type);

3004

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

3005

{

3006

re_node_set_free (&next_nodes)free ((&next_nodes)->elems);

3007

return err;

3008

}

3009

}

3010

context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);

3011

cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);

3012

if (BE (cur_state == NULL && err != REG_NOERROR, 0)__builtin_expect (cur_state == ((void*)0) && err != REG_NOERROR
, 0))

3013

{

3014

re_node_set_free (&next_nodes)free ((&next_nodes)->elems);

3015

return err;

3016

}

3017

mctx->state_log[str_idx] = cur_state;

3018

null_cnt = cur_state == NULL((void*)0) ? null_cnt + 1 : 0;

3019

}

3020

re_node_set_free (&next_nodes)free ((&next_nodes)->elems);

3021

cur_nodes = (mctx->state_log[last_str] == NULL((void*)0) ? NULL((void*)0)

3022

: &mctx->state_log[last_str]->nodes);

3023

path->next_idx = str_idx;

3024

3025

/* Fix MCTX. */

3026

mctx->state_log = backup_state_log;

3027

mctx->input.cur_idx = backup_cur_idx;

3028

3029

/* Then check the current node set has the node LAST_NODE. */

3030

if (cur_nodes != NULL((void*)0) && re_node_set_contains (cur_nodes, last_node))

3031

return REG_NOERROR;

3032

3033

return REG_NOMATCH;

3034

}

3035

3036

/* Helper functions for check_arrival. */

3037

3038

/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them

3039

to NEXT_NODES.

3040

TODO: This function is similar to the functions transit_state*(),

3041

however this function has many additional works.

3042

Can't we unify them? */

3043

3044

static reg_errcode_t

3045

internal_function

3046

check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,

3047

re_node_set *cur_nodes, re_node_set *next_nodes)

3048

{

3049

const re_dfa_t *const dfa = mctx->dfa;

3050

int result;

3051

int cur_idx;

3052

#ifdef RE_ENABLE_I18N

3053

reg_errcode_t err = REG_NOERROR;

3054

#endif

3055

re_node_set union_set;

3056

re_node_set_init_empty (&union_set)__builtin___memset_chk (&union_set, '\0', sizeof (re_node_set
), __builtin_object_size (&union_set, 0));

3057

for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)

3058

{

3059

int naccepted = 0;

3060

int cur_node = cur_nodes->elems[cur_idx];

3061

#ifdef DEBUG

3062

re_token_type_t type = dfa->nodes[cur_node].type;

3063

assert (!IS_EPSILON_NODE (type))(__builtin_expect(!(!((type) & 8)), 0) ? __assert_rtn(__func__
, "compat/regex/regexec.c", 3063, "!IS_EPSILON_NODE (type)") :
(void)0);

3064

#endif

3065

#ifdef RE_ENABLE_I18N

3066

/* If the node may accept `multi byte'. */

3067

if (dfa->nodes[cur_node].accept_mb)

3068

{

3069

naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,

3070

str_idx);

3071

if (naccepted > 1)

3072

{

3073

re_dfastate_t *dest_state;

3074

int next_node = dfa->nexts[cur_node];

3075

int next_idx = str_idx + naccepted;

3076

dest_state = mctx->state_log[next_idx];

3077

re_node_set_empty (&union_set)((&union_set)->nelem = 0);

3078

if (dest_state)

3079

{

3080

err = re_node_set_merge (&union_set, &dest_state->nodes);

3081

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

3082

{

3083

re_node_set_free (&union_set)free ((&union_set)->elems);

3084

return err;

3085

}

3086

}

3087

result = re_node_set_insert (&union_set, next_node);

3088

if (BE (result < 0, 0)__builtin_expect (result < 0, 0))

3089

{

3090

re_node_set_free (&union_set)free ((&union_set)->elems);

3091

return REG_ESPACE;

3092

}

3093

mctx->state_log[next_idx] = re_acquire_state (&err, dfa,

3094

&union_set);

3095

if (BE (mctx->state_log[next_idx] == NULL__builtin_expect (mctx->state_log[next_idx] == ((void*)0) &&
err != REG_NOERROR, 0)

3096

&& err != REG_NOERROR, 0)__builtin_expect (mctx->state_log[next_idx] == ((void*)0) &&
err != REG_NOERROR, 0))

3097

{

3098

re_node_set_free (&union_set)free ((&union_set)->elems);

3099

return err;

3100

}

3101

}

3102

}

3103

#endif /* RE_ENABLE_I18N */

3104

if (naccepted

3105

|| check_node_accept (mctx, dfa->nodes + cur_node, str_idx))

3106

{

3107

result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);

3108

if (BE (result < 0, 0)__builtin_expect (result < 0, 0))

3109

{

3110

re_node_set_free (&union_set)free ((&union_set)->elems);

3111

return REG_ESPACE;

3112

}

3113

}

3114

}

3115

re_node_set_free (&union_set)free ((&union_set)->elems);

3116

return REG_NOERROR;

3117

}

3118

3119

/* For all the nodes in CUR_NODES, add the epsilon closures of them to

3120

CUR_NODES, however exclude the nodes which are:

3121

- inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.

3122

- out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.

3123

3124

3125

static reg_errcode_t

3126

internal_function

3127

check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,

3128

int ex_subexp, int type)

3129

{

3130

reg_errcode_t err;

3131

int idx, outside_node;

3132

re_node_set new_nodes;

3133

#ifdef DEBUG

3134

assert (cur_nodes->nelem)(__builtin_expect(!(cur_nodes->nelem), 0) ? __assert_rtn(__func__
, "compat/regex/regexec.c", 3134, "cur_nodes->nelem") : (void
)0);

3135

#endif

3136

err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);

3137

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

3138

return err;

3139

/* Create a new node set NEW_NODES with the nodes which are epsilon

3140

closures of the node in CUR_NODES. */

3141

3142

for (idx = 0; idx < cur_nodes->nelem; ++idx)

3143

{

3144

int cur_node = cur_nodes->elems[idx];

3145

const re_node_set *eclosure = dfa->eclosures + cur_node;

3146

outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);

3147

if (outside_node == -1)

3148

{

3149

/* There are no problematic nodes, just merge them. */

3150

err = re_node_set_merge (&new_nodes, eclosure);

3151

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

3152

{

3153

re_node_set_free (&new_nodes)free ((&new_nodes)->elems);

3154

return err;

3155

}

3156

}

3157

else

3158

{

3159

/* There are problematic nodes, re-calculate incrementally. */

3160

err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,

3161

ex_subexp, type);

3162

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

3163

{

3164

re_node_set_free (&new_nodes)free ((&new_nodes)->elems);

3165

return err;

3166

}

3167

}

3168

}

3169

re_node_set_free (cur_nodes)free ((cur_nodes)->elems);

3170

*cur_nodes = new_nodes;

3171

return REG_NOERROR;

3172

}

3173

3174

/* Helper function for check_arrival_expand_ecl.

3175

Check incrementally the epsilon closure of TARGET, and if it isn't

3176

problematic append it to DST_NODES. */

3177

3178

static reg_errcode_t

3179

internal_function

3180

check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,

3181

int target, int ex_subexp, int type)

3182

{

3183

int cur_node;

3184

for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)

3185

{

3186

int err;

3187

3188

if (dfa->nodes[cur_node].type == type

3189

&& dfa->nodes[cur_node].opr.idx == ex_subexp)

3190

{

3191

if (type == OP_CLOSE_SUBEXP)

3192

{

3193

err = re_node_set_insert (dst_nodes, cur_node);

3194

if (BE (err == -1, 0)__builtin_expect (err == -1, 0))

3195

return REG_ESPACE;

3196

}

3197

break;

3198

}

3199

err = re_node_set_insert (dst_nodes, cur_node);

3200

if (BE (err == -1, 0)__builtin_expect (err == -1, 0))

3201

return REG_ESPACE;

3202

if (dfa->edests[cur_node].nelem == 0)

3203

break;

3204

if (dfa->edests[cur_node].nelem == 2)

3205

{

3206

err = check_arrival_expand_ecl_sub (dfa, dst_nodes,

3207

dfa->edests[cur_node].elems[1],

3208

ex_subexp, type);

3209

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

3210

return err;

3211

}

3212

cur_node = dfa->edests[cur_node].elems[0];

3213

}

3214

return REG_NOERROR;

3215

}

3216

3217

3218

/* For all the back references in the current state, calculate the

3219

destination of the back references by the appropriate entry

3220

in MCTX->BKREF_ENTS. */

3221

3222

static reg_errcode_t

3223

internal_function

3224

expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,

3225

int cur_str, int subexp_num, int type)

3226

{

3227

const re_dfa_t *const dfa = mctx->dfa;

3228

reg_errcode_t err;

3229

int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);

3230

struct re_backref_cache_entry *ent;

3231

3232

if (cache_idx_start == -1)

3233

return REG_NOERROR;

3234

3235

restart:

3236

ent = mctx->bkref_ents + cache_idx_start;

3237

3238

{

3239

int to_idx, next_node;

3240

3241

/* Is this entry ENT is appropriate? */

3242

if (!re_node_set_contains (cur_nodes, ent->node))

3243

continue; /* No. */

3244

3245

to_idx = cur_str + ent->subexp_to - ent->subexp_from;

3246

/* Calculate the destination of the back reference, and append it

3247

to MCTX->STATE_LOG. */

3248

if (to_idx == cur_str)

3249

{

3250

/* The backreference did epsilon transit, we must re-check all the

3251

node in the current state. */

3252

re_node_set new_dests;

3253

reg_errcode_t err2, err3;

3254

next_node = dfa->edests[ent->node].elems[0];

3255

if (re_node_set_contains (cur_nodes, next_node))

3256

continue;

3257

err = re_node_set_init_1 (&new_dests, next_node);

3258

err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);

3259

err3 = re_node_set_merge (cur_nodes, &new_dests);

3260

re_node_set_free (&new_dests)free ((&new_dests)->elems);

3261

if (BE (err != REG_NOERROR || err2 != REG_NOERROR__builtin_expect (err != REG_NOERROR || err2 != REG_NOERROR ||
err3 != REG_NOERROR, 0)

3262

|| err3 != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR || err2 != REG_NOERROR ||
err3 != REG_NOERROR, 0))

3263

{

3264

err = (err != REG_NOERROR ? err

3265

: (err2 != REG_NOERROR ? err2 : err3));

3266

return err;

3267

}

3268

/* TODO: It is still inefficient... */

3269

goto restart;

3270

}

3271

else

3272

{

3273

re_node_set union_set;

3274

next_node = dfa->nexts[ent->node];

3275

if (mctx->state_log[to_idx])

3276

{

3277

int ret;

3278

if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,

3279

next_node))

3280

continue;

3281

err = re_node_set_init_copy (&union_set,

3282

&mctx->state_log[to_idx]->nodes);

3283

ret = re_node_set_insert (&union_set, next_node);

3284

if (BE (err != REG_NOERROR || ret < 0, 0)__builtin_expect (err != REG_NOERROR || ret < 0, 0))

3285

{

3286

re_node_set_free (&union_set)free ((&union_set)->elems);

3287

err = err != REG_NOERROR ? err : REG_ESPACE;

3288

return err;

3289

}

3290

}

3291

else

3292

{

3293

err = re_node_set_init_1 (&union_set, next_node);

3294

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

3295

return err;

3296

}

3297

mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);

3298

re_node_set_free (&union_set)free ((&union_set)->elems);

3299

if (BE (mctx->state_log[to_idx] == NULL__builtin_expect (mctx->state_log[to_idx] == ((void*)0) &&
err != REG_NOERROR, 0)

3300

&& err != REG_NOERROR, 0)__builtin_expect (mctx->state_log[to_idx] == ((void*)0) &&
err != REG_NOERROR, 0))

3301

return err;

3302

}

3303

}

3304

while (ent++->more);

3305

return REG_NOERROR;

3306

}

3307

3308

/* Build transition table for the state.

3309

Return 1 if succeeded, otherwise return NULL. */

3310

3311

static int

3312

internal_function

3313

build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)

3314

{

3315

reg_errcode_t err;

3316

int i, j, ch, need_word_trtable = 0;

3317

bitset_word_t elem, mask;

3318

boolint dests_node_malloced = false(0);

3319

boolint dest_states_malloced = false(0);

3320

int ndests; /* Number of the destination states from `state'. */

3321

re_dfastate_t **trtable;

3322

re_dfastate_t **dest_states = NULL((void*)0), **dest_states_word, **dest_states_nl;

3323

re_node_set follows, *dests_node;

3324

bitset_t *dests_ch;

3325

bitset_t acceptable;

3326

3327

struct dests_alloc

3328

{

3329

re_node_set dests_node[SBC_MAX256];

3330

bitset_t dests_ch[SBC_MAX256];

3331

} *dests_alloc;

3332

3333

/* We build DFA states which corresponds to the destination nodes

3334

from `state'. `dests_node[i]' represents the nodes which i-th

3335

destination state contains, and `dests_ch[i]' represents the

3336

characters which i-th destination state accepts. */

3337

#ifdef HAVE_ALLOCA

3338

if (__libc_use_alloca (sizeof (struct dests_alloc))0)

3339

dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc))__builtin_alloca(sizeof (struct dests_alloc));

3340

else

3341

#endif

3342

{

3343

dests_alloc = re_malloc (struct dests_alloc, 1)((struct dests_alloc *) malloc ((1) * sizeof (struct dests_alloc
)));

3344

if (BE (dests_alloc == NULL, 0)__builtin_expect (dests_alloc == ((void*)0), 0))

Taking false branch

→

3345

return 0;

3346

dests_node_malloced = true(1);

3347

}

3348

dests_node = dests_alloc->dests_node;

3349

dests_ch = dests_alloc->dests_ch;

3350

3351

/* Initialize transiton table. */

3352

state->word_trtable = state->trtable = NULL((void*)0);

3353

3354

/* At first, group all nodes belonging to `state' into several

3355

destinations. */

3356

ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);

3357

if (BE (ndests <= 0, 0)__builtin_expect (ndests <= 0, 0))

←

Taking false branch

→

3358

{

3359

if (dests_node_malloced)

3360

free (dests_alloc);

3361

/* Return 0 in case of an error, 1 otherwise. */

3362

if (ndests == 0)

3363

{

3364

state->trtable = (re_dfastate_t **)

3365

calloc (sizeof (re_dfastate_t *), SBC_MAX256);

3366

return 1;

3367

}

3368

return 0;

3369

}

3370

3371

err = re_node_set_alloc (&follows, ndests + 1);

3372

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

←

Taking false branch

→

3373

goto out_free;

3374

3375

/* Avoid arithmetic overflow in size calculation. */

3376

if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)__builtin_expect ((((18446744073709551615ULL - (sizeof (re_node_set
) + sizeof (bitset_t)) * 256) / (3 * sizeof (re_dfastate_t *)
)) < ndests), 0)

←

Taking false branch

→

3377

/ (3 * sizeof (re_dfastate_t *)))__builtin_expect ((((18446744073709551615ULL - (sizeof (re_node_set
) + sizeof (bitset_t)) * 256) / (3 * sizeof (re_dfastate_t *)
)) < ndests), 0)

3378

< ndests),__builtin_expect ((((18446744073709551615ULL - (sizeof (re_node_set
) + sizeof (bitset_t)) * 256) / (3 * sizeof (re_dfastate_t *)
)) < ndests), 0)

3379

0)__builtin_expect ((((18446744073709551615ULL - (sizeof (re_node_set
) + sizeof (bitset_t)) * 256) / (3 * sizeof (re_dfastate_t *)
)) < ndests), 0))

3380

goto out_free;

3381

3382

#ifdef HAVE_ALLOCA

3383

if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX0

3384

+ ndests * 3 * sizeof (re_dfastate_t *))0)

3385

dest_states = (re_dfastate_t **)

3386

alloca (ndests * 3 * sizeof (re_dfastate_t *))__builtin_alloca(ndests * 3 * sizeof (re_dfastate_t *));

3387

else

3388

#endif

3389

{

3390

dest_states = (re_dfastate_t **)

3391

malloc (ndests * 3 * sizeof (re_dfastate_t *));

3392

if (BE (dest_states == NULL, 0)__builtin_expect (dest_states == ((void*)0), 0))

←

Taking false branch

→

3393

{

3394

out_free:

3395

if (dest_states_malloced)

3396

free (dest_states);

3397

re_node_set_free (&follows)free ((&follows)->elems);

3398

for (i = 0; i < ndests; ++i)

3399

re_node_set_free (dests_node + i)free ((dests_node + i)->elems);

3400

if (dests_node_malloced)

3401

free (dests_alloc);

3402

return 0;

3403

}

3404

dest_states_malloced = true(1);

3405

}

3406

dest_states_word = dest_states + ndests;

3407

dest_states_nl = dest_states_word + ndests;

3408

bitset_empty (acceptable)__builtin___memset_chk (acceptable, '\0', sizeof (bitset_t), __builtin_object_size
(acceptable, 0));

3409

3410

/* Then build the states for all destinations. */

3411

for (i = 0; i < ndests; ++i)

←

Loop condition is true. Entering loop body

→

←

Assuming 'i' is >= 'ndests'

→

←

Loop condition is false. Execution continues on line 3454

→

3412

{

3413

int next_node;

3414

re_node_set_empty (&follows)((&follows)->nelem = 0);

3415

/* Merge the follows of this destination states. */

3416

for (j = 0; j < dests_node[i].nelem; ++j)

←

Loop condition is false. Execution continues on line 3426

→

3417

{

3418

next_node = dfa->nexts[dests_node[i].elems[j]];

3419

if (next_node != -1)

3420

{

3421

err = re_node_set_merge (&follows, dfa->eclosures + next_node);

3422

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

3423

goto out_free;

3424

}

3425

}

3426

dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);

3427

if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)__builtin_expect (dest_states[i] == ((void*)0) && err
!= REG_NOERROR, 0))

←

Taking false branch

→

3428

goto out_free;

3429

/* If the new state has context constraint,

3430

build appropriate states for these contexts. */

3431

if (dest_states[i]->has_constraint)

←

Taking false branch

→

3432

{

3433

dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,

3434

CONTEXT_WORD1);

3435

if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)__builtin_expect (dest_states_word[i] == ((void*)0) &&
err != REG_NOERROR, 0))

3436

goto out_free;

3437

3438

if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)

3439

need_word_trtable = 1;

3440

3441

dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,

3442

CONTEXT_NEWLINE(1 << 1));

3443

if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)__builtin_expect (dest_states_nl[i] == ((void*)0) && err
!= REG_NOERROR, 0))

3444

goto out_free;

3445

}

3446

else

3447

{

3448

dest_states_word[i] = dest_states[i];

3449

dest_states_nl[i] = dest_states[i];

3450

}

3451

bitset_merge (acceptable, dests_ch[i]);

3452

}

3453

3454

if (!BE (need_word_trtable, 0)__builtin_expect (need_word_trtable, 0))

←

Taking true branch

→

3455

{

3456

/* We don't care about whether the following character is a word

3457

character, or we are in a single-byte character set so we can

3458

discern by looking at the character code: allocate a

3459

256-entry transition table. */

3460

trtable = state->trtable =

3461

(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX256);

3462

if (BE (trtable == NULL, 0)__builtin_expect (trtable == ((void*)0), 0))

←

Taking false branch

→

3463

goto out_free;

3464

3465

/* For all characters ch...: */

3466

for (i = 0; i < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++i)

←

Loop condition is true. Entering loop body

→

3467

for (ch = i * BITSET_WORD_BITS(sizeof (bitset_word_t) * 8), elem = acceptable[i], mask = 1;

←

Loop condition is true. Entering loop body

→

3468

elem;

3469

mask <<= 1, elem >>= 1, ++ch)

3470

if (BE (elem & 1, 0)__builtin_expect (elem & 1, 0))

←

Taking true branch

→

3471

{

3472

/* There must be exactly one destination which accepts

3473

character ch. See group_nodes_into_DFAstates. */

3474

for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)

←

Loop condition is true. Entering loop body

→

←

Loop condition is false. Execution continues on line 3478

→

3475

;

3476

3477

/* j-th destination accepts the word character ch. */

3478

if (dfa->word_char[i] & mask)

←

Taking false branch

→

3479

trtable[ch] = dest_states_word[j];

3480

else

3481

trtable[ch] = dest_states[j];

←

Assigned value is garbage or undefined

3482

}

3483

}

3484

else

3485

{

3486

/* We care about whether the following character is a word

3487

character, and we are in a multi-byte character set: discern

3488

by looking at the character code: build two 256-entry

3489

transition tables, one starting at trtable[0] and one

3490

starting at trtable[SBC_MAX]. */

3491

trtable = state->word_trtable =

3492

(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX256);

3493

if (BE (trtable == NULL, 0)__builtin_expect (trtable == ((void*)0), 0))

3494

goto out_free;

3495

3496

/* For all characters ch...: */

3497

for (i = 0; i < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++i)

3498

for (ch = i * BITSET_WORD_BITS(sizeof (bitset_word_t) * 8), elem = acceptable[i], mask = 1;

3499

elem;

3500

mask <<= 1, elem >>= 1, ++ch)

3501

if (BE (elem & 1, 0)__builtin_expect (elem & 1, 0))

3502

{

3503

/* There must be exactly one destination which accepts

3504

character ch. See group_nodes_into_DFAstates. */

3505

for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)

3506

;

3507

3508

/* j-th destination accepts the word character ch. */

3509

trtable[ch] = dest_states[j];

3510

trtable[ch + SBC_MAX256] = dest_states_word[j];

3511

}

3512

}

3513

3514

/* new line */

3515

if (bitset_contain (acceptable, NEWLINE_CHAR)(acceptable['\n' / (sizeof (bitset_word_t) * 8)] & ((bitset_word_t
) 1 << '\n' % (sizeof (bitset_word_t) * 8))))

3516

{

3517

/* The current state accepts newline character. */

3518

for (j = 0; j < ndests; ++j)

3519

if (bitset_contain (dests_ch[j], NEWLINE_CHAR)(dests_ch[j]['\n' / (sizeof (bitset_word_t) * 8)] & ((bitset_word_t
) 1 << '\n' % (sizeof (bitset_word_t) * 8))))

3520

{

3521

/* k-th destination accepts newline character. */

3522

trtable[NEWLINE_CHAR'\n'] = dest_states_nl[j];

3523

if (need_word_trtable)

3524

trtable[NEWLINE_CHAR'\n' + SBC_MAX256] = dest_states_nl[j];

3525

/* There must be only one destination which accepts

3526

newline. See group_nodes_into_DFAstates. */

3527

break;

3528

}

3529

}

3530

3531

if (dest_states_malloced)

3532

free (dest_states);

3533

3534

re_node_set_free (&follows)free ((&follows)->elems);

3535

for (i = 0; i < ndests; ++i)

3536

re_node_set_free (dests_node + i)free ((dests_node + i)->elems);

3537

3538

if (dests_node_malloced)

3539

free (dests_alloc);

3540

3541

return 1;

3542

}

3543

3544

/* Group all nodes belonging to STATE into several destinations.

3545

Then for all destinations, set the nodes belonging to the destination

3546

to DESTS_NODE[i] and set the characters accepted by the destination

3547

to DEST_CH[i]. This function return the number of destinations. */

3548

3549

static int

3550

internal_function

3551

group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,

3552

re_node_set *dests_node, bitset_t *dests_ch)

3553

{

3554

reg_errcode_t err;

3555

int result;

3556

int i, j, k;

3557

int ndests; /* Number of the destinations from `state'. */

3558

bitset_t accepts; /* Characters a node can accept. */

3559

const re_node_set *cur_nodes = &state->nodes;

3560

bitset_empty (accepts)__builtin___memset_chk (accepts, '\0', sizeof (bitset_t), __builtin_object_size
(accepts, 0));

3561

ndests = 0;

3562

3563

/* For all the nodes belonging to `state', */

3564

for (i = 0; i < cur_nodes->nelem; ++i)

3565

{

3566

re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];

3567

re_token_type_t type = node->type;

3568

unsigned int constraint = node->constraint;

3569

3570

/* Enumerate all single byte character this node can accept. */

3571

if (type == CHARACTER)

3572

bitset_set (accepts, node->opr.c)(accepts[node->opr.c / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << node->opr.c % (sizeof (bitset_word_t) * 8));

3573

else if (type == SIMPLE_BRACKET)

3574

{

3575

bitset_merge (accepts, node->opr.sbcset);

3576

}

3577

else if (type == OP_PERIOD)

3578

{

3579

#ifdef RE_ENABLE_I18N

3580

if (dfa->mb_cur_max > 1)

3581

bitset_merge (accepts, dfa->sb_char);

3582

else

3583

#endif

3584

bitset_set_all (accepts)__builtin___memset_chk (accepts, '\xff', sizeof (bitset_t), __builtin_object_size
(accepts, 0));

3585

if (!(dfa->syntax & RE_DOT_NEWLINE((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1)))

3586

bitset_clear (accepts, '\n')(accepts['\n' / (sizeof (bitset_word_t) * 8)] &= ~((bitset_word_t
) 1 << '\n' % (sizeof (bitset_word_t) * 8)));

3587

if (dfa->syntax & RE_DOT_NOT_NULL(((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1))

3588

bitset_clear (accepts, '\0')(accepts['\0' / (sizeof (bitset_word_t) * 8)] &= ~((bitset_word_t
) 1 << '\0' % (sizeof (bitset_word_t) * 8)));

3589

}

3590

#ifdef RE_ENABLE_I18N

3591

else if (type == OP_UTF8_PERIOD)

3592

{

3593

memset (accepts, '\xff', sizeof (bitset_t) / 2)__builtin___memset_chk (accepts, '\xff', sizeof (bitset_t) / 2
, __builtin_object_size (accepts, 0));

3594

if (!(dfa->syntax & RE_DOT_NEWLINE((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1)))

3595

bitset_clear (accepts, '\n')(accepts['\n' / (sizeof (bitset_word_t) * 8)] &= ~((bitset_word_t
) 1 << '\n' % (sizeof (bitset_word_t) * 8)));

3596

if (dfa->syntax & RE_DOT_NOT_NULL(((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1))

3597

bitset_clear (accepts, '\0')(accepts['\0' / (sizeof (bitset_word_t) * 8)] &= ~((bitset_word_t
) 1 << '\0' % (sizeof (bitset_word_t) * 8)));

3598

}

3599

#endif

3600

else

3601

continue;

3602

3603

/* Check the `accepts' and sift the characters which are not

3604

match it the context. */

3605

if (constraint)

3606

{

3607

if (constraint & NEXT_NEWLINE_CONSTRAINT0x0020)

3608

{

3609

boolint accepts_newline = bitset_contain (accepts, NEWLINE_CHAR)(accepts['\n' / (sizeof (bitset_word_t) * 8)] & ((bitset_word_t
) 1 << '\n' % (sizeof (bitset_word_t) * 8)));

3610

bitset_empty (accepts)__builtin___memset_chk (accepts, '\0', sizeof (bitset_t), __builtin_object_size
(accepts, 0));

3611

if (accepts_newline)

3612

bitset_set (accepts, NEWLINE_CHAR)(accepts['\n' / (sizeof (bitset_word_t) * 8)] |= (bitset_word_t
) 1 << '\n' % (sizeof (bitset_word_t) * 8));

3613

else

3614

continue;

3615

}

3616

if (constraint & NEXT_ENDBUF_CONSTRAINT0x0080)

3617

{

3618

bitset_empty (accepts)__builtin___memset_chk (accepts, '\0', sizeof (bitset_t), __builtin_object_size
(accepts, 0));

3619

continue;

3620

}

3621

3622

if (constraint & NEXT_WORD_CONSTRAINT0x0004)

3623

{

3624

bitset_word_t any_set = 0;

3625

if (type == CHARACTER && !node->word_char)

3626

{

3627

bitset_empty (accepts)__builtin___memset_chk (accepts, '\0', sizeof (bitset_t), __builtin_object_size
(accepts, 0));

3628

continue;

3629

}

3630

#ifdef RE_ENABLE_I18N

3631

if (dfa->mb_cur_max > 1)

3632

for (j = 0; j < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++j)

3633

any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));

3634

else

3635

#endif

3636

for (j = 0; j < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++j)

3637

any_set |= (accepts[j] &= dfa->word_char[j]);

3638

if (!any_set)

3639

continue;

3640

}

3641

if (constraint & NEXT_NOTWORD_CONSTRAINT0x0008)

3642

{

3643

bitset_word_t any_set = 0;

3644

if (type == CHARACTER && node->word_char)

3645

{

3646

bitset_empty (accepts)__builtin___memset_chk (accepts, '\0', sizeof (bitset_t), __builtin_object_size
(accepts, 0));

3647

continue;

3648

}

3649

#ifdef RE_ENABLE_I18N

3650

if (dfa->mb_cur_max > 1)

3651

for (j = 0; j < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++j)

3652

any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));

3653

else

3654

#endif

3655

for (j = 0; j < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++j)

3656

any_set |= (accepts[j] &= ~dfa->word_char[j]);

3657

if (!any_set)

3658

continue;

3659

}

3660

}

3661

3662

/* Then divide `accepts' into DFA states, or create a new

3663

state. Above, we make sure that accepts is not empty. */

3664

for (j = 0; j < ndests; ++j)

3665

{

3666

bitset_t intersec; /* Intersection sets, see below. */

3667

bitset_t remains;

3668

/* Flags, see below. */

3669

bitset_word_t has_intersec, not_subset, not_consumed;

3670

3671

/* Optimization, skip if this state doesn't accept the character. */

3672

if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)(dests_ch[j][node->opr.c / (sizeof (bitset_word_t) * 8)] &
((bitset_word_t) 1 << node->opr.c % (sizeof (bitset_word_t
) * 8))))

3673

continue;

3674

3675

/* Enumerate the intersection set of this state and `accepts'. */

3676

has_intersec = 0;

3677

for (k = 0; k < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++k)

3678

has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];

3679

/* And skip if the intersection set is empty. */

3680

if (!has_intersec)

3681

continue;

3682

3683

/* Then check if this state is a subset of `accepts'. */

3684

not_subset = not_consumed = 0;

3685

for (k = 0; k < BITSET_WORDS(256 / (sizeof (bitset_word_t) * 8)); ++k)

3686

{

3687

not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];

3688

not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];

3689

}

3690

3691

/* If this state isn't a subset of `accepts', create a

3692

new group state, which has the `remains'. */

3693

if (not_subset)

3694

{

3695

bitset_copy (dests_ch[ndests], remains)__builtin___memcpy_chk (dests_ch[ndests], remains, sizeof (bitset_t
), __builtin_object_size (dests_ch[ndests], 0));

3696

bitset_copy (dests_ch[j], intersec)__builtin___memcpy_chk (dests_ch[j], intersec, sizeof (bitset_t
), __builtin_object_size (dests_ch[j], 0));

3697

err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);

3698

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

3699

goto error_return;

3700

++ndests;

3701

}

3702

3703

/* Put the position in the current group. */

3704

result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);

3705

if (BE (result < 0, 0)__builtin_expect (result < 0, 0))

3706

goto error_return;

3707

3708

/* If all characters are consumed, go to next node. */

3709

if (!not_consumed)

3710

break;

3711

}

3712

/* Some characters remain, create a new group. */

3713

if (j == ndests)

3714

{

3715

bitset_copy (dests_ch[ndests], accepts)__builtin___memcpy_chk (dests_ch[ndests], accepts, sizeof (bitset_t
), __builtin_object_size (dests_ch[ndests], 0));

3716

err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);

3717

if (BE (err != REG_NOERROR, 0)__builtin_expect (err != REG_NOERROR, 0))

3718

goto error_return;

3719

++ndests;

3720

bitset_empty (accepts)__builtin___memset_chk (accepts, '\0', sizeof (bitset_t), __builtin_object_size
(accepts, 0));

3721

}

3722

}

3723

return ndests;

3724

error_return:

3725

for (j = 0; j < ndests; ++j)

3726

re_node_set_free (dests_node + j)free ((dests_node + j)->elems);

3727

return -1;

3728

}

3729

3730

#ifdef RE_ENABLE_I18N

3731

/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.

3732

Return the number of the bytes the node accepts.

3733

STR_IDX is the current index of the input string.

3734

3735

This function handles the nodes which can accept one character, or

3736

one collating element like '.', '[a-z]', opposite to the other nodes

3737

can only accept one byte. */

3738

3739

static int

3740

internal_function

3741

check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,

3742

const re_string_t *input, int str_idx)

3743

{

3744

const re_token_t *node = dfa->nodes + node_idx;

3745

int char_len, elem_len;

3746

int i;

3747

wint_t wc;

3748

3749

if (BE (node->type == OP_UTF8_PERIOD, 0)__builtin_expect (node->type == OP_UTF8_PERIOD, 0))

3750

{

3751

unsigned char c = re_string_byte_at (input, str_idx)((input)->mbs[str_idx]), d;

3752

if (BE (c < 0xc2, 1)__builtin_expect (c < 0xc2, 1))

3753

return 0;

3754

3755

if (str_idx + 2 > input->len)

3756

return 0;

3757

3758

d = re_string_byte_at (input, str_idx + 1)((input)->mbs[str_idx + 1]);

3759

if (c < 0xe0)

3760

return (d < 0x80 || d > 0xbf) ? 0 : 2;

3761

else if (c < 0xf0)

3762

{

3763

char_len = 3;

3764

if (c == 0xe0 && d < 0xa0)

3765

return 0;

3766

}

3767

else if (c < 0xf8)

3768

{

3769

char_len = 4;

3770

if (c == 0xf0 && d < 0x90)

3771

return 0;

3772

}

3773

else if (c < 0xfc)

3774

{

3775

char_len = 5;

3776

if (c == 0xf8 && d < 0x88)

3777

return 0;

3778

}

3779

else if (c < 0xfe)

3780

{

3781

char_len = 6;

3782

if (c == 0xfc && d < 0x84)

3783

return 0;

3784

}

3785

else

3786

return 0;

3787

3788

if (str_idx + char_len > input->len)

3789

return 0;

3790

3791

for (i = 1; i < char_len; ++i)

3792

{

3793

d = re_string_byte_at (input, str_idx + i)((input)->mbs[str_idx + i]);

3794

if (d < 0x80 || d > 0xbf)

3795

return 0;

3796

}

3797

return char_len;

3798

}

3799

3800

char_len = re_string_char_size_at (input, str_idx);

3801

if (node->type == OP_PERIOD)

3802

{

3803

if (char_len <= 1)

3804

return 0;

3805

/* FIXME: I don't think this if is needed, as both '\n'

3806

and '\0' are char_len == 1. */

3807

/* '.' accepts any one character except the following two cases. */

3808

if ((!(dfa->syntax & RE_DOT_NEWLINE((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1)) &&

3809

re_string_byte_at (input, str_idx)((input)->mbs[str_idx]) == '\n') ||

3810

((dfa->syntax & RE_DOT_NOT_NULL(((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1)) &&

3811

re_string_byte_at (input, str_idx)((input)->mbs[str_idx]) == '\0'))

3812

return 0;

3813

return char_len;

3814

}

3815

3816

elem_len = re_string_elem_size_at (input, str_idx);

3817

wc = __btowcbtowc(*(input->mbs+str_idx));

3818

if (((elem_len <= 1 && char_len <= 1) || char_len == 0) && (wc != WEOF && wc < SBC_MAX256))

3819

return 0;

3820

3821

if (node->type == COMPLEX_BRACKET)

3822

{

3823

const re_charset_t *cset = node->opr.mbcset;

3824

# ifdef _LIBC

3825

const unsigned char *pin

3826

= ((const unsigned char *) re_string_get_buffer (input)((input)->mbs) + str_idx);

3827

int j;

3828

uint32_t nrules;

3829

# endif /* _LIBC */

3830

int match_len = 0;

3831

wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)

3832

? re_string_wchar_at (input, str_idx) : 0);

3833

3834

/* match with multibyte character? */

3835

for (i = 0; i < cset->nmbchars; ++i)

3836

if (wc == cset->mbchars[i])

3837

{

3838

match_len = char_len;

3839

goto check_node_accept_bytes_match;

3840

}

3841

/* match with character_class? */

3842

for (i = 0; i < cset->nchar_classes; ++i)

3843

{

3844

wctype_t wt = cset->char_classes[i];

3845

if (__iswctypeiswctype (wc, wt))

3846

{

3847

match_len = char_len;

3848

goto check_node_accept_bytes_match;

3849

}

3850

}

3851

3852

# ifdef _LIBC

3853

nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);

3854

if (nrules != 0)

3855

{

3856

unsigned int in_collseq = 0;

3857

const int32_t *table, *indirect;

3858

const unsigned char *weights, *extra;

3859

const char *collseqwc;

3860

/* This #include defines a local function! */

3861

# include <locale/weight.h>

3862

3863

/* match with collating_symbol? */

3864

if (cset->ncoll_syms)

3865

extra = (const unsigned char *)

3866

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);

3867

for (i = 0; i < cset->ncoll_syms; ++i)

3868

{

3869

const unsigned char *coll_sym = extra + cset->coll_syms[i];

3870

/* Compare the length of input collating element and

3871

the length of current collating element. */

3872

if (*coll_sym != elem_len)

3873

continue;

3874

/* Compare each bytes. */

3875

for (j = 0; j < *coll_sym; j++)

3876

if (pin[j] != coll_sym[1 + j])

3877

break;

3878

if (j == *coll_sym)

3879

{

3880

/* Match if every bytes is equal. */

3881

match_len = j;

3882

goto check_node_accept_bytes_match;

3883

}

3884

}

3885

3886

if (cset->nranges)

3887

{

3888

if (elem_len <= char_len)

3889

{

3890

collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);

3891

in_collseq = __collseq_table_lookup (collseqwc, wc);

3892

}

3893

else

3894

in_collseq = find_collation_sequence_value (pin, elem_len);

3895

}

3896

/* match with range expression? */

3897

for (i = 0; i < cset->nranges; ++i)

3898

if (cset->range_starts[i] <= in_collseq

3899

&& in_collseq <= cset->range_ends[i])

3900

{

3901

match_len = elem_len;

3902

goto check_node_accept_bytes_match;

3903

}

3904

3905

/* match with equivalence_class? */

3906

if (cset->nequiv_classes)

3907

{

3908

const unsigned char *cp = pin;

3909

table = (const int32_t *)

3910

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);

3911

weights = (const unsigned char *)

3912

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);

3913

extra = (const unsigned char *)

3914

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);

3915

indirect = (const int32_t *)

3916

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);

3917

int32_t idx = findidx (&cp);

3918

if (idx > 0)

3919

for (i = 0; i < cset->nequiv_classes; ++i)

3920

{

3921

int32_t equiv_class_idx = cset->equiv_classes[i];

3922

size_t weight_len = weights[idx & 0xffffff];

3923

if (weight_len == weights[equiv_class_idx & 0xffffff]

3924

&& (idx >> 24) == (equiv_class_idx >> 24))

3925

{

3926

int cnt = 0;

3927

3928

idx &= 0xffffff;

3929

equiv_class_idx &= 0xffffff;

3930

3931

while (cnt <= weight_len

3932

&& (weights[equiv_class_idx + 1 + cnt]

3933

== weights[idx + 1 + cnt]))

3934

++cnt;

3935

if (cnt > weight_len)

3936

{

3937

match_len = elem_len;

3938

goto check_node_accept_bytes_match;

3939

}

3940

}

3941

}

3942

}

3943

}

3944

else

3945

# endif /* _LIBC */

3946

{

3947

/* match with range expression? */

3948

#if __GNUC__4 >= 2

3949

wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};

3950

#else

3951

wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};

3952

cmp_buf[2] = wc;

3953

#endif

3954

for (i = 0; i < cset->nranges; ++i)

3955

{

3956

cmp_buf[0] = cset->range_starts[i];

3957

cmp_buf[4] = cset->range_ends[i];

3958

if (wcscoll (cmp_buf, cmp_buf + 2) <= 0

3959

&& wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)

3960

{

3961

match_len = char_len;

3962

goto check_node_accept_bytes_match;

3963

}

3964

}

3965

}

3966

check_node_accept_bytes_match:

3967

if (!cset->non_match)

3968

return match_len;

3969

else

3970

{

3971

if (match_len > 0)

3972

return 0;

3973

else

3974

return (elem_len > char_len) ? elem_len : char_len;

3975

}

3976

}

3977

return 0;

3978

}

3979

3980

# ifdef _LIBC

3981

static unsigned int

3982

internal_function

3983

find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)

3984

{

3985

uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);

3986

if (nrules == 0)

3987

{

3988

if (mbs_len == 1)

3989

{

3990

/* No valid character. Match it as a single byte character. */

3991

const unsigned char *collseq = (const unsigned char *)

3992

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);

3993

return collseq[mbs[0]];

3994

}

3995

return UINT_MAX(2147483647 *2U +1U);

3996

}

3997

else

3998

{

3999

int32_t idx;

4000

const unsigned char *extra = (const unsigned char *)

4001

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);

4002

int32_t extrasize = (const unsigned char *)

4003

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;

4004

4005

for (idx = 0; idx < extrasize;)

4006

{

4007

int mbs_cnt, found = 0;

4008

int32_t elem_mbs_len;

4009

/* Skip the name of collating element name. */

4010

idx = idx + extra[idx] + 1;

4011

elem_mbs_len = extra[idx++];

4012

if (mbs_len == elem_mbs_len)

4013

{

4014

for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)

4015

if (extra[idx + mbs_cnt] != mbs[mbs_cnt])

4016

break;

4017

if (mbs_cnt == elem_mbs_len)

4018

/* Found the entry. */

4019

found = 1;

4020

}

4021

/* Skip the byte sequence of the collating element. */

4022

idx += elem_mbs_len;

4023

/* Adjust for the alignment. */

4024

idx = (idx + 3) & ~3;

4025

/* Skip the collation sequence value. */

4026

idx += sizeof (uint32_t);

4027

/* Skip the wide char sequence of the collating element. */

4028

idx = idx + sizeof (uint32_t) * (extra[idx] + 1);

4029

/* If we found the entry, return the sequence value. */

4030

if (found)

4031

return *(uint32_t *) (extra + idx);

4032

/* Skip the collation sequence value. */

4033

idx += sizeof (uint32_t);

4034

}

4035

return UINT_MAX(2147483647 *2U +1U);

4036

}

4037

}

4038

# endif /* _LIBC */

4039

#endif /* RE_ENABLE_I18N */

4040

4041

/* Check whether the node accepts the byte which is IDX-th

4042

byte of the INPUT. */

4043

4044

static int

4045

internal_function

4046

check_node_accept (const re_match_context_t *mctx, const re_token_t *node,

4047

int idx)

4048

{

4049

unsigned char ch;

4050

ch = re_string_byte_at (&mctx->input, idx)((&mctx->input)->mbs[idx]);

4051

switch (node->type)

4052

{

4053

case CHARACTER:

4054

if (node->opr.c != ch)

4055

return 0;

4056

break;

4057

4058

case SIMPLE_BRACKET:

4059

if (!bitset_contain (node->opr.sbcset, ch)(node->opr.sbcset[ch / (sizeof (bitset_word_t) * 8)] &
((bitset_word_t) 1 << ch % (sizeof (bitset_word_t) * 8
))))

4060

return 0;

4061

break;

4062

4063

#ifdef RE_ENABLE_I18N

4064

case OP_UTF8_PERIOD:

4065

if (ch >= 0x80)

4066

return 0;

4067

/* FALLTHROUGH */

4068

#endif

4069

case OP_PERIOD:

4070

if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1)))

4071

|| (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL(((((((((unsigned long int) 1) << 1) << 1) <<
1) << 1) << 1) << 1) << 1))))

4072

return 0;

4073

break;

4074

4075

default:

4076

return 0;

4077

}

4078

4079

if (node->constraint)

4080

{

4081

/* The node has constraints. Check whether the current context

4082

satisfies the constraints. */

4083

unsigned int context = re_string_context_at (&mctx->input, idx,

4084

mctx->eflags);

4085

4086

return 0;

4087

}

4088

4089

return 1;

4090

}

4091

4092

/* Extend the buffers, if the buffers have run out. */

4093

4094

static reg_errcode_t

4095

internal_function

4096

extend_buffers (re_match_context_t *mctx)

4097

{

4098

reg_errcode_t ret;

4099

re_string_t *pstr = &mctx->input;

4100

4101

/* Avoid overflow. */

4102

if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0)__builtin_expect (2147483647 / 2 / sizeof (re_dfastate_t *) <=
pstr->bufs_len, 0))

4103

return REG_ESPACE;

4104

4105

/* Double the lengthes of the buffers. */

4106

ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);

4107

if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))

4108

return ret;

4109

4110

if (mctx->state_log != NULL((void*)0))

4111

{

4112

/* And double the length of state_log. */

4113

/* XXX We have no indication of the size of this buffer. If this

4114

allocation fail we have no indication that the state_log array

4115

does not have the right size. */

4116

re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,((mctx->state_log != ((void*)0)) ? (re_dfastate_t * *) realloc
(mctx->state_log,(pstr->bufs_len + 1)*sizeof(re_dfastate_t
*)) : (re_dfastate_t * *) calloc(pstr->bufs_len + 1,sizeof
(re_dfastate_t *)))

4117

pstr->bufs_len + 1)((mctx->state_log != ((void*)0)) ? (re_dfastate_t * *) realloc
(mctx->state_log,(pstr->bufs_len + 1)*sizeof(re_dfastate_t
*)) : (re_dfastate_t * *) calloc(pstr->bufs_len + 1,sizeof
(re_dfastate_t *)));

4118

if (BE (new_array == NULL, 0)__builtin_expect (new_array == ((void*)0), 0))

4119

return REG_ESPACE;

4120

mctx->state_log = new_array;

4121

}

4122

4123

/* Then reconstruct the buffers. */

4124

if (pstr->icase)

4125

{

4126

#ifdef RE_ENABLE_I18N

4127

if (pstr->mb_cur_max > 1)

4128

{

4129

ret = build_wcs_upper_buffer (pstr);

4130

if (BE (ret != REG_NOERROR, 0)__builtin_expect (ret != REG_NOERROR, 0))

4131

return ret;

4132

}

4133

else

4134

#endif /* RE_ENABLE_I18N */

4135

build_upper_buffer (pstr);

4136

}

4137

else

4138

{

4139

#ifdef RE_ENABLE_I18N

4140

if (pstr->mb_cur_max > 1)

4141

build_wcs_buffer (pstr);

4142

else

4143

#endif /* RE_ENABLE_I18N */

4144

{

4145

if (pstr->trans != NULL((void*)0))

4146

re_string_translate_buffer (pstr);

4147

}

4148

}

4149

return REG_NOERROR;

4150

}

4151

4152

4153

/* Functions for matching context. */

4154

4155

/* Initialize MCTX. */

4156

4157

static reg_errcode_t

4158

internal_function

4159

match_ctx_init (re_match_context_t *mctx, int eflags, int n)

4160

{

4161

mctx->eflags = eflags;

4162

mctx->match_last = -1;

4163

if (n > 0)

4164

{

4165

mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n)((struct re_backref_cache_entry *) malloc ((n) * sizeof (struct
re_backref_cache_entry)));

4166

mctx->sub_tops = re_malloc (re_sub_match_top_t *, n)((re_sub_match_top_t * *) malloc ((n) * sizeof (re_sub_match_top_t
*)));

4167

if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)__builtin_expect (mctx->bkref_ents == ((void*)0) || mctx->
sub_tops == ((void*)0), 0))

4168

return REG_ESPACE;

4169

}

4170

/* Already zero-ed by the caller.

4171

else

4172

mctx->bkref_ents = NULL;

4173

mctx->nbkref_ents = 0;

4174

mctx->nsub_tops = 0; */

4175

mctx->abkref_ents = n;

4176

mctx->max_mb_elem_len = 1;

4177

mctx->asub_tops = n;

4178

return REG_NOERROR;

4179

}

4180

4181

/* Clean the entries which depend on the current input in MCTX.

4182

This function must be invoked when the matcher changes the start index

4183

of the input, or changes the input string. */

4184

4185

static void

4186

internal_function

4187

match_ctx_clean (re_match_context_t *mctx)

4188

{

4189

int st_idx;

4190

for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)

4191

{

4192

int sl_idx;

4193

re_sub_match_top_t *top = mctx->sub_tops[st_idx];

4194

for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)

4195

{

4196

re_sub_match_last_t *last = top->lasts[sl_idx];

4197

re_free (last->path.array)free (last->path.array);

4198

re_free (last)free (last);

4199

}

4200

re_free (top->lasts)free (top->lasts);

4201

if (top->path)

4202

{

4203

re_free (top->path->array)free (top->path->array);

4204

re_free (top->path)free (top->path);

4205

}

4206

free (top);

4207

}

4208

4209

mctx->nsub_tops = 0;

4210

mctx->nbkref_ents = 0;

4211

}

4212

4213

/* Free all the memory associated with MCTX. */

4214

4215

static void

4216

internal_function

4217

match_ctx_free (re_match_context_t *mctx)

4218

{

4219

/* First, free all the memory associated with MCTX->SUB_TOPS. */

4220

match_ctx_clean (mctx);

4221

re_free (mctx->sub_tops)free (mctx->sub_tops);

4222

re_free (mctx->bkref_ents)free (mctx->bkref_ents);

4223

}

4224

4225

/* Add a new backreference entry to MCTX.

4226

Note that we assume that caller never call this function with duplicate

4227

entry, and call with STR_IDX which isn't smaller than any existing entry.

4228

4229

4230

static reg_errcode_t

4231

internal_function

4232

match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,

4233

int to)

4234

{

4235

if (mctx->nbkref_ents >= mctx->abkref_ents)

4236

{

4237

struct re_backref_cache_entry* new_entry;

4238

new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,((mctx->bkref_ents != ((void*)0)) ? (struct re_backref_cache_entry
*) realloc (mctx->bkref_ents,(mctx->abkref_ents * 2)*sizeof
(struct re_backref_cache_entry)) : (struct re_backref_cache_entry
*) calloc(mctx->abkref_ents * 2,sizeof(struct re_backref_cache_entry
)))

4239

mctx->abkref_ents * 2)((mctx->bkref_ents != ((void*)0)) ? (struct re_backref_cache_entry
*) realloc (mctx->bkref_ents,(mctx->abkref_ents * 2)*sizeof
(struct re_backref_cache_entry)) : (struct re_backref_cache_entry
*) calloc(mctx->abkref_ents * 2,sizeof(struct re_backref_cache_entry
)));

4240

if (BE (new_entry == NULL, 0)__builtin_expect (new_entry == ((void*)0), 0))

4241

{

4242

re_free (mctx->bkref_ents)free (mctx->bkref_ents);

4243

return REG_ESPACE;

4244

}

4245

mctx->bkref_ents = new_entry;

4246

memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',__builtin___memset_chk (mctx->bkref_ents + mctx->nbkref_ents
, '\0', sizeof (struct re_backref_cache_entry) * mctx->abkref_ents
, __builtin_object_size (mctx->bkref_ents + mctx->nbkref_ents
, 0))

4247

sizeof (struct re_backref_cache_entry) * mctx->abkref_ents)__builtin___memset_chk (mctx->bkref_ents + mctx->nbkref_ents
, '\0', sizeof (struct re_backref_cache_entry) * mctx->abkref_ents
, __builtin_object_size (mctx->bkref_ents + mctx->nbkref_ents
, 0));

4248

mctx->abkref_ents *= 2;

4249

}

4250

if (mctx->nbkref_ents > 0

4251

&& mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)

4252

mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;

4253

4254

mctx->bkref_ents[mctx->nbkref_ents].node = node;

4255

mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;

4256

mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;

4257

mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;

4258

4259

/* This is a cache that saves negative results of check_dst_limits_calc_pos.

4260

If bit N is clear, means that this entry won't epsilon-transition to

4261

an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If

4262

it is set, check_dst_limits_calc_pos_1 will recurse and try to find one

4263

such node.

4264

4265

A backreference does not epsilon-transition unless it is empty, so set

4266

to all zeros if FROM != TO. */

4267

mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map

4268

= (from == to ? ~0 : 0);

4269

4270

mctx->bkref_ents[mctx->nbkref_ents++].more = 0;

4271

if (mctx->max_mb_elem_len < to - from)

4272

mctx->max_mb_elem_len = to - from;

4273

return REG_NOERROR;

4274

}

4275

4276

/* Search for the first entry which has the same str_idx, or -1 if none is

4277

found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */

4278

4279

static int

4280

internal_function

4281

search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)

4282

{

4283

int left, right, mid, last;

4284

last = right = mctx->nbkref_ents;

4285

for (left = 0; left < right;)

4286

{

4287

mid = (left + right) / 2;

4288

if (mctx->bkref_ents[mid].str_idx < str_idx)

4289

left = mid + 1;

4290

else

4291

right = mid;

4292

}

4293

if (left < last && mctx->bkref_ents[left].str_idx == str_idx)

4294

return left;

4295

else

4296

return -1;

4297

}

4298

4299

/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches

4300

at STR_IDX. */

4301

4302

static reg_errcode_t

4303

internal_function

4304

match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)

4305

{

4306

#ifdef DEBUG

4307

assert (mctx->sub_tops != NULL)(__builtin_expect(!(mctx->sub_tops != ((void*)0)), 0) ? __assert_rtn
(__func__, "compat/regex/regexec.c", 4307, "mctx->sub_tops != NULL"
) : (void)0);

4308

assert (mctx->asub_tops > 0)(__builtin_expect(!(mctx->asub_tops > 0), 0) ? __assert_rtn
(__func__, "compat/regex/regexec.c", 4308, "mctx->asub_tops > 0"
) : (void)0);

4309

#endif

4310

if (BE (mctx->nsub_tops == mctx->asub_tops, 0)__builtin_expect (mctx->nsub_tops == mctx->asub_tops, 0
))

4311

{

4312

int new_asub_tops = mctx->asub_tops * 2;

4313

re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,((mctx->sub_tops != ((void*)0)) ? (re_sub_match_top_t * *)
realloc (mctx->sub_tops,(new_asub_tops)*sizeof(re_sub_match_top_t
*)) : (re_sub_match_top_t * *) calloc(new_asub_tops,sizeof(re_sub_match_top_t
*)))

4314

re_sub_match_top_t *,((mctx->sub_tops != ((void*)0)) ? (re_sub_match_top_t * *)
realloc (mctx->sub_tops,(new_asub_tops)*sizeof(re_sub_match_top_t
*)) : (re_sub_match_top_t * *) calloc(new_asub_tops,sizeof(re_sub_match_top_t
*)))

4315

new_asub_tops)((mctx->sub_tops != ((void*)0)) ? (re_sub_match_top_t * *)
realloc (mctx->sub_tops,(new_asub_tops)*sizeof(re_sub_match_top_t
*)) : (re_sub_match_top_t * *) calloc(new_asub_tops,sizeof(re_sub_match_top_t
*)));

4316

if (BE (new_array == NULL, 0)__builtin_expect (new_array == ((void*)0), 0))

4317

return REG_ESPACE;

4318

mctx->sub_tops = new_array;

4319

mctx->asub_tops = new_asub_tops;

4320

}

4321

mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));

4322

if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0)__builtin_expect (mctx->sub_tops[mctx->nsub_tops] == ((
void*)0), 0))

4323

return REG_ESPACE;

4324

mctx->sub_tops[mctx->nsub_tops]->node = node;

4325

mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;

4326

return REG_NOERROR;

4327

}

4328

4329

/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches

4330

at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */

4331

4332

static re_sub_match_last_t *

4333

internal_function

4334

match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)

4335

{

4336

re_sub_match_last_t *new_entry;

4337

if (BE (subtop->nlasts == subtop->alasts, 0)__builtin_expect (subtop->nlasts == subtop->alasts, 0))

4338

{

4339

int new_alasts = 2 * subtop->alasts + 1;

4340

re_sub_match_last_t **new_array = re_realloc (subtop->lasts,((subtop->lasts != ((void*)0)) ? (re_sub_match_last_t * *)
realloc (subtop->lasts,(new_alasts)*sizeof(re_sub_match_last_t
*)) : (re_sub_match_last_t * *) calloc(new_alasts,sizeof(re_sub_match_last_t
*)))

4341

re_sub_match_last_t *,((subtop->lasts != ((void*)0)) ? (re_sub_match_last_t * *)
realloc (subtop->lasts,(new_alasts)*sizeof(re_sub_match_last_t
*)) : (re_sub_match_last_t * *) calloc(new_alasts,sizeof(re_sub_match_last_t
*)))

4342

new_alasts)((subtop->lasts != ((void*)0)) ? (re_sub_match_last_t * *)
realloc (subtop->lasts,(new_alasts)*sizeof(re_sub_match_last_t
*)) : (re_sub_match_last_t * *) calloc(new_alasts,sizeof(re_sub_match_last_t
*)));

4343

if (BE (new_array == NULL, 0)__builtin_expect (new_array == ((void*)0), 0))

4344

return NULL((void*)0);

4345

subtop->lasts = new_array;

4346

subtop->alasts = new_alasts;

4347

}

4348

new_entry = calloc (1, sizeof (re_sub_match_last_t));

4349

if (BE (new_entry != NULL, 1)__builtin_expect (new_entry != ((void*)0), 1))

4350

{

4351

subtop->lasts[subtop->nlasts] = new_entry;

4352

new_entry->node = node;

4353

new_entry->str_idx = str_idx;

4354

++subtop->nlasts;

4355

}

4356

return new_entry;

4357

}

4358

4359

static void

4360

internal_function

4361

sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,

4362

re_dfastate_t **limited_sts, int last_node, int last_str_idx)

4363

{

4364

sctx->sifted_states = sifted_sts;

4365

sctx->limited_states = limited_sts;

4366

sctx->last_node = last_node;

4367

sctx->last_str_idx = last_str_idx;

4368

re_node_set_init_empty (&sctx->limits)__builtin___memset_chk (&sctx->limits, '\0', sizeof (re_node_set
), __builtin_object_size (&sctx->limits, 0));

4369

}