mirror of
https://github.com/Stichting-MINIX-Research-Foundation/netbsd.git
synced 2025-09-13 17:20:03 -04:00
349 lines
9.0 KiB
C
349 lines
9.0 KiB
C
/*
|
|
tre_regexec.c - TRE POSIX compatible matching functions (and more).
|
|
|
|
This software is released under a BSD-style license.
|
|
See the file LICENSE for details and copyright.
|
|
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include <config.h>
|
|
#endif /* HAVE_CONFIG_H */
|
|
|
|
#include "tre-alloca.h"
|
|
|
|
#include <assert.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#ifdef HAVE_WCHAR_H
|
|
#include <wchar.h>
|
|
#endif /* HAVE_WCHAR_H */
|
|
#ifdef HAVE_WCTYPE_H
|
|
#include <wctype.h>
|
|
#endif /* HAVE_WCTYPE_H */
|
|
#ifndef TRE_WCHAR
|
|
#include <ctype.h>
|
|
#endif /* !TRE_WCHAR */
|
|
#ifdef HAVE_MALLOC_H
|
|
#include <malloc.h>
|
|
#endif /* HAVE_MALLOC_H */
|
|
#include <limits.h>
|
|
|
|
#include "tre-internal.h"
|
|
#include "tre.h"
|
|
#include "xmalloc.h"
|
|
|
|
/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
|
|
endpoint values. */
|
|
void
|
|
tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
|
|
const tre_tnfa_t *tnfa, int *tags, int match_eo)
|
|
{
|
|
tre_submatch_data_t *submatch_data;
|
|
unsigned int i, j;
|
|
int *parents;
|
|
|
|
i = 0;
|
|
if (match_eo >= 0 && !(cflags & REG_NOSUB))
|
|
{
|
|
/* Construct submatch offsets from the tags. */
|
|
DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo));
|
|
submatch_data = tnfa->submatch_data;
|
|
while (i < tnfa->num_submatches && i < nmatch)
|
|
{
|
|
if (submatch_data[i].so_tag == tnfa->end_tag)
|
|
pmatch[i].rm_so = match_eo;
|
|
else
|
|
pmatch[i].rm_so = tags[submatch_data[i].so_tag];
|
|
|
|
if (submatch_data[i].eo_tag == tnfa->end_tag)
|
|
pmatch[i].rm_eo = match_eo;
|
|
else
|
|
pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
|
|
|
|
/* If either of the endpoints were not used, this submatch
|
|
was not part of the match. */
|
|
if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
|
|
pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
|
|
|
DPRINT(("pmatch[%d] = {t%d = %d, t%d = %d}\n", i,
|
|
submatch_data[i].so_tag, pmatch[i].rm_so,
|
|
submatch_data[i].eo_tag, pmatch[i].rm_eo));
|
|
i++;
|
|
}
|
|
/* Reset all submatches that are not within all of their parent
|
|
submatches. */
|
|
i = 0;
|
|
while (i < tnfa->num_submatches && i < nmatch)
|
|
{
|
|
if (pmatch[i].rm_eo == -1)
|
|
assert(pmatch[i].rm_so == -1);
|
|
assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
|
|
|
|
parents = submatch_data[i].parents;
|
|
if (parents != NULL)
|
|
for (j = 0; parents[j] >= 0; j++)
|
|
{
|
|
DPRINT(("pmatch[%d] parent %d\n", i, parents[j]));
|
|
if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
|
|
|| pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
|
|
pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
|
}
|
|
i++;
|
|
}
|
|
}
|
|
|
|
while (i < nmatch)
|
|
{
|
|
pmatch[i].rm_so = -1;
|
|
pmatch[i].rm_eo = -1;
|
|
i++;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Wrapper functions for POSIX compatible regexp matching.
|
|
*/
|
|
|
|
int
|
|
tre_have_backrefs(const regex_t *preg)
|
|
{
|
|
tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
|
|
return tnfa->have_backrefs;
|
|
}
|
|
|
|
int
|
|
tre_have_approx(const regex_t *preg)
|
|
{
|
|
tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
|
|
return tnfa->have_approx;
|
|
}
|
|
|
|
static int
|
|
tre_match(const tre_tnfa_t *tnfa, const void *string, size_t len,
|
|
tre_str_type_t type, size_t nmatch, regmatch_t pmatch[],
|
|
int eflags)
|
|
{
|
|
reg_errcode_t status;
|
|
int *tags = NULL, eo;
|
|
if (tnfa->num_tags > 0 && nmatch > 0)
|
|
{
|
|
#ifdef TRE_USE_ALLOCA
|
|
tags = alloca(sizeof(*tags) * tnfa->num_tags);
|
|
#else /* !TRE_USE_ALLOCA */
|
|
tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
|
|
#endif /* !TRE_USE_ALLOCA */
|
|
if (tags == NULL)
|
|
return REG_ESPACE;
|
|
}
|
|
|
|
/* Dispatch to the appropriate matcher. */
|
|
if (tnfa->have_backrefs || eflags & REG_BACKTRACKING_MATCHER)
|
|
{
|
|
/* The regex has back references, use the backtracking matcher. */
|
|
if (type == STR_USER)
|
|
{
|
|
const tre_str_source *source = string;
|
|
if (source->rewind == NULL || source->compare == NULL)
|
|
/* The backtracking matcher requires rewind and compare
|
|
capabilities from the input stream. */
|
|
return REG_BADPAT;
|
|
}
|
|
status = tre_tnfa_run_backtrack(tnfa, string, (int)len, type,
|
|
tags, eflags, &eo);
|
|
}
|
|
#ifdef TRE_APPROX
|
|
else if (tnfa->have_approx || eflags & REG_APPROX_MATCHER)
|
|
{
|
|
/* The regex uses approximate matching, use the approximate matcher. */
|
|
regamatch_t match;
|
|
regaparams_t params;
|
|
tre_regaparams_default(¶ms);
|
|
params.max_err = 0;
|
|
params.max_cost = 0;
|
|
status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags,
|
|
&match, params, eflags, &eo);
|
|
}
|
|
#endif /* TRE_APPROX */
|
|
else
|
|
{
|
|
/* Exact matching, no back references, use the parallel matcher. */
|
|
status = tre_tnfa_run_parallel(tnfa, string, (int)len, type,
|
|
tags, eflags, &eo);
|
|
}
|
|
|
|
if (status == REG_OK)
|
|
/* A match was found, so fill the submatch registers. */
|
|
tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
|
|
#ifndef TRE_USE_ALLOCA
|
|
if (tags)
|
|
xfree(tags);
|
|
#endif /* !TRE_USE_ALLOCA */
|
|
return status;
|
|
}
|
|
|
|
int
|
|
tre_regnexec(const regex_t *preg, const char *str, size_t len,
|
|
size_t nmatch, regmatch_t pmatch[], int eflags)
|
|
{
|
|
tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
|
|
/* CONSTCOND */
|
|
tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS;
|
|
|
|
return tre_match(tnfa, str, len, type, nmatch, pmatch, eflags);
|
|
}
|
|
|
|
int
|
|
tre_regexec(const regex_t *preg, const char *str,
|
|
size_t nmatch, regmatch_t pmatch[], int eflags)
|
|
{
|
|
const char *newstr;
|
|
unsigned newflags;
|
|
size_t newlen;
|
|
|
|
if (eflags & REG_STARTEND) {
|
|
/* LINTED */
|
|
newstr = &str[pmatch[0].rm_so];
|
|
/* LINTED */
|
|
newlen = pmatch[0].rm_eo;
|
|
newflags = (unsigned)(eflags & ~REG_STARTEND);
|
|
} else {
|
|
newstr = str;
|
|
newlen = (size_t)-1;
|
|
newflags = (unsigned)eflags;
|
|
}
|
|
return tre_regnexec(preg, newstr, newlen, nmatch, pmatch, (int)newflags);
|
|
}
|
|
|
|
|
|
#ifdef TRE_WCHAR
|
|
|
|
int
|
|
tre_regwnexec(const regex_t *preg, const wchar_t *str, size_t len,
|
|
size_t nmatch, regmatch_t pmatch[], int eflags)
|
|
{
|
|
tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
|
|
return tre_match(tnfa, str, len, STR_WIDE, nmatch, pmatch, eflags);
|
|
}
|
|
|
|
int
|
|
tre_regwexec(const regex_t *preg, const wchar_t *str,
|
|
size_t nmatch, regmatch_t pmatch[], int eflags)
|
|
{
|
|
return tre_regwnexec(preg, str, (unsigned)-1, nmatch, pmatch, eflags);
|
|
}
|
|
|
|
#endif /* TRE_WCHAR */
|
|
|
|
int
|
|
tre_reguexec(const regex_t *preg, const tre_str_source *str,
|
|
size_t nmatch, regmatch_t pmatch[], int eflags)
|
|
{
|
|
tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
|
|
return tre_match(tnfa, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags);
|
|
}
|
|
|
|
|
|
#ifdef TRE_APPROX
|
|
|
|
/*
|
|
Wrapper functions for approximate regexp matching.
|
|
*/
|
|
|
|
static int
|
|
tre_match_approx(const tre_tnfa_t *tnfa, const void *string, size_t len,
|
|
tre_str_type_t type, regamatch_t *match, regaparams_t params,
|
|
int eflags)
|
|
{
|
|
reg_errcode_t status;
|
|
int *tags = NULL, eo;
|
|
|
|
/* If the regexp does not use approximate matching features, the
|
|
maximum cost is zero, and the approximate matcher isn't forced,
|
|
use the exact matcher instead. */
|
|
if (params.max_cost == 0 && !tnfa->have_approx
|
|
&& !(eflags & REG_APPROX_MATCHER))
|
|
return tre_match(tnfa, string, len, type, match->nmatch, match->pmatch,
|
|
eflags);
|
|
|
|
/* Back references are not supported by the approximate matcher. */
|
|
if (tnfa->have_backrefs)
|
|
return REG_BADPAT;
|
|
|
|
if (tnfa->num_tags > 0 && match->nmatch > 0)
|
|
{
|
|
#if TRE_USE_ALLOCA
|
|
tags = alloca(sizeof(*tags) * tnfa->num_tags);
|
|
#else /* !TRE_USE_ALLOCA */
|
|
tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
|
|
#endif /* !TRE_USE_ALLOCA */
|
|
if (tags == NULL)
|
|
return REG_ESPACE;
|
|
}
|
|
status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags,
|
|
match, params, eflags, &eo);
|
|
if (status == REG_OK)
|
|
tre_fill_pmatch(match->nmatch, match->pmatch, tnfa->cflags, tnfa, tags, eo);
|
|
#ifndef TRE_USE_ALLOCA
|
|
if (tags)
|
|
xfree(tags);
|
|
#endif /* !TRE_USE_ALLOCA */
|
|
return status;
|
|
}
|
|
|
|
int
|
|
tre_reganexec(const regex_t *preg, const char *str, size_t len,
|
|
regamatch_t *match, regaparams_t params, int eflags)
|
|
{
|
|
tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
|
|
tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS;
|
|
|
|
return tre_match_approx(tnfa, str, len, type, match, params, eflags);
|
|
}
|
|
|
|
int
|
|
tre_regaexec(const regex_t *preg, const char *str,
|
|
regamatch_t *match, regaparams_t params, int eflags)
|
|
{
|
|
return tre_reganexec(preg, str, (unsigned)-1, match, params, eflags);
|
|
}
|
|
|
|
#ifdef TRE_WCHAR
|
|
|
|
int
|
|
tre_regawnexec(const regex_t *preg, const wchar_t *str, size_t len,
|
|
regamatch_t *match, regaparams_t params, int eflags)
|
|
{
|
|
tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
|
|
return tre_match_approx(tnfa, str, len, STR_WIDE,
|
|
match, params, eflags);
|
|
}
|
|
|
|
int
|
|
tre_regawexec(const regex_t *preg, const wchar_t *str,
|
|
regamatch_t *match, regaparams_t params, int eflags)
|
|
{
|
|
return tre_regawnexec(preg, str, (unsigned)-1, match, params, eflags);
|
|
}
|
|
|
|
#endif /* TRE_WCHAR */
|
|
|
|
void
|
|
tre_regaparams_default(regaparams_t *params)
|
|
{
|
|
memset(params, 0, sizeof(*params));
|
|
params->cost_ins = 1;
|
|
params->cost_del = 1;
|
|
params->cost_subst = 1;
|
|
params->max_cost = INT_MAX;
|
|
params->max_ins = INT_MAX;
|
|
params->max_del = INT_MAX;
|
|
params->max_subst = INT_MAX;
|
|
params->max_err = INT_MAX;
|
|
}
|
|
|
|
#endif /* TRE_APPROX */
|
|
|
|
/* EOF */
|