2013-04-06 16:48:33 +02:00

467 lines
9.9 KiB
C

/*
bench.c - simple regex benchmark program
This software is released under a BSD-style license.
See the file LICENSE for details and copyright.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif /* HAVE_GETOPT_H */
#include <time.h>
#include <unistd.h>
#include <math.h>
#include <sys/types.h>
#if 0
#include <hackerlab/rx-posix/regex.h>
#else
#include <regex.h>
#endif
/* T distribution for alpha = 0.025 (for 95% confidence). XXX - is
this correct? */
double t_distribution[] = {
12.71,
4.303,
3.182,
2.776,
2.571,
2.447,
2.365,
2.306,
2.262,
2.228,
2.201,
2.179,
2.160,
2.145,
2.131,
2.120,
2.110,
2.101,
2.093,
2.086,
2.080,
2.074,
2.069,
2.064,
2.060,
2.056,
2.052,
2.048,
2.045,
2.042
};
void
stats(double *sample_data, int samples, int len)
{
double mean, tmp1, tmp2, variance, stddev, error, percent;
int i;
mean = 0;
for (i = 0; i < samples; i++)
mean += sample_data[i];
mean = mean/i;
printf("# mean: %.5f\n", mean);
tmp1 = 0;
for (i = 0; i < samples; i++) {
tmp2 = sample_data[i] - mean;
tmp1 += tmp2*tmp2;
}
if (samples > 1)
variance = tmp1 / (samples-1);
else
variance = 0;
stddev = sqrt(variance);
printf("# variance: %.16f\n", variance);
printf("# standard deviation: %.16f\n", stddev);
error = t_distribution[samples-1] * stddev / sqrt(samples);
if (mean != 0)
percent = 100*error/mean;
else
percent = 0;
printf("# error: ±%.16f (±%.4f%%)\n", error, percent);
printf("%d\t%.5f\t%.5f\n", len, mean, error);
fflush(stdout);
}
void
run_tests(int len, int samples, double *sample_data, int repeats,
regex_t *reobj, char *str, char *tmpbuf)
{
int i, j, errcode;
clock_t c1, c2;
regmatch_t pmatch[10];
printf("# len = %d\n", len);
fflush(stdout);
for (i = 0; i < samples; i++) {
c1 = clock();
for (j = 0; j < repeats; j++)
if ((errcode = tre_regexec(reobj, str, 10, pmatch, 0))) {
tre_regerror(errcode, reobj, tmpbuf, 255);
printf("error: %s\n", tmpbuf);
}
c2 = clock();
sample_data[i] = (double)(c2-c1)/(CLOCKS_PER_SEC*repeats);
printf("# sample: %.5f sec, clocks: %ld\n",
(double)(c2-c1)/(CLOCKS_PER_SEC*repeats),
(long)(c2-c1));
fflush(stdout);
}
fflush(stdout);
for (i = 0; i < 10; i += 2) {
printf("# pmatch[%d].rm_so = %d\n", i/2, (int)pmatch[i/2].rm_so);
printf("# pmatch[%d].rm_eo = %d\n", i/2, (int)pmatch[i/2].rm_eo);
}
}
int
main(int argc, char **argv)
{
regex_t reobj;
char *str;
char tmpbuf[256];
int i, j;
int max_len = 1024*1024*10;
int steps = 20;
int repeats = 10;
int samples = 20;
int len;
clock_t c1, c2;
int opt;
double sample_data[30];
int test_id = -1;
while ((opt = getopt(argc, argv, "r:l:s:j:t:")) != -1) {
switch (opt) {
case 't':
test_id = atoi(optarg);
break;
case 'l':
max_len = atoi(optarg);
break;
case 'j':
steps = atoi(optarg);
break;
case 's':
samples = atoi(optarg);
break;
case 'r':
repeats = atoi(optarg);
break;
default:
printf("Pälli.\n");
return 1;
}
}
/* XXX - Check that the correct results are returned. For example, GNU
regex-0.12 returns incorrect results for very long strings in
test number 1. */
switch (test_id) {
case 0:
printf("# pattern: \"a*\"\n");
printf("# string: \"aaaaaa...\"\n");
len = 0;
tre_regcomp(&reobj, "a*", REG_EXTENDED);
while (len <= max_len) {
str = malloc(sizeof(char) * (len+1));
for (i = 0; i < len; i++)
str[i] = 'a';
str[len-1] = '\0';
run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
stats(sample_data, samples, len);
len = len + (max_len/steps);
free(str);
}
break;
case 1:
printf("# pattern: \"(a)*\"\n");
printf("# string: \"aaaaaa...\"\n");
len = 0;
tre_regcomp(&reobj, "(a)*", REG_EXTENDED);
while (len <= max_len) {
str = malloc(sizeof(char) * (len+1));
for (i = 0; i < len; i++)
str[i] = 'a';
str[len-1] = '\0';
run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
stats(sample_data, samples, len);
len = len + (max_len/steps);
free(str);
}
break;
case 2:
printf("# pattern: \"(a*)\"\n");
printf("# string: \"aaaaaa...\"\n"); len = 0;
tre_regcomp(&reobj, "(a*)", REG_EXTENDED);
while (len <= max_len) {
str = malloc(sizeof(char) * (len+1));
for (i = 0; i < len; i++)
str[i] = 'a';
str[len-1] = '\0';
run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
stats(sample_data, samples, len);
len = len + (max_len/steps);
free(str);
}
break;
case 3:
printf("# pattern: \"(a*)*|b*\"\n");
printf("# string: \"aaaaaa...b\"\n");
len = 0;
tre_regcomp(&reobj, "(a*)*|b*", REG_EXTENDED);
while (len <= max_len) {
str = malloc(sizeof(char) * (len+1));
for (i = 0; i < len-1; i++)
str[i] = 'a';
if (len > 0)
str[len-1] = 'b';
str[len] = '\0';
run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
stats(sample_data, samples, len);
len = len + (max_len/steps);
free(str);
}
break;
case 4:
printf("# pattern: \"(a|a|a|...|a)\"\n");
printf("# string: \"aaaaaa...\"\n");
len = 1024*1024;
str = malloc(sizeof(char) * (len+1));
for (i = 0; i < len-1; i++)
str[i] = 'a';
str[len] = '\0';
len = 0;
while (len <= max_len) {
tmpbuf[0] = '(';
for (i = 1; i < (len*2); i++) {
tmpbuf[i] = 'a';
if (i < len*2-2) {
i++;
tmpbuf[i] = '|';
}
}
printf("# i = %d\n", i);
tmpbuf[i] = ')';
tmpbuf[i+1] = '*';
tmpbuf[i+2] = '\0';
printf("# pat = %s\n", tmpbuf);
tre_regcomp(&reobj, tmpbuf, REG_EXTENDED);
run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
stats(sample_data, samples, len);
len = len + (max_len/steps);
tre_regfree(&reobj);
}
free(str);
break;
case 5:
printf("# pattern: \"foobar\"\n");
printf("# string: \"aaaaaa...foobar\"\n");
len = 0;
tre_regcomp(&reobj, "foobar", REG_EXTENDED);
while (len <= max_len) {
str = malloc(sizeof(char) * (len+7));
for (i = 0; i < len; i++) {
if (i*i % 3)
str[i] = 'a';
else
str[i] = 'a';
}
str[len+0] = 'f';
str[len+1] = 'o';
str[len+2] = 'o';
str[len+3] = 'b';
str[len+4] = 'a';
str[len+5] = 'r';
str[len+6] = '\0';
run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
stats(sample_data, samples, len);
len = len + (max_len/steps);
free(str);
}
break;
case 6:
printf("# pattern: \"a*foobar\"\n");
printf("# string: \"aaaaaa...foobar\"\n");
len = 0;
tre_regcomp(&reobj, "a*foobar", REG_EXTENDED);
while (len <= max_len) {
str = malloc(sizeof(char) * (len+7));
for (i = 0; i < len; i++) {
str[i] = 'a';
}
str[len+0] = 'f';
str[len+1] = 'o';
str[len+2] = 'o';
str[len+3] = 'b';
str[len+4] = 'a';
str[len+5] = 'r';
str[len+6] = '\0';
run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
stats(sample_data, samples, len);
len = len + (max_len/steps);
free(str);
}
break;
case 7:
printf("# pattern: \"(a)*foobar\"\n");
printf("# string: \"aaaaabbaaab...foobar\"\n");
len = 0;
tre_regcomp(&reobj, "(a)*foobar", REG_EXTENDED);
while (len <= max_len) {
str = malloc(sizeof(char) * (len+7));
for (i = 0; i < len; i++) {
/* Without this GNU regex won't find a match! */
if (i*(i-1) % 3)
str[i] = 'b';
else
str[i] = 'a';
}
str[len+0] = 'f';
str[len+1] = 'o';
str[len+2] = 'o';
str[len+3] = 'b';
str[len+4] = 'a';
str[len+5] = 'r';
str[len+6] = '\0';
run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
stats(sample_data, samples, len);
len = len + (max_len/steps);
free(str);
}
break;
case 8:
printf("# pattern: \"(a|b)*foobar\"\n");
printf("# string: \"aaaaabbaaab...foobar\"\n");
len = 0;
tre_regcomp(&reobj, "(a|b)*foobar", REG_EXTENDED);
while (len <= max_len) {
str = malloc(sizeof(char) * (len+7));
for (i = 0; i < len; i++) {
if (i*(i-1) % 3)
str[i] = 'b';
else
str[i] = 'a';
/* Without this GNU regex won't find a match! */
if (i % (1024*1024*10 - 100))
str[i] = 'f';
}
str[len+0] = 'f';
str[len+1] = 'o';
str[len+2] = 'o';
str[len+3] = 'b';
str[len+4] = 'a';
str[len+5] = 'r';
str[len+6] = '\0';
run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
stats(sample_data, samples, len);
len = len + (max_len/steps);
free(str);
}
break;
case 9:
printf("# pattern: hand-coded a*\n");
printf("# string: \"aaaaaa...\"\n");
len = 0;
while (len <= max_len) {
printf("# len = %d\n", len);
str = malloc(sizeof(char)*(len+1));
for (i = 0; i < len; i++)
str[i] = 'a';
str[len-1] = '\0';
for (i = 0; i < samples; i++) {
c1 = clock();
for (j = 0; j < repeats; j++) {
char *s;
int l;
s = str;
l = 0;
while (s != '\0') {
if (*s == 'a') {
s++;
l++;
} else
break;
}
}
c2 = clock();
sample_data[i] = (double)(c2-c1)/(CLOCKS_PER_SEC*repeats);
printf("# sample: %.5f sec, clocks: %ld\n",
(double)(c2-c1)/(CLOCKS_PER_SEC*repeats),
(long)(c2-c1));
fflush(stdout);
}
fflush(stdout);
stats(sample_data, samples, len);
len = len + (max_len/steps);
free(str);
}
break;
default:
printf("Pelle.\n");
return 1;
}
tre_regfree(&reobj);
return 0;
}