gpt4 book ai didi

c++ - strstr 是否有反向功能

转载 作者:IT老高 更新时间:2023-10-28 22:26:17 48 4
gpt4 key购买 nike

我正在尝试找到一个与 strstr 类似的函数,该函数从字符串的末尾开始搜索子字符串。

最佳答案

标准 C 库没有“反向 strstr”函数,因此您必须自己查找或编写。

我想出了几个我自己的解决方案,并在这个线程中添加了一些测试和基准测试代码以及​​其他功能。对于那些好奇,在我的笔记本电脑(Ubuntu karmic,amd64 架构)上运行的输出如下所示:

$ gcc -O2 --std=c99 strrstr.c && ./a.out
#1 0.123 us last_strstr
#2 0.440 us theo
#3 0.460 us cordelia
#4 1.690 us digitalross
#5 7.700 us backwards_memcmp
#6 8.600 us sinan

您的结果可能不同,并且根据您的编译器和库,结果的顺序也可能不同。

要从字符串的开头获取匹配的偏移量(索引),使用指针算法:

char *match = last_strstr(haystack, needle);
ptrdiff_t index;
if (match != NULL)
index = match - haystack;
else
index = -1;

现在,落叶松(请注意,这纯粹是用 C 语言编写的,我对 C++ 的了解不够好,无法给出答案):

#include <string.h>
#include <stdlib.h>

/* By liw. */
static char *last_strstr(const char *haystack, const char *needle)
{
if (*needle == '\0')
return (char *) haystack;

char *result = NULL;
for (;;) {
char *p = strstr(haystack, needle);
if (p == NULL)
break;
result = p;
haystack = p + 1;
}

return result;
}


/* By liw. */
static char *backwards_memcmp(const char *haystack, const char *needle)
{
size_t haylen = strlen(haystack);

if (*needle == '\0')
return (char *) haystack;

size_t needlelen = strlen(needle);
if (needlelen > haylen)
return NULL;

const char *p = haystack + haylen - needlelen;
for (;;) {
if (memcmp(p, needle, needlelen) == 0)
return (char *) p;
if (p == haystack)
return NULL;
--p;
}
}


/* From http://stuff.mit.edu/afs/sipb/user/cordelia/Diplomacy/mapit/strrstr.c
*/
static char *cordelia(const char *s1, const char *s2)
{
const char *sc1, *sc2, *psc1, *ps1;

if (*s2 == '\0')
return((char *)s1);

ps1 = s1 + strlen(s1);

while(ps1 != s1) {
--ps1;
for (psc1 = ps1, sc2 = s2; ; )
if (*(psc1++) != *(sc2++))
break;
else if (*sc2 == '\0')
return ((char *)ps1);
}
return ((char *)NULL);
}


/* From http://stackoverflow.com/questions/1634359/
is-there-a-reverse-fn-for-strstr/1634398#1634398 (DigitalRoss). */
static char *reverse(const char *s)
{
if (s == NULL)
return NULL;
size_t i, len = strlen(s);
char *r = malloc(len + 1);

for(i = 0; i < len; ++i)
r[i] = s[len - i - 1];
r[len] = 0;
return r;
}
char *digitalross(const char *s1, const char *s2)
{
size_t s1len = strlen(s1);
size_t s2len = strlen(s2);
const char *s;

if (s2len == 0)
return (char *) s1;
if (s2len > s1len)
return NULL;
for (s = s1 + s1len - s2len; s >= s1; --s)
if (strncmp(s, s2, s2len) == 0)
return (char *) s;
return NULL;
}


/* From http://stackoverflow.com/questions/1634359/
is-there-a-reverse-fn-for-strstr/1634487#1634487 (Sinan Ünür). */

char *sinan(const char *source, const char *target)
{
const char *current;
const char *found = NULL;

if (*target == '\0')
return (char *) source;

size_t target_length = strlen(target);
current = source + strlen(source) - target_length;

while ( current >= source ) {
if ( (found = strstr(current, target)) ) {
break;
}
current -= 1;
}

return (char *) found;
}


/* From http://stackoverflow.com/questions/1634359/
is-there-a-reverse-fn-for-strstr/1634441#1634441 (Theo Spears). */
char *theo(const char* haystack, const char* needle)
{
size_t needle_length = strlen(needle);
const char* haystack_end = haystack + strlen(haystack) - needle_length;
const char* p;
size_t i;

if (*needle == '\0')
return (char *) haystack;
for(p = haystack_end; p >= haystack; --p)
{
for(i = 0; i < needle_length; ++i) {
if(p[i] != needle[i])
goto next;
}
return (char *) p;

next:;
}
return 0;
}


/*
* The rest of this code is a test and timing harness for the various
* implementations above.
*/


#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>


/* Check that the given function works. */
static bool works(const char *name, char *(*func)(const char *, const char *))
{
struct {
const char *haystack;
const char *needle;
int offset;
} tests[] = {
{ "", "", 0 },
{ "", "x", -1 },
{ "x", "", 0 },
{ "x", "x", 0 },
{ "xy", "x", 0 },
{ "xy", "y", 1 },
{ "xyx", "x", 2 },
{ "xyx", "y", 1 },
{ "xyx", "z", -1 },
{ "xyx", "", 0 },
};
const int num_tests = sizeof(tests) / sizeof(tests[0]);
bool ok = true;

for (int i = 0; i < num_tests; ++i) {
int offset;
char *p = func(tests[i].haystack, tests[i].needle);
if (p == NULL)
offset = -1;
else
offset = p - tests[i].haystack;
if (offset != tests[i].offset) {
fprintf(stderr, "FAIL %s, test %d: returned %d, haystack = '%s', "
"needle = '%s', correct return %d\n",
name, i, offset, tests[i].haystack, tests[i].needle,
tests[i].offset);
ok = false;
}
}
return ok;
}


/* Dummy function for calibrating the measurement loop. */
static char *dummy(const char *haystack, const char *needle)
{
return NULL;
}


/* Measure how long it will take to call the given function with the
given arguments the given number of times. Return clock ticks. */
static clock_t repeat(char *(*func)(const char *, const char *),
const char *haystack, const char *needle,
long num_times)
{
clock_t start, end;

start = clock();
for (long i = 0; i < num_times; ++i) {
func(haystack, needle);
}
end = clock();
return end - start;
}


static clock_t min(clock_t a, clock_t b)
{
if (a < b)
return a;
else
return b;
}


/* Measure the time to execute one call of a function, and return the
number of CPU clock ticks (see clock(3)). */
static double timeit(char *(*func)(const char *, const char *))
{
/* The arguments for the functions to be measured. We deliberately
choose a case where the haystack is large and the needle is in
the middle, rather than at either end. Obviously, any test data
will favor some implementations over others. This is the weakest
part of the benchmark. */

const char haystack[] = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"b"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
const char needle[] = "b";

/* First we find out how many repeats we need to do to get a sufficiently
long measurement time. These functions are so fast that measuring
only a small number of repeats will give wrong results. However,
we don't want to do a ridiculously long measurement, either, so
start with one repeat and multiply it by 10 until the total time is
about 0.2 seconds.

Finally, we measure the dummy function the same number of times
to get rid of the call overhead.

*/

clock_t mintime = 0.2 * CLOCKS_PER_SEC;
clock_t clocks;
long repeats = 1;
for (;;) {
clocks = repeat(func, haystack, needle, repeats);
if (clocks >= mintime)
break;
repeats *= 10;
}

clocks = min(clocks, repeat(func, haystack, needle, repeats));
clocks = min(clocks, repeat(func, haystack, needle, repeats));

clock_t dummy_clocks;

dummy_clocks = repeat(dummy, haystack, needle, repeats);
dummy_clocks = min(dummy_clocks, repeat(dummy, haystack, needle, repeats));
dummy_clocks = min(dummy_clocks, repeat(dummy, haystack, needle, repeats));

return (double) (clocks - dummy_clocks) / repeats / CLOCKS_PER_SEC;
}


/* Array of all functions. */
struct func {
const char *name;
char *(*func)(const char *, const char *);
double secs;
} funcs[] = {
#define X(func) { #func, func, 0 }
X(last_strstr),
X(backwards_memcmp),
X(cordelia),
X(digitalross),
X(sinan),
X(theo),
#undef X
};
const int num_funcs = sizeof(funcs) / sizeof(funcs[0]);


/* Comparison function for qsort, comparing timings. */
int funcmp(const void *a, const void *b)
{
const struct func *aa = a;
const struct func *bb = b;

if (aa->secs < bb->secs)
return -1;
else if (aa->secs > bb->secs)
return 1;
else
return 0;
}


int main(void)
{

bool ok = true;
for (int i = 0; i < num_funcs; ++i) {
if (!works(funcs[i].name, funcs[i].func)) {
fprintf(stderr, "%s does not work\n", funcs[i].name);
ok = false;
}
}
if (!ok)
return EXIT_FAILURE;

for (int i = 0; i < num_funcs; ++i)
funcs[i].secs = timeit(funcs[i].func);
qsort(funcs, num_funcs, sizeof(funcs[0]), funcmp);
for (int i = 0; i < num_funcs; ++i)
printf("#%d %.3f us %s\n", i+1, funcs[i].secs * 1e6, funcs[i].name);

return 0;
}

关于c++ - strstr 是否有反向功能,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/1634359/

48 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com