我的KMP算法做出来了,可是居然运行时间比普通匹配还慢??求高手解答,帮忙修改一下!

#include<time.h>
#include <iostream.h>
#include <string.h>
int Index_BF ( char S [ ], char T [ ], int pos )
{
int i = pos, j = 0;
while ( S[i+j] != '\0'&& T[j] != '\0')
if ( S[i+j] == T[j] )
j ++;
else
{
i ++; j = 0; }
if ( T[j] == '\0')
return i;
else
return -1;
}
void get_nextval(const char *T, int next[])
{
int j = 0, k = -1;
next[0] = -1;
while ( T[j/*+1*/] != '\0' )
{
if (k == -1 || T[j] == T[k])
{
++j; ++k;
if (T[j]!=T[k])
next[j] = k;
else
next[j] = next[k];
}// if
else
k = next[k];
}

}

int KMP(const char *Text,const char* Pattern) //const 表示函数内部不会改变这个参数的值。
{
if( !Text||!Pattern|| Pattern[0]=='\0' || Text[0]=='\0' )//
return -1;//空指针或空串,返回-1。
int len=0;
const char * c=Pattern;
while(*c++!='\0')//移动指针比移动下标快。
{
++len;//字符串长度。
}
int *next=new int[len+1];
get_nextval(Pattern,next);//求Pattern的next函数值

int index=0,i=0,j=0;
while(Text[i]!='\0' && Pattern[j]!='\0' )
{
if(Text[i]== Pattern[j])
{
++i;// 继续比较后继字符
++j;
}
else
{
index += j-next[j];
if(next[j]!=-1)
j=next[j];// 模式串向右移动
else
{
j=0;
++i;
}
}
}//while

delete []next;
if(Pattern[j]=='\0')
return index;// 匹配成功
else
return -1;
}

int main()//abCabCad
{
int i;

char* text= "bababCabCadca";
char*pattern="adCadCad";
cout<<("主串为: ")<<endl;
cout<<("bababCabCadca")<<endl;
cout<<("子串为: ")<<endl;
cout<<("adCadCad")<<endl;
clock_t start, finish;
double duration;
//普通匹配算法
cout<<( "Time to do Index loops is ") ;
start = clock();
for(int k=0;k<5000000;k++)
{
i=Index_BF(text,pattern,1);

}
finish = clock();
duration = (double)(finish - start) / CLOCKS_PER_SEC;
cout<<( "%f seconds\n", duration )<<endl;
//KMP匹配算法
cout<<( "Time to do KMP loops is ")<<endl;
start = clock();
for(int j=0;j<5000000;j++)
{
i=KMP(text,pattern);
}
finish = clock();
duration = (double)(finish - start) / CLOCKS_PER_SEC;
cout<<( "%f seconds\n", duration )<<endl;
return 0;
}
要求是实现字符串匹配的简单匹配算法和KMP算法,并且使用相同的比较字符串重复比较至少5000次,计算两者的时间差别。请使用C语言。

①你的KMP代码里很多东西导致KMP算法变慢:
1、计算主字符串长度很浪费时间,可以不用计算的,事先开辟一个大空间。
2、new int[len+1];很浪费时间,事先开辟大空间,或者用多级内存管理。
3、delete []next很浪费时间。

②但是①说的不是本质,真正的原因是:KMP的优点是避免重复匹配的记忆功能,缺点是启动慢构造next,这就导致如果要被匹配的主字符串太短(少于1k个字符都算短的)。
而朴素算法启动不需要时间。

③另一方面,你的main中主字符串和匹配字符串没有相似性(只在最后2个字符串才进行了一次大的返回),而KMP的优点就在于处理相似字符串匹配,相似度越高,字符串越长,匹配效果越好。

④我改了下你的代码,增加的主字符串的长度,提高了字符串相似程度,结果就是KMP的时间比朴素算法要好接近30%:
Time to do Index loops is 32.031
Time to do KMP loops is 23.609

⑤代码如下:

#include<time.h>
#include <iostream.h>
#include <string.h>
int Index_BF ( char S [ ], char T [ ], int pos )
{
int i = pos, j = 0;
while ( S[i+j] != '\0'&& T[j] != '\0')
if ( S[i+j] == T[j] )
j ++;
else
{
i ++; j = 0; }
if ( T[j] == '\0')
return i;
else
return -1;
}
void get_nextval(const char *T, int next[])
{
int j = 0, k = -1;
next[0] = -1;
while ( T[j/*+1*/] != '\0' )
{
if (k == -1 || T[j] == T[k])
{
++j; ++k;
if (T[j]!=T[k])
next[j] = k;
else
next[j] = next[k];
}// if
else
k = next[k];
}

}

int KMP(const char *Text,const char* Pattern) //const 表示函数内部不会改变这个参数的值。
{
if( !Text||!Pattern|| Pattern[0]=='\0' || Text[0]=='\0' )//
return -1;//空指针或空串,返回-1。

static int next[50];
get_nextval(Pattern,next);//求Pattern的next函数值

static int index,i,j;
index=i=j=0;
for(;Text[i]!='\0' && Pattern[j]!='\0';)
{
if(Text[i]== Pattern[j])
{
++i;// 继续比较后继字符
++j;
}
else
{
index += j-next[j];
if(next[j]!=-1)
j=next[j];// 模式串向右移动
else
{
j=0;
++i;
}
}
}

//delete []next;
if(Pattern[j]=='\0')
return index;// 匹配成功
else
return -1;
}

int main()//abCabCad
{
int i;

char* text= "jegiradCadCegjirjgirjgirjfewijfejifgjadCadCadCadCadCadCadCadCadCadCadCadCadCadCerigjirjgirejgirejigjreigreigjreigjirgjeigjirjgigijirjeihjirhjirjhirjgijerigjir\
fewfjiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergergergregregregergregregergr\
jegiradCadCegjirjgirjgirjfewijfejifgjadCadCadCadCadCadCadCadCadCadCadCadCadCadCerigjirjgirejgirejigjreigreigjreigjirgjeigjirjgigijirjeihjirhjirjhirjgijerigjir\
fewfjiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergergergregregregergregregergr\
jegiradCadCegjirjgirjgirjfewijfejifgjadCadCadCadCadCadCadCadCadCadCadCadCadCadCerigjirjgirejgirejigjreigreigjreigjirgjeigjirjgigijirjeihjirhjirjhirjgijerigjir\
fewfjiadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttcc\
adCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttcc\
adCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttccadCadCadtttcc\
fewieiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeffefiwejfijiwfjiewjfijeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiegirjgirjgirjigjierjgijrighrjigjeigjadCadCadjreigjrijgirejgijreigjerjigejrbababCabCadcadewfewgfgergregegrgergeiwjjgiejijriegjirigjadCadCadtttccc\
adCadCadtttccctctc";

char*pattern="adCadCadtttccctctc";

clock_t start, finish;
double duration;
//普通匹配算法
cout<<( "Time to do Index loops is ") ;
start = clock();
for(int k=0;k<50000;k++)
{
i=Index_BF(text,pattern,1);

}
finish = clock();
duration = (double)(finish - start) / CLOCKS_PER_SEC;
cout<<( "%f seconds\n", duration )<<endl;
//KMP匹配算法
cout<<( "Time to do KMP loops is ")<<endl;
start = clock();
for(int j=0;j<50000;j++)
{
i=KMP(text,pattern);
}
finish = clock();
duration = (double)(finish - start) / CLOCKS_PER_SEC;
cout<<( "%f seconds\n", duration )<<endl;

cin>>finish;
return 0;

}
温馨提示:答案为网友推荐,仅供参考
第1个回答  2011-01-13
问你两个问题...
1.你理解了KMP算法吗?
2.你熟悉C++的string类吗?
你的这几行代码,我实在不想评价了,只能对你说:“多动手吧!”
你的错误太多,只知道照书抄,完全没理解算法的含义,你懂算法思想并不代表你能用代码写出你的思想!

书中使用的是SString,是自己定义的一个结构体,结果被你硬生生的换成string类,string[0]是字符串的第一个字符,而SString[0]是字符串长度,SString[1]才是第一个字符...这里面的错误自己去理解吧

nextval[0]究竟有什么用?你根本没理解...书中用的是0,我这里给你改成-1...为什么就自己去探究吧...

下面是给你改的代码:
#include<iostream>
#include<string>
using namespace std;
int nextval[255];
int strcmp(string s,string t);
int kmp(string s,string t,int pos);
void get_nextval(string t,int nextval[]);
int main()
{
string s="123456987";
string t;
while(cin>>t)
{
cout<<"在"<<kmp(s,t,0)<<"处匹配成功!"<<endl;//改动1
}

for(int i=1;i<t.length();++i)
cout<<nextval[i];
return 0;
}
void get_nextval(string t,int nextval[])
{
int i=1;
nextval[0]=-1; //改动2
nextval[1]=0; //改动3
int j=0;
while(i < t.length())
{
if(j==-1||t[i]==t[j]) //改动4
{
++i; ++j;
if(t[i]!=t[j])
nextval[i]=j;
else
nextval[i]=nextval[j];
}
else j=nextval[j];
}
}
int kmp(string s,string t,int pos)
{
//int next[512];//定义一个数组
get_nextval(t,nextval);//获得数组
int i=0;
i=pos; int j=0; //改动5
while(i < s.length()&&j < (int)t.length()) //改动6
{
if(j==-1||s[i]==t[j]) //改动7

else j=nextval[j];
}
if(j >= t.length()) //改动8
return i-t.length();
else
return 0;
}
//////////////////////////整个程序自己慢慢消化吧 希望你以后不要再浮于表面的学习