/*---------------------------------------------------------------------------
File name: kmp-2.cpp
Author: HJin (email: fish_sea_bird [at] yahoo [dot] com )
Created on: 10/13/2007 18:54:23
Environment: WinXPSP2 En Pro + VS2005 v8.0.50727.762
10/13/2007 20:13:17
-----------------------------------------------------------
0. modified the first implementation so that the current
presentation of algorithms
Index() and Index_KMP()
excatly immitates what the textbook said.
1. added boundary checking. Now this piece of code can
pass rigorous testing.
-----------------------------------------------------------
All indices start from 0, not 1.
O(n, m) means O(n) time, O(m) space.
O(n) means O(n) time, O(1) space.
This implementation does not check boundary conditions, such as
t=NULL, strlen(t)=0, etc.
*/
#include <iostream>
using namespace std;
/**
Algorithm 4.5 (Yan, Weimin, P 79)
O(n*m).
*/
int Index(char* s, char* t, int pos)
{
int i=pos, j=0, m, n;
/**
Check if s or t are NULL pointers. The reason:
you cannot apply strlen() to a NULL pointer.
*/
if(s==NULL)
{
return -1;
}
/**
s != NULL after this point.
*/
/**
We treat t=NULL as an empty string and return 0.
*/
if(t==NULL)
{
return 0;
}
/**
t != NULL after this point.
both s and t are not NULL after this point.
*/
/**
We can have a char pointer which has length 0
but not NULL:
char s[]="";
*/
n = strlen(s);
if(pos<0 || pos>=n || n==0)
{
return -1;
}
m = strlen(t);
if(m==0)
{
return 0;
}
if(m>n)
{
return -1;
}
/**
Boundary checking is done at this point. We have
s!=NULL, t!=NULL, n>=m>0, and 0<=pos<=n-1;
*/
while(i<n && j<m)
{
if(s[i] == t[j])
{
++i;
++j;
}
else
{
i += 1-j;
j = 0;
}
}
if(j == m)
{
return i-m;
}
else
{
return -1;
}
}
/** Compute the next[] table.
Algorithm 4.8 (Yan, Weimin, P 84)
O(m, m).
*/
void get_nextval(char* t, int*& nextval)
{
/**
boundary checking for t is done in Index_KMP().
*/
int m=strlen(t)-1, j=-1, i=0;
nextval[0]=-1;
while(i<m)
{
if(j==-1 || t[i]==t[j])
{
++i;
++j;
if(t[i]==t[j])
{
nextval[i] = nextval[j];
}
else
{
nextval[i] = j;
}
}
else
{
j=nextval[j];
}
}
}
/** Implement the KMP string matching algorithm.
Algorithm 4.6 (Yan, Weimin, P 82)
O(n+m, m).
*/
int Index_KMP(char* s, char* t, int pos)
{
int m, n, i=pos, j=0, *nextval=NULL;
/**
Check if s or t are NULL pointers. The reason:
you cannot apply strlen() to a NULL pointer.
*/
if(s==NULL)
{
return -1;
}
/**
s != NULL after this point.
*/
/**
We treat t=NULL as an empty string and return 0.
*/
if(t==NULL)
{
return 0;
}
/**
t != NULL after this point.
both s and t are not NULL after this point.
*/
/**
We can have a char pointer which has length 0
but not NULL:
char s[]="";
*/
n = strlen(s);
if(pos<0 || pos>=n || n==0)
{
return -1;
}
m = strlen(t);
if(m==0)
{
return 0;
}
if(m>n)
{
return -1;
}
/**
Boundary checking is done at this point. We have
s!=NULL, t!=NULL, n>=m>0, and 0<=pos<=n-1;
*/
nextval = new int[m];
get_nextval(t, nextval);
while(i<n && j<m)
{
if(j==-1 || s[i]==t[j])
{
++i;
++j;
}
else
{
j=nextval[j];
}
}
delete [] nextval;
if(j==m)
{
return i-m;
}
else
{
return -1;
}
}
int main()
{
char s[] = "abcdefgh";
char t[] = "cd";
//char t[] = "";
//char *t = NULL;
int i=-1, pos;
pos=-1;
i= Index(s, t, pos);
if(i!=-1)
{
cout<<s+i<<endl;
}
else
{
cout<<t<<" is not a substring of "<<s<<" starting at position "<<pos<<endl;
}
i=Index_KMP(s, t, pos);
if(i!=-1)
{
cout<<s+i<<endl;
}
else
{
cout<<t<<" is not a substring of "<<s<<" starting at position "<<pos<<endl;
}
return 0;
}