Featured Post

Trie implementation in C

String matching using KMP algorithm : C++ Implementation

KMP Algorithm is one the well-known string matching algorithms. It finds a pattern in a string. The pattern can exist multiple times in the string. This implementation in C++ gives indexes of all such matches in the string to be searched.




e.g. 

String to be searched : "ABCDBCAAB ABCDABCDABDE ABCDABD"
Pattern : "ABCD"

The result will be 4 index locations 0, 10, 14 and 23

 However, there is a limitation of KMP algorithm where the pattern overlaps.

 Consider this scenario :

 String to be searched: "ABCABCABCA"
 Pattern: "ABCA"
 The result from KMP algorithm will be 0 and 6 locations. It cannot identify the overlapping matches  like this:
 ABCABCABCA
 ABCABCABCA
 ABCABCABCA




#include <iostream>
#include <cstdlib>

using namespace std;
#define MAX_MATCHES 100

//Array to store matched indexes
int FOUND[MAX_MATCHES];
//variable to store last index in FOUND array
static int l = 0;

//Partial match table
void kmp_table(string W, int *T )
{
 int pos = 2;
 int cnd = 0;
 int length = W.length();
 
 T[0] = -1;
 T[1] = 0;
 
 while( pos < length)
 {
  if(W[pos-1] == W[cnd])
  {
   T[pos] = cnd + 1;
   cnd++;
   pos++;
  }
  else if( cnd > 0)
   cnd = T[cnd];
  else
  {
   T[pos] = 0;
   pos++;
  }
 }
}

//Search function
void kmp_search(string S, string W)
{
 
 int m = 0; 
 int i = 0;
 int sizeS = S.length();
 int sizeW = W.length();
 
 int *T = new int[sizeof(int) * sizeW];
 
 kmp_table(W, T);
 
 while( (m + i) < sizeS)
 {
  if (W[i] == S[m + i]) 
  {
            if (i == (sizeW - 1))
            {
             //Add the start index of match in the FOUND table
             FOUND[l++] = m;
   }
    
            i++;
        }
        else
        {
            if (T[i] > -1)
            {
                m = m + i - T[i];
    i = T[i];
            }
            else
            {
                m = m + 1;
    i = 0;
            }
        }
 }
 
 delete(T);
}

int main()
{
 string S = "ABCDBCAAB ABCDABCDABDE ABCDABD";
 string W = "ABCD";
 
 kmp_search(S,W);
  
 for (int i = 0 ; i < l; i++)
  cout<<"Pattern found at "<< FOUND[i] <<endl; 
}


Comments

  1. Thank you for this great example - has helped me understand IPC.

    ReplyDelete

Post a Comment

Please post your valuable suggestions