Featured Post

Trie implementation in C

Hash Table

Hashing is one of the best techniques when it comes to storing a large amount of random data or it can be unrandom data too :)

It is generally easy to store your data into an array or a linked list. But the searching becomes very hectic and time consuming when this data grows to millions of records.

So the big guys came up with hashing as an alternative to store and retrieve large amounts of data. The search became much faster upto O(1) if there are no collisions!!

Well lets discuss how hashing works if you really want that whatever I have written above starts making sense.

We store data into Hash tables on the basis of a key generated by a hash function.
This hash function uses some formula to generate a key which is in the range of indexes of the Hash Table. And then we store that data into the Hash Table with the help of this key.

key = hash(data);
HashTable[key] = data ;

Now you may be wondering that how this function works.
Take an example :-
If the data to be stored are integers we can define our hash function to return

data % prime

where prime is any prime number(preferably a big prime number)

But now one more problem arises. If we are taking such a formula then the key could be duplicated. Well this situation is called a 'Collision'.

There are various methods for Collision Resoultion.
e.g. Rehashing , Separate Chaining

Separate Chaining is being used by the code snippet below.
Normally data is stored one at a single index. But to resolve collision, at each index we maintain a linked list instead to which we can add our data when the collision occurs. When at the time of searching we get an index , we actually reach at the head of the linked list and then we can continue searching the Linked List.

This has reduced our searching time from searching a million records to few hundreds.So enjoy the code.

P.S : As I am a normal human being , there would be bugs in this code too. Please point out those so that I can improve the quality of the code here.

//linklist_impl.hpp
#include<string>
using namespace std;
class linklist
{
    int c;
    struct node
    {
    public:
        string data;
        node *link;
    } *p;
public:
    linklist();
    void append(string &str);
    void del(string &str);
    void display();
    string getData();
    int searchlist(string);
    bool isListEmpty();
    ~linklist();
};


//linklist_impl.cpp
#include 
#include 
#include 
#include "linklist_impl.hpp"
using namespace std;


linklist::linklist()
{
    p = NULL;
    c = 0;
}

void linklist::append(string &str)
{
    node *q, *t;
    if(p==NULL)
    {
        p = new node;
        p->data = str;
        p->link = NULL;
        c += 1;
    }
    else
    {
        q = p;
        while(q->link != NULL)
        {
            q = q->link;
        }
        t = new node;
        t->data = str;
        t->link = NULL;
        q->link = t;
        c += 1;
    }
}

void linklist::del(string &str)
{
    node *q, *r;
    q = p;
    if(q->data == str)
    {
        p = q->link;
        delete q;
        c -= 1;
        return;
    }
    r = q;
    while(q != NULL)
    {
        if(q->data == str)
        {
            r->link = q->link;
            delete q;
            c -= 1;
            return;
        }
        r = q;
        q = q->link;
    }
    cout<<"Element "<<str<<" not found"<<endl;
}

void linklist::display()
{
    node *q;
    int i;
    for(q = p, i = 0; q != NULL, i < c; q = q->link, i++)
    {
        cout<<"\t"<<i<<": "<<q->data<<endl;
    }
}

string linklist::getData()
{
  return p->data;
}

bool linklist::isListEmpty()
{
    node *q;
    int i,flag = 0;
    for(q = p, i = 0; q != NULL, i < c; q = q->link, i++)
    {
      if(q->data != "")
        flag = 1;
    }
    if(flag == 1)
      return false;
    else 
      return true;
}

int linklist::searchlist(string searchItem)
{
    node *q;
    int i;
    for(q = p, i = 0; q != NULL, i < c; q = q->link, i++)
    {
      if(q -> data == searchItem)
        return i;
    }
    return -1;
}

linklist::~linklist()
{
    node *q;
    if(p == NULL)
    {
        return;
    }
    while(p != NULL)
    {
        q = p->link;
        delete p;
        p = q;
    }
}



//schashtable.cpp
#include<iostream>
#include<sys/time.h>
#include<stdlib.h>
#include<stdio.h>
#include<limits>
#include<cstring>
#include "linklist_impl.hpp"

using namespace std;
struct timeval starttime,endtime,timediff;
enum {FILLED = 0 , ALL};
const int sizeHTable = 5;


class table
{

private:
    linklist *items;
    int nel;
    int strhash(const char *str); 
public:
    table(int size);
    ~table();
    void addItem();
    int searchTable(string);
    void delItem();
    void display( int mode);
    int getCount();
    bool isEmpty();
};

#define BAD_INPUT_CUTOFF 3
unsigned int getIntInRange(unsigned int min, unsigned int max, 
                           const char *prompt)
{
    int input, //used to get input
    bad_count=0; //used to keep track of how many bad attempts.
    do
    {
        std::cout << prompt; 
        std::cin >> input; 
        if (!std::cin.good() || input < min || input > max)
        {
            std::cout << "\nInvalid Input!" << std::endl;
            std::cin.clear(); //resets the state flag
            
//clears out the buffer
std::cin.ignore(std::numeric_limits<std::streamsize>::max(),'\n'); 
                   
            bad_count++; //User made a bad input, count it
            input = 0;   // user did not enter a valid number
        }
        //See if this is a valid number (users are mean/stupid sometimes)
    }
    while (input == 0 && bad_count < BAD_INPUT_CUTOFF);
    return input;
}


table::table(int size)
{
    items = new linklist[size];
    for(int i = 0; i<sizeHTable ;i++)
    nel = 0;
}

table::~table()
{   
    if(items)
    delete[] items;
}

int table::strhash(const char *str) {
    unsigned long hash = 0;
      int i=0;
        while(i <= strlen(str)) {
              hash = hash + (hash<<5) + (unsigned long)(str[i]); 
                  i++;
                    }
          return (hash%sizeHTable); //return the hashed number which must be  
                                    //between 0 an size-1, this is why I used
                                    //hash%size
}

void table::addItem()
{
        string str;
        
        cout<<"\nEnter Data : ";
        cin>>str;

        
        int i = strhash(str.c_str());
        if( i >= 0 && i < sizeHTable )
        {
            items[i].append(str);
            cout<<"\nInserted";
            nel++;
            return;
        }

}

int table::searchTable(string searchItem)
{
    int pos;
    
    int i = strhash(searchItem.c_str());

    if((pos = items[i].searchlist(searchItem)) != -1 )
    {
        cout<<"Bucket["<<i<<"] -> pos : "<<pos<<"\t" <<"\t"<< endl;
        return i;
    }
    else
    {
            cout<<"Data "<<searchItem<<" not present in the table" << endl;
            return -1;
    }
}

void table::delItem()
{
    string item;
    cout<<"\nEnter an item to delete : ";
    cin>>item;
    
    int index;
    index = strhash(item.c_str());
        items[index].del(item);
        nel--;
}

bool table::isEmpty()
{
    if( nel > 0)
    {
        return false;
    }
    else
    {
        cout<<"\nTable is Empty"<<endl;
        return true;
    }
}

void table::display(int mode = FILLED)
{
    if(isEmpty())
        return;

   if(mode == FILLED)
   {
     int i;
     for(i=0; i<sizeHTable; i++)
     {  
        if(items[i].isListEmpty() == false)
        {
          cout<<"Bucket["<<i<<"] :"<<endl;
          items[i].display(); //call the function of our linked list, 
                              //which outputs it's records with their indexes
        }
     }
   }  
   else if(mode == ALL)
   { 
     int i;
     for(i=0; i<sizeHTable; i++)
     {
        cout<<"Bucket["<<i<<"] :"<<endl;
        items[i].display(); //call the function of our linked list, 
                            //which outputs it's records with their indexes
     }
   }  
}

int table::getCount()
{
    return nel;
}

int main()
{
    int choice;
    
    string item;
    table T1(sizeHTable);
    int mode;

    while(1)
    {
        choice = getIntInRange(1,6,"\n1.Add New Item\n2.Delete Item 
                 \n3.Search Item \n4.Display Hash Table \n5.Get the 
                  number of elements \n6.Exit\nEnter your choice : ");
        
            switch(choice)
            {
            case 1 :
                T1.addItem();
                break;
            case 2 :
                T1.delItem();
                break;
            case 3 :
                if(T1.isEmpty() != true)
                {
                    cout<<"\nEnter an item to search : ";
                    cin>>item;
                    T1.searchTable(item);
                }
                break;
            case 4 :
                cout<<"\nEnter display mode (0 - FILLED / 1- ALL) : ";
                cin>>mode;
                T1.display(mode);
                break;
             case 5 :
                cout<<"Total no. of elements : "<<T1.getCount()<<endl;
                break;
             case 6 :
                exit(0);
            }
        
    }
    
    
}



Run the example in gnu compiler as

user@linux~> g++ linklist_impl.cpp schashtable.cpp
user@linux~> ./a.out

or you can also use makefile.

Comments