Question

Implement a rabin_hash method to make the following code work: int rabin_karp_batchmatch(int bsz, /* size of...

Implement a rabin_hash method to make the following code work:

int
rabin_karp_batchmatch(int bsz,        /* size of bitmap (in bits) to be used */
                      int k,          /* chunk length to be matched */
                      const char *qs, /* query docoument (X)*/
                      int m,          /* query document length */ 
                      const char *ts, /* to-be-matched document (Y) */
                      int n           /* to-be-matched document length*/)
{
  /*if the to-be-matched document is less than k length, return false*/
  if (n < k)
    return 0;
  /*start our filter, allocating the necessary memory*/
  bloom_filter filter = bloom_init(bsz);
  /*compute the hash factor.*/
  long long hash_factor = 1;
  int i;
  int j;
  for (i = 0; i < k - 1; i++)
  {
    hash_factor = mmul(hash_factor, 256);
  }
  int score = 0;
  for (i = 0 ; i < m / k ; i++)
  {
    /*add each length k chunk into the bloom filter.*/
    bloom_add(filter, rabin_hash(qs + i * k, k));
  }
  bloom_print(filter, PRINT_BLOOM_BITS);
  long long rolling_hash = rabin_hash(ts, k);
  for (i = 0 ; i < n - k + 1 ; i++)
  {
    /*If we get a match on the rolling hash, check that the substring exists.*/
    if (bloom_query(filter, rolling_hash))
    {
    /* iterate through each chunk doing a string compare */
      for (j = 0 ; j < m / k ; j++)
      {
        /*If we find a match, increment the count.*/
        if (!strncmp(qs + j * k, ts + i, k))
        {
          score++;
          break;
        }
      }
    }
    /*roll the hash forward.*/
    rolling_hash = madd(mmul(mdel(rolling_hash, mmul(*(ts + i), hash_factor)), 256), *(ts + k + i));
  }
  
        return score;
}

bloom.c (where the bloom methods come from):

bloom.c

#include "bloom.h"

/* Constants for bloom filter implementation */
const int H1PRIME = 4189793;
const int H2PRIME = 3296731;
const int BLOOM_HASH_NUM = 10;

/* The hash function used by the bloom filter */
int
hash_i(int i, /* which of the BLOOM_HASH_NUM hashes to use */ 
       long long x /* a long long value to be hashed */)
{
        return ((x % H1PRIME) + i*(x % H2PRIME) + 1 + i*i);
}

/* Initialize a bloom filter by allocating a character array that can pack bsz bits.
   (each char represents 8 bits)
   Furthermore, clear all bits for the allocated character array. 
   Hint:  use the malloc and bzero library function 
         Return value is the newly initialized bloom_filter struct.*/
bloom_filter 
bloom_init(int bsz /* size of bitmap to allocate in bits*/ )
{
    bloom_filter f;
    f.bsz = bsz;
    
    /* calculates the number of bytes required*/
    int bytes_size;
    if (bsz % 8 == 0)
        bytes_size = bsz/8;
    else
        bytes_size = (bsz - (bsz % 8))/8 + 1;
    
    /* allocates memory for the charactera array and sets it to zero*/
    f.buf = (char *) calloc(bsz , bytes_size);
    
    return f;
}

/* Add elm into the given bloom filter*/
void
bloom_add(bloom_filter f,
          long long elm /* the element to be added (a RK hash value) */)
{
    int i;
    int bloom_position;
    for (i = 0; i < BLOOM_HASH_NUM; i++)
    {
        /* Finds the required byte and bit poistion */
        bloom_position = hash_i(i, elm) % f.bsz;
        int temp_byte = 1 << (7 - bloom_position % 8);
        
        /* Sets the requried bit to 1 using OR operation */
        f.buf[bloom_position / 8] = f.buf[bloom_position / 8] | temp_byte;
    }
    return;
}

/* Query if elm is probably in the given bloom filter */ 
int
bloom_query(bloom_filter f,
            long long elm /* the query element */ )
{
    int i;
    int bloom_position;
    for (i = 0; i < BLOOM_HASH_NUM; i++)
    {
        /* Finds the required byte and bit poistion */
        bloom_position = hash_i(i, elm) % f.bsz;
        int temp_byte = 1 << (7 - bloom_position % 8);
        
        /* Checks whether the bit is 1 using AND operation*/
        if (!( (f.buf[bloom_position / 8]) & temp_byte))
            return 0;
    }
    return 1;
}

void 
bloom_free(bloom_filter *f)
{
        free(f->buf);
        f->buf = f->bsz = 0;
}

/* print out the first count bits in the bloom filter */
void
bloom_print(bloom_filter f,
            int count     /* number of bits to display*/ )
{
        int i;

        assert(count % 8 == 0);

        for(i=0; i< (f.bsz>>3) && i < (count>>3); i++) {
                printf("%02x ", (unsigned char)(f.buf[i]));
        }
        printf("\n");
        return;
}
0 0
Add a comment Improve this question Transcribed image text
Answer #1

Below is your function

/* Produces the initial rabin hash of length len at the start 
 of the string */
long long
rabin_hash(const char *to_encode, /* the string to encode */
           int len                /* the length of the encoded string */
           )
{
  long long hash = 0;
  int i;
  /* Iterate backwards over the range to encode it */
  for (i = 0; i < len; i++)
  {
    hash = madd(*(to_encode + i), mmul(hash, 256));
  }
  return hash;
}
Add a comment
Know the answer?
Add Answer to:
Implement a rabin_hash method to make the following code work: int rabin_karp_batchmatch(int bsz, /* size of...
Your Answer:

Post as a guest

Your Name:

What's your source?

Earn Coins

Coins can be redeemed for fabulous gifts.

Not the answer you're looking for? Ask your own homework help question. Our experts will answer your question WITHIN MINUTES for Free.
Similar Homework Help Questions
ADVERTISEMENT
Free Homework Help App
Download From Google Play
Scan Your Homework
to Get Instant Free Answers
Need Online Homework Help?
Ask a Question
Get Answers For Free
Most questions answered within 3 hours.
ADVERTISEMENT
ADVERTISEMENT
ADVERTISEMENT