Question

I have a project to write a C program to find how many different words in...

I have a project to write a C program to find how many different words in a file , get a file as a input and find 10 most common words in file. Hope anyone can help. Thank you so much!

0 0
Add a comment Improve this question Transcribed image text
Answer #1

`Hey,

Note: If you have any queries related the answer please do comment. I would be very happy to resolve all your queries.

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include<stdbool.h>
# define MAX_CHARS 26000
# define MAX_WORD_SIZE 30000
// A Trie node
struct TrieNode
{
bool isEnd; // indicates end of word
unsigned frequency; // the number of occurrences of a word
int indexMinHeap; // the index of the word in minHeap
TrieNode* child[MAX_CHARS]; // represents 26 slots each for 'a' to 'z'.
};
  
// A Min Heap node
struct MinHeapNode
{
TrieNode* root; // indicates the leaf node of TRIE
unsigned frequency; // number of occurrences
char* word; // the actual word stored
};
  
// A Min Heap
struct MinHeap
{
unsigned capacity; // the total size a min heap
int count; // indicates the number of slots filled.
MinHeapNode* array; // represents the collection of minHeapNodes
};
  
// A utility function to create a new Trie node
TrieNode* newTrieNode()
{
// Allocate memory for Trie Node
TrieNode* trieNode = new TrieNode;
  
// Initialize values for new node
trieNode->isEnd = 0;
trieNode->frequency = 0;
trieNode->indexMinHeap = -1;
for( int i = 0; i < MAX_CHARS; ++i )
trieNode->child[i] = NULL;
  
return trieNode;
}
  
// A utility function to create a Min Heap of given capacity
MinHeap* createMinHeap( int capacity )
{
MinHeap* minHeap = new MinHeap;
  
minHeap->capacity = capacity;
minHeap->count = 0;
  
// Allocate memory for array of min heap nodes
minHeap->array = new MinHeapNode [ minHeap->capacity ];
  
return minHeap;
}
  
// A utility function to swap two min heap nodes. This function
// is needed in minHeapify
void swapMinHeapNodes ( MinHeapNode* a, MinHeapNode* b )
{
MinHeapNode temp = *a;
*a = *b;
*b = temp;
}
  
// This is the standard minHeapify function. It does one thing extra.
// It updates the minHapIndex in Trie when two nodes are swapped in
// in min heap
void minHeapify( MinHeap* minHeap, int idx )
{
int left, right, smallest;
  
left = 2 * idx + 1;
right = 2 * idx + 2;
smallest = idx;
if ( left < minHeap->count &&
minHeap->array[ left ]. frequency <
minHeap->array[ smallest ]. frequency
)
smallest = left;
  
if ( right < minHeap->count &&
minHeap->array[ right ]. frequency <
minHeap->array[ smallest ]. frequency
)
smallest = right;
  
if( smallest != idx )
{
// Update the corresponding index in Trie node.
minHeap->array[ smallest ]. root->indexMinHeap = idx;
minHeap->array[ idx ]. root->indexMinHeap = smallest;
  
// Swap nodes in min heap
swapMinHeapNodes (&minHeap->array[ smallest ], &minHeap->array[ idx ]);
  
minHeapify( minHeap, smallest );
}
}
  
// A standard function to build a heap
void buildMinHeap( MinHeap* minHeap )
{
int n, i;
n = minHeap->count - 1;
  
for( i = ( n - 1 ) / 2; i >= 0; --i )
minHeapify( minHeap, i );
}
  
// Inserts a word to heap, the function handles the 3 cases explained above
void insertInMinHeap( MinHeap* minHeap, TrieNode** root, const char* word )
{
// Case 1: the word is already present in minHeap
if( (*root)->indexMinHeap != -1 )
{
++( minHeap->array[ (*root)->indexMinHeap ]. frequency );
  
// percolate down
minHeapify( minHeap, (*root)->indexMinHeap );
}
  
// Case 2: Word is not present and heap is not full
else if( minHeap->count < minHeap->capacity )
{
int count = minHeap->count;
minHeap->array[ count ]. frequency = (*root)->frequency;
minHeap->array[ count ]. word = new char [strlen( word ) + 1];
strcpy( minHeap->array[ count ]. word, word );
  
minHeap->array[ count ]. root = *root;
(*root)->indexMinHeap = minHeap->count;
  
++( minHeap->count );
buildMinHeap( minHeap );
}
  
// Case 3: Word is not present and heap is full. And frequency of word
// is more than root. The root is the least frequent word in heap,
// replace root with new word
else if ( (*root)->frequency > minHeap->array[0]. frequency )
{
  
minHeap->array[ 0 ]. root->indexMinHeap = -1;
minHeap->array[ 0 ]. root = *root;
minHeap->array[ 0 ]. root->indexMinHeap = 0;
minHeap->array[ 0 ]. frequency = (*root)->frequency;
  
// delete previously allocated memoory and
delete [] minHeap->array[ 0 ]. word;
minHeap->array[ 0 ]. word = new char [strlen( word ) + 1];
strcpy( minHeap->array[ 0 ]. word, word );
  
minHeapify ( minHeap, 0 );
}
}
  
// Inserts a new word to both Trie and Heap
void insertUtil ( TrieNode** root, MinHeap* minHeap,
const char* word, const char* dupWord )
{
// Base Case
if ( *root == NULL )
*root = newTrieNode();
  
// There are still more characters in word
if ( *word != '\0' )
insertUtil ( &((*root)->child[ tolower( *word ) - 97 ]),
minHeap, word + 1, dupWord );
else // The complete word is processed
{
// word is already present, increase the frequency
if ( (*root)->isEnd )
++( (*root)->frequency );
else
{
(*root)->isEnd = 1;
(*root)->frequency = 1;
}
  
// Insert in min heap also
insertInMinHeap( minHeap, root, dupWord );
}
}
  
  
// add a word to Trie & min heap. A wrapper over the insertUtil
void insertTrieAndHeap(const char *word, TrieNode** root, MinHeap* minHeap)
{
insertUtil( root, minHeap, word, word );
}
  
// A utility function to show results, The min heap
// contains k most frequent words so far, at any time
void displayMinHeap( MinHeap* minHeap )
{
int i;
  
// print top K word with frequency
for( i = 0; i < minHeap->count; ++i )
{
printf( "%s : %d\n", minHeap->array[i].word,
minHeap->array[i].frequency );
}
}
  
// The main funtion that takes a file as input, add words to heap
// and Trie, finally shows result from heap
void printKMostFreq( FILE* fp, int k )
{
// Create a Min Heap of Size k
MinHeap* minHeap = createMinHeap( k );

// Create an empty Trie
TrieNode* root = NULL;
  
// A buffer to store one word at a time
char buffer[MAX_WORD_SIZE];
  
// Read words one by one from file. Insert the word in Trie and Min Heap
while( fscanf( fp, "%s", buffer ) != EOF )
insertTrieAndHeap(buffer, &root, minHeap);
  
// The Min Heap will have the k most frequent words, so print Min Heap nodes
displayMinHeap( minHeap );
}
  
// Driver program to test above functions
int main()
{
int k = 10;
FILE *fp = fopen ("file.txt", "r");
if (fp == NULL)
printf ("File doesn't exist ");
else
printKMostFreq (fp, k);
return 0;
}

Kindly revert for any queries

Thanks.

Add a comment
Know the answer?
Add Answer to:
I have a project to write a C program to find how many different words in...
Your Answer:

Post as a guest

Your Name:

What's your source?

Earn Coins

Coins can be redeemed for fabulous gifts.

Not the answer you're looking for? Ask your own homework help question. Our experts will answer your question WITHIN MINUTES for Free.
Similar Homework Help Questions
  • //I NEED THE PROGRAM IN C LANGUAGE!// QUESTION: I need you to write a program which...

    //I NEED THE PROGRAM IN C LANGUAGE!// QUESTION: I need you to write a program which manipulates text from an input file using the string library. Your program will accept command line arguments for the input and output file names as well as a list of blacklisted words. There are two major features in this programming: 1. Given an input file with text and a list of words, find and replace every use of these blacklisted words with the string...

  • In C Programming: (100 pts) Write a program to find the words that have a specific...

    In C Programming: (100 pts) Write a program to find the words that have a specific character among the words of a string. Read a string and a character from the user and display all the words having that character on the screen. One way to do this is to find the words in one sentence and check if they have the required character in them. Another way is to find all the occurence of the character and then find...

  • All the white space among words in a text file was lost. Write a C++ program...

    All the white space among words in a text file was lost. Write a C++ program which using dynamic programming to get all of the possible original text files (i.e. with white spaces between words) and rank them in order of likelihood with the best possible runtime. You have a text file of dictionary words and the popularity class of the word (words are listed from popularity 1-100 (being most popular words), 101-200, etc) - Input is a text file...

  • Description: Overview: You will write a program (says wordcountfreq.c) to find out the number of words and how many times each word appears (i.e., the frequency) in multiple text files. Specifically,...

    Description: Overview: You will write a program (says wordcountfreq.c) to find out the number of words and how many times each word appears (i.e., the frequency) in multiple text files. Specifically, the program will first determine the number of files to be processed. Then, the program will createmultiple threads where each thread is responsible for one file to count the number of words appeared in the file and report the number of time each word appears in a global linked-list....

  • In c programming, I need to write a program that reverses each of the lines in...

    In c programming, I need to write a program that reverses each of the lines in a file. The program accepts two command line arguments: The first one is the name of the input file The second is the name of the output file If the user didn't give two filenames, display an error message and exit. The program reads in each line of the input file and writes each line in reverse to the output file. Note, the lines...

  • C++ Program Help. Was just wondering how to write such code to perform this. - Take...

    C++ Program Help. Was just wondering how to write such code to perform this. - Take approximately 360 words from any English document as your input text. Ignore all blanks, all punctuation marks, all special symbols. Create an input file with this input text - Have the program to read the input from the input file "infile.dat".

  • In c++. I have 6 different text files that I want my program to read during...

    In c++. I have 6 different text files that I want my program to read during different runs. How would I make it so that i can read a select whichi file I want it to read during each run?

  • I just need an algorithm for this please! I have C++ code for it but I dont know how to creat an ...

    I just need an algorithm for this please! I have C++ code for it but I dont know how to creat an algorithm .. CSE 1311-Project 4 Part I: Create and print out the two arrays: (Be sure to do this first) You are allowed to hard code these arrays into your program. You can also put the data into a file and read the information into the program. The data is as follows: 150 250 Anne Bob Ralph 305...

  • C++ (1) Write a program to prompt the user for an input and output file name....

    C++ (1) Write a program to prompt the user for an input and output file name. The program should check for errors in opening the files, and print the name of any file which has an error, and exit if an error occurs. For example, (user input shown in caps in first line, and in second case, trying to write to a folder which you may not have write authority in) Enter input filename: DOESNOTEXIST.T Error opening input file: DOESNOTEXIST.T...

  • Write the following C++ program that searches for specified words hidden within a matrix of letters....

    Write the following C++ program that searches for specified words hidden within a matrix of letters. 1) Read a file that the user specifies. The file will first specify the dimensions of the matrix (e.g., 20 30). These two numbers specify the number of rows in the matrix and then the number of columns in the matrix. 2) Following these two numbers, there will be a number of rows of letters. These letters should be read into your matrix row...

ADVERTISEMENT
Free Homework Help App
Download From Google Play
Scan Your Homework
to Get Instant Free Answers
Need Online Homework Help?
Ask a Question
Get Answers For Free
Most questions answered within 3 hours.
ADVERTISEMENT
ADVERTISEMENT
ADVERTISEMENT