I'm using Python 3.7 with Spyder
I need the full code and the same output as the sample above
Resources file: https://drive.google.com/file/d/1e5a21ZKRj2H_jOnWvg7HcjUKjJlY84KE/view - https://drive.google.com/file/d/1XIA41ra8AaKjFuxO5VpwVkn90bxwDyB5/view
The question asks us to take the code from the GitHub link and use it to classify the given messages, so I am using that same code here.
import re
from math import log, sqrt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from wordcloud import WordCloud
%matplotlib inline
# Load the SMS dataset from spam.csv, decoding the file as latin-1.
# NOTE: the bare `mails.head()` / `value_counts()` expressions below only
# display something in an interactive console or notebook cell; wrap them in
# print() if you run this file as a plain script and want to see the output.
mails = pd.read_csv('spam.csv', encoding='latin-1')
mails.head()

# Remove the three extra 'Unnamed' columns; only 'v1' and 'v2' are used below.
mails.drop(['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], axis=1, inplace=True)
mails.head()

# Give the two remaining columns descriptive names.
mails.rename(columns={'v1': 'labels', 'v2': 'message'}, inplace=True)
mails.head()
mails['labels'].value_counts()

# Encode the text label numerically: ham -> 0, spam -> 1.
mails['label'] = mails['labels'].map({'ham': 0, 'spam': 1})
mails.head()

# The textual 'labels' column is redundant once 'label' exists.
mails.drop(['labels'], axis=1, inplace=True)
mails.head()
# Number of rows in the dataset, read from the frame itself instead of the
# hard-coded 4825 + 747 so the split also works for a different CSV.
totalMails = mails.shape[0]

# Assign each row position to the training set with probability 0.75 and to
# the test set otherwise. The split is random, so the exact sizes (and the
# resulting metrics) vary from run to run unless np.random is seeded.
trainIndex, testIndex = [], []
for i in range(totalMails):
    if np.random.uniform(0, 1) < 0.75:
        trainIndex.append(i)
    else:
        testIndex.append(i)

# `i` is a row position, so select with iloc. reset_index(drop=True)
# renumbers the rows from 0 without keeping the old index as a column, and it
# returns a new frame rather than mutating a slice of `mails` in place.
trainData = mails.iloc[trainIndex].reset_index(drop=True)
trainData.head()
testData = mails.iloc[testIndex].reset_index(drop=True)
testData.head()

# Class balance (0 = ham, 1 = spam) of each partition.
trainData['label'].value_counts()
testData['label'].value_counts()
# Concatenate all spam messages into one space-separated string.
# Spam rows are label == 1 ('ham' maps to 0, 'spam' maps to 1); selecting
# label == 0 here would draw the ham word cloud instead.
spam_words = ' '.join(mails[mails['label'] == 1]['message'])

# Render a 512x512 word cloud of the spam vocabulary.
spam_wc = WordCloud(width=512, height=512).generate(spam_words)

# Show the cloud on a black 10x8 figure with no axes and no padding.
plt.figure(figsize=(10, 8), facecolor='k')
plt.imshow(spam_wc)
plt.axis('off')
plt.tight_layout(pad=0)
plt.show()
# NOTE: SpamClassifier and metrics are not defined in this snippet; they must
# be defined (or pasted in) above this point before the code below can run.

# Train the model on the training set using tf-idf word vectors, then score
# its predictions on the held-out test messages.
sc_tf_idf = SpamClassifier(trainData, 'tf-idf')
sc_tf_idf.train()
preds_tf_idf = sc_tf_idf.predict(testData['message'])
print(metrics(testData['label'], preds_tf_idf))

# Train a second model on the training set using bag-of-words vectors and
# score it the same way.
sc_bow = SpamClassifier(trainData, 'bow')
sc_bow.train()
preds_bow = sc_bow.predict(testData['message'])
print(metrics(testData['label'], preds_bow))
# The two messages to classify. Adjacent string literals inside parentheses
# are concatenated by Python, so each message is a single one-line string.
message1 = (
    "Want to change how you "
    "recieve these mails? You can update your preferences or unsubscribe "
    "from this list at http://guru.phishing.guru/."
)
# NOTE(review): "[email protected]" looks like an e-mail address that was
# masked when this text was copied from a web page -- replace it with the
# address from the original assignment if you have it.
message2 = (
    "You are invited to law court by the judge of the law "
    "voilation. Case:#3118804 Date: April 5, 2019, sent by "
    "[email protected]"
)

# Classify each message with the tf-idf model: print the processed message
# first, then the classifier's verdict for it.
for message in (message1, message2):
    pm = process_message(message)
    print(pm)
    print(sc_tf_idf.classify(pm))
Use the process_message function, the SpamClassifier class and the metrics function exactly as they are provided in the code at the GitHub link.
The required output, as shown in the images in the question, is as follows:
I hope this answers your question and that the explanation is clear. If you have any doubts, ask in the comments.
Thank you:):)
I'm using Python 3.7 with Spyder I need the full code and the same output as...