Python Modify your recommendation program so that it reports the titles of the works rather than their file names. To do...

Question

Question

Python Modify your recommendation program so that it reports the titles of the works rather than their file names. To do...

Python

Modify your recommendation program so that it reports the titles of the works rather than their file names. To do this, write a program that reads in the titles.txt file and creates a dictionary that looks up the title using the file name. This dictionary should then be used to report the works by their title instead of their file name.

Script to use:

import os
import math

def count_word(table, word):
    """Increment the count stored for ``word`` in ``table``.

    Args:
        table: dict mapping a word to the number of times it has been seen.
            It is updated in place.
        word: the word to count. A word not yet in ``table`` starts at 1.
    """
    # dict.get supplies 0 for an unseen word, so a single statement covers
    # both the first occurrence and every later one.
    table[word] = table.get(word, 0) + 1

def analyze():
    """Read all texts from the ``docs`` folder and report pairwise similarity.

    A word-count table is built for each file in ``docs``; words are the
    whitespace-separated tokens of the file. The similarity of every ordered
    pair of files (including each file with itself) is then printed, as
    computed by ``similarity`` over the set of all words seen.
    """
    docs_dir = 'docs'
    doc_table = dict()
    word_set = set()
    # Paths are built with os.path.join rather than os.chdir, so the working
    # directory is never left pointing at docs/ when a read fails part-way.
    fileList = os.listdir(docs_dir)
    for fname in fileList:
        print("Opening " + fname)
        doc_path = os.path.join(docs_dir, fname)
        # The with-block closes the file even if reading raises.
        with open(doc_path, "r", encoding="utf8") as fd:
            data = fd.read()
        doc_table[fname] = dict()
        print("splitting")
        dataList = data.split()
        print("{} has {} words".format(fname, len(dataList)))
        for word in dataList:
            word_set.add(word)
            count_word(doc_table[fname], word)

    for fname in fileList:
        for fname2 in fileList:
            sim = similarity(doc_table[fname], doc_table[fname2], word_set)
            print("{:.2f} : {} vs. {}".format(sim, fname, fname2))

def build_title_file():
    """Create ``titles.txt`` from the works in the ``docs`` folder.

    For every file in ``docs`` that contains a ``Title: `` line, two lines
    are written to ``titles.txt`` in the current directory: the file name,
    then the title text. Only the first such line of each file is used;
    files without one contribute nothing.
    """
    docs_dir = 'docs'
    # Written as UTF-8, the same encoding the works are read with, so a
    # non-ASCII character in a title (such as a curly apostrophe) does not
    # depend on the platform's default encoding.
    with open("titles.txt", "w", encoding="utf8") as tfd:
        for fname in os.listdir(docs_dir):
            print("Opening " + fname)
            doc_path = os.path.join(docs_dir, fname)
            with open(doc_path, "r", encoding="utf8") as fd:
                for line in fd:
                    # partition yields the text after the marker wherever the
                    # marker sits in the line; a fixed slice is only right
                    # when the marker starts at column 0.
                    _, marker, title = line.partition("Title: ")
                    if marker:
                        tfd.write(fname + "\n")
                        # Normalise the line ending so a title found on a
                        # file's last, unterminated line cannot run into the
                        # next file name.
                        tfd.write(title.rstrip("\n") + "\n")
                        break

def similarity(tableA, tableB, words):
    """Return the cosine similarity between tableA and tableB over all words.

    Args:
        tableA: dict mapping word -> count for the first document.
        tableB: dict mapping word -> count for the second document.
        words: iterable of the words to compare over; a word missing from a
            table counts as 0.

    Returns:
        The dot product of the two count vectors divided by the product of
        their lengths, or 0.0 when either vector is all zeros.
    """
    ab = 0
    a2 = 0
    b2 = 0
    for w in words:
        a = tableA.get(w, 0)
        b = tableB.get(w, 0)
        ab += a * b
        a2 += a * a
        b2 += b * b
    denominator = math.sqrt(a2) * math.sqrt(b2)
    # An empty document has a zero-length vector; report "no similarity"
    # instead of raising ZeroDivisionError.
    if denominator == 0:
        return 0.0
    return ab / denominator

Titles.txt file

alice_in_wonderland.txt
Alice’s Adventures in Wonderland
dracula.txt
Dracula
frankenstein.txt
Frankenstein
jane_eyre.txt
Jane Eyre
moby_dick.txt
Moby Dick; or The Whale
pride_and_prejudice.txt
Pride and Prejudice
tale_of_two_cities.txt
A Tale of Two Cities
udolpho.txt
The Mysteries of Udolpho
wizard_of_oz.txt
The Wonderful Wizard of Oz

engineering Computer-Science

Add a comment Improve this question Transcribed image text

Answer 1

Answer #1

dictionary contents: (alice_in_wonderland.txt: AliceAlxads Adventures in Wonderland, dracula.txt: Drac a, frankenstei 01.py-C/Users/MANI KANTA/Desktop/test/Ol.py (3.7.2) File Edit Format Run Options Window Help if Title: in line: tfd.write titles.tt X 1 alice in wonderland.txt 2 Aliceis Adventures in Wonderland 3 dracula.txt 4 Dracula 5 frankenstein.txt 6 Franken

#save the following in filename.py

import os
import math

def count_word(table, word):
    """Record one more occurrence of ``word`` in ``table``.

    Args:
        table: dict of word -> occurrence count, modified in place.
        word: the word just seen; its count becomes 1 if it was absent,
            otherwise it goes up by one.
    """
    # A missing word is treated as having a count of 0, which removes the
    # need for a separate initialisation branch.
    table[word] = table.get(word, 0) + 1

def analyze(titles=None):
    """Read all texts from the ``docs`` folder and report pairwise similarity.

    A word-count table is built for each file in ``docs``; words are the
    whitespace-separated tokens of the file. The similarity of every ordered
    pair of files (including each file with itself) is then printed, as
    computed by ``similarity`` over the set of all words seen.

    Args:
        titles: optional dict mapping a file name in ``docs`` to the title of
            that work. When given, the similarity report names each work by
            its title; a file with no entry is still shown by file name.
            With the default ``None`` the report uses file names throughout.
    """
    docs_dir = 'docs'
    # An absent mapping behaves like an empty one: every lookup falls back
    # to the file name.
    labels = titles if titles is not None else {}
    doc_table = dict()
    word_set = set()
    # Paths are built with os.path.join rather than os.chdir, so the working
    # directory is never left pointing at docs/ when a read fails part-way.
    fileList = os.listdir(docs_dir)
    for fname in fileList:
        print("Opening " + fname)
        doc_path = os.path.join(docs_dir, fname)
        # The with-block closes the file even if reading raises.
        with open(doc_path, "r", encoding="utf8") as fd:
            data = fd.read()
        doc_table[fname] = dict()
        print("splitting")
        dataList = data.split()
        print("{} has {} words".format(fname, len(dataList)))
        for word in dataList:
            word_set.add(word)
            count_word(doc_table[fname], word)

    for fname in fileList:
        for fname2 in fileList:
            sim = similarity(doc_table[fname], doc_table[fname2], word_set)
            print("{:.2f} : {} vs. {}".format(
                sim, labels.get(fname, fname), labels.get(fname2, fname2)))

def build_title_file():
    """Create ``titles.txt`` from the works in the ``docs`` folder.

    For every file in ``docs`` that contains a ``Title: `` line, two lines
    are written to ``titles.txt`` in the current directory: the file name,
    then the title text. Only the first such line of each file is used;
    files without one contribute nothing.
    """
    docs_dir = 'docs'
    # Written as UTF-8, the same encoding the works are read with, so a
    # non-ASCII character in a title (such as a curly apostrophe) does not
    # depend on the platform's default encoding.
    with open("titles.txt", "w", encoding="utf8") as tfd:
        for fname in os.listdir(docs_dir):
            print("Opening " + fname)
            doc_path = os.path.join(docs_dir, fname)
            with open(doc_path, "r", encoding="utf8") as fd:
                for line in fd:
                    # partition yields the text after the marker wherever the
                    # marker sits in the line; a fixed slice is only right
                    # when the marker starts at column 0.
                    _, marker, title = line.partition("Title: ")
                    if marker:
                        tfd.write(fname + "\n")
                        # Normalise the line ending so a title found on a
                        # file's last, unterminated line cannot run into the
                        # next file name.
                        tfd.write(title.rstrip("\n") + "\n")
                        break

def similarity(tableA, tableB, words):
    """Return the cosine similarity between tableA and tableB over all words.

    Args:
        tableA: dict of word -> count for one document.
        tableB: dict of word -> count for the other document.
        words: iterable of words spanning the comparison; a word absent from
            a table contributes a count of 0.

    Returns:
        dot(A, B) / (|A| * |B|) for the two count vectors, or 0.0 when
        either vector has zero length.
    """
    ab = 0
    a2 = 0
    b2 = 0
    for w in words:
        # Look each count up once and reuse it for all three sums.
        count_a = tableA.get(w, 0)
        count_b = tableB.get(w, 0)
        ab += count_a * count_b
        a2 += count_a * count_a
        b2 += count_b * count_b
    norm_product = math.sqrt(a2) * math.sqrt(b2)
    # A document with no counted words would otherwise cause a division by
    # zero; treat it as having no similarity to anything.
    if norm_product == 0:
        return 0.0
    return ab / norm_product

def CreateAndFillDictionary(path="docs/titles.txt"):
    """Build, print and return a dictionary that looks up a title by file name.

    The titles file is read as alternating lines: a file name, then the
    title of that work.

    Args:
        path: location of the titles file. The default is the path this
            function has always read.
            NOTE(review): build_title_file writes titles.txt to the current
            directory rather than docs/ - confirm which location is intended.

    Returns:
        dict mapping each file name to its title. The dictionary, its keys
        and its values are also printed.
    """
    # errors="replace" lets a titles file that was written earlier in a
    # legacy platform encoding still load instead of raising.
    with open(path, "r", encoding="utf8", errors="replace") as titlesFile:
        lines = titlesFile.read().splitlines()
    # Even-indexed lines are file names and odd-indexed lines their titles;
    # zip drops a trailing file name that has no title line after it.
    titles = dict(zip(lines[0::2], lines[1::2]))
    print("dictionary contents: \n", titles)
    print("\nKeys in dictionary:\n", titles.keys())
    print("\nValues in dictionary:\n", titles.values())
    return titles

# Execution starts here: read the titles file, build the file-name -> title
# dictionary and print it.
CreateAndFillDictionary()

Add a comment

Answer 2

Python Modify your recommendation program so that it reports the titles of the works rather than their file names. To do...

Homework Answers

Add Answer to:
Python Modify your recommendation program so that it reports the titles of the works rather than their file names. To do...

Post as a guest

Earn Coins

Python 12.10 LAB: Sorting TV Shows (dictionaries and lists) Write a program that first reads in...

Python Modify your program from Learning Journal Unit 7 to read dictionary items from a file...

I can't get this python program to read all the unique words in a file. It...

Modify your program from Learning Journal Unit 7 to read dictionary items from a file and write the inverted dictionary to a file. You will need to decide on the following:

Python Modify your recommendation program so that it reports the titles of the works rather than their file names. To do...

Homework Answers

Add Answer to: Python Modify your recommendation program so that it reports the titles of the works rather than their file names. To do...

Post as a guest

Earn Coins

Python 12.10 LAB: Sorting TV Shows (dictionaries and lists) Write a program that first reads in...

Python Modify your program from Learning Journal Unit 7 to read dictionary items from a file...

I can't get this python program to read all the unique words in a file. It...

Modify your program from Learning Journal Unit 7 to read dictionary items from a file and write the inverted dictionary to a file. You will need to decide on the following:

Add Answer to:
Python Modify your recommendation program so that it reports the titles of the works rather than their file names. To do...