Python
Modify your recommendation program so that it reports the titles of the works rather than their file names. To do this, write a program that reads in the titles.txt file and creates a dictionary that looks up the title using the file name. This dictionary should then be used to report the works by their title instead of their file name.
Script to use:
import os
import math
def count_word(table, word):
    """Increment the count stored for ``word`` in ``table``.

    Args:
        table: dict mapping a word to the number of times it has been seen;
            it is modified in place.
        word: the word whose count is bumped.

    A word that is not yet in the table starts at a count of 1.
    """
    # dict.get supplies the 0 starting value for a first occurrence.
    table[word] = table.get(word, 0) + 1
def analyze():
    """Read every text in the ``docs`` folder and print pairwise similarity.

    Each file is read as UTF-8 and split on whitespace into words. A
    per-file word-count table is built with ``count_word`` and the union of
    all words is collected. Finally the cosine similarity (``similarity``)
    of every ordered pair of files, including each file with itself, is
    printed as ``"<score> : <file> vs. <file>"``.
    """
    docs_dir = 'docs'
    doc_table = dict()
    word_set = set()
    fileList = os.listdir(docs_dir)
    for fname in fileList:
        print("Opening " + fname)
        doc_table[fname] = dict()
        # Join the path instead of os.chdir() so the working directory is
        # never left pointing at docs/ when a read fails; `with` closes the
        # file on every exit path.
        with open(os.path.join(docs_dir, fname), "r", encoding="utf8") as fd:
            data = fd.read()
        print("splitting")
        dataList = data.split()
        print("{} has {} words".format(fname, len(dataList)))
        for word in dataList:
            word_set.add(word)
            count_word(doc_table[fname], word)
    for fname in fileList:
        for fname2 in fileList:
            sim = similarity(doc_table[fname], doc_table[fname2], word_set)
            print("{:.2f} : {} vs. {}".format(sim, fname, fname2))
def build_title_file():
    """Create ``titles.txt`` from the works in the ``docs`` folder.

    For each file in ``docs`` the first line containing ``"Title: "`` is
    located, and two lines are written to ``titles.txt`` in the current
    directory: the file name, then the text that follows the marker.
    Files with no such line contribute nothing.
    """
    docs_dir = 'docs'
    # Write with the same UTF-8 encoding the works are read with, so
    # non-ASCII title characters (e.g. a curly apostrophe) are neither
    # mangled nor rejected by the platform default encoding.
    with open("titles.txt", "w", encoding="utf8") as tfd:
        for fname in os.listdir(docs_dir):
            print("Opening " + fname)
            with open(os.path.join(docs_dir, fname), "r", encoding="utf8") as fd:
                for line in fd:
                    # partition() slices right after the marker wherever it
                    # sits in the line, instead of assuming column 0.
                    _, marker, title = line.partition("Title: ")
                    if marker:
                        tfd.write(fname + "\n")
                        # Guarantee exactly one newline so the next file
                        # name always starts on its own line.
                        tfd.write(title.rstrip("\n") + "\n")
                        break
def similarity(tableA, tableB, words):
    """Return the cosine similarity between two word-count tables.

    Args:
        tableA: dict mapping word -> count for the first document.
        tableB: dict mapping word -> count for the second document.
        words: iterable of the words to compare over; a word missing from a
            table counts as 0 for that table.

    Returns:
        dot(A, B) / (|A| * |B|) computed over ``words``, or 0.0 when either
        table has zero magnitude over ``words`` (for example an empty
        document), where the ratio is undefined.
    """
    ab = 0
    a2 = 0
    b2 = 0
    for w in words:
        a = tableA.get(w, 0)
        b = tableB.get(w, 0)
        ab += a * b
        a2 += a * a
        b2 += b * b
    # Guard the division: an all-zero vector would raise ZeroDivisionError.
    if a2 == 0 or b2 == 0:
        return 0.0
    return ab / (math.sqrt(a2) * math.sqrt(b2))
Titles.txt file
alice_in_wonderland.txt
Alice’s Adventures in Wonderland
dracula.txt
Dracula
frankenstein.txt
Frankenstein
jane_eyre.txt
Jane Eyre
moby_dick.txt
Moby Dick; or The Whale
pride_and_prejudice.txt
Pride and Prejudice
tale_of_two_cities.txt
A Tale of Two Cities
udolpho.txt
The Mysteries of Udolpho
wizard_of_oz.txt
The Wonderful Wizard of Oz
#save the following in filename.py
import os
import math
def count_word(table, word):
    """Record one more occurrence of ``word`` in ``table``.

    Args:
        table: dict mapping a word to how many times it has been seen;
            updated in place.
        word: the word to count.

    The first occurrence of a word sets its count to 1.
    """
    # A missing key reads as 0, so first and later occurrences share a path.
    table[word] = table.get(word, 0) + 1
def analyze():
    """Read every text in ``docs`` and print pairwise similarity by title.

    Each file is read as UTF-8 and split on whitespace into words. A
    per-file word-count table is built with ``count_word`` and the union of
    all words is collected. The cosine similarity (``similarity``) of every
    ordered pair of files, including each file with itself, is then printed
    as ``"<score> : <title> vs. <title>"``.

    Titles are looked up by file name in ``titles.txt`` (two lines per work:
    file name, then title). A work with no entry there, or a missing
    ``titles.txt``, is reported by its file name instead.
    """
    docs_dir = 'docs'
    titles = _load_titles("titles.txt")
    doc_table = dict()
    word_set = set()
    fileList = os.listdir(docs_dir)
    for fname in fileList:
        print("Opening " + fname)
        doc_table[fname] = dict()
        # Join the path instead of os.chdir() so the working directory is
        # never left pointing at docs/ when a read fails; `with` closes the
        # file on every exit path.
        with open(os.path.join(docs_dir, fname), "r", encoding="utf8") as fd:
            data = fd.read()
        print("splitting")
        dataList = data.split()
        print("{} has {} words".format(fname, len(dataList)))
        for word in dataList:
            word_set.add(word)
            count_word(doc_table[fname], word)
    for fname in fileList:
        for fname2 in fileList:
            sim = similarity(doc_table[fname], doc_table[fname2], word_set)
            # Report by title; fall back to the file name when unknown.
            print("{:.2f} : {} vs. {}".format(
                sim, titles.get(fname, fname), titles.get(fname2, fname2)))


def _load_titles(path):
    """Return a dict mapping file name -> title read from ``path``.

    The file is expected to hold alternating lines: a file name followed by
    that work's title. A missing file yields an empty dict.
    """
    try:
        # errors="replace" keeps a titles file written in another encoding
        # from aborting the whole analysis.
        with open(path, "r", encoding="utf8", errors="replace") as tfd:
            lines = tfd.read().splitlines()
    except FileNotFoundError:
        return {}
    # Even-indexed lines are file names, odd-indexed lines their titles;
    # zip drops a dangling file name that has no title line.
    return dict(zip(lines[0::2], lines[1::2]))
def build_title_file():
    """Write ``titles.txt`` listing each work in ``docs`` with its title.

    Every file in ``docs`` is scanned for its first line containing
    ``"Title: "``. When one is found, two lines go to ``titles.txt`` in the
    current directory: the file name, then the text after the marker.
    Files without such a line are left out.
    """
    docs_dir = 'docs'
    # Use UTF-8 for output as well as input so non-ASCII title characters
    # (e.g. a curly apostrophe) survive regardless of the platform default.
    with open("titles.txt", "w", encoding="utf8") as tfd:
        for fname in os.listdir(docs_dir):
            print("Opening " + fname)
            with open(os.path.join(docs_dir, fname), "r", encoding="utf8") as fd:
                for line in fd:
                    # Take what follows the marker wherever it appears in
                    # the line, rather than slicing a fixed 7 characters.
                    _, marker, title = line.partition("Title: ")
                    if marker:
                        tfd.write(fname + "\n")
                        # Exactly one trailing newline keeps the two-line
                        # record layout intact even at end-of-file.
                        tfd.write(title.rstrip("\n") + "\n")
                        break
def similarity(tableA, tableB, words):
    """Compute the cosine similarity of two word-count tables.

    Args:
        tableA: dict mapping word -> count for the first document.
        tableB: dict mapping word -> count for the second document.
        words: iterable of words to compare over; a word absent from a
            table contributes 0 for that table.

    Returns:
        The dot product of the two count vectors divided by the product of
        their magnitudes, or 0.0 when either magnitude is zero (the ratio
        is undefined there, e.g. for an empty document).
    """
    ab = 0
    a2 = 0
    b2 = 0
    for w in words:
        count_a = tableA.get(w, 0)
        count_b = tableB.get(w, 0)
        ab += count_a * count_b
        a2 += count_a * count_a
        b2 += count_b * count_b
    # Avoid ZeroDivisionError for an all-zero vector.
    if a2 == 0 or b2 == 0:
        return 0.0
    return ab / (math.sqrt(a2) * math.sqrt(b2))
def CreateAndFillDictionary(path="titles.txt", encoding="utf8"):
    """Build, print and return the file-name -> title dictionary.

    The titles file holds two lines per work: the file name, then the
    title. The lines are paired up in that order; a trailing file name with
    no title line after it is ignored.

    Args:
        path: location of the titles file. Defaults to ``titles.txt`` in
            the current directory, which is where ``build_title_file``
            writes it.
        encoding: text encoding used to read the file.

    Returns:
        dict mapping each file name to its title.
    """
    # `with` closes the file even if reading fails.
    with open(path, "r", encoding=encoding) as titlesFile:
        lines = titlesFile.read().splitlines()
    # Even-indexed lines are keys (file names), odd-indexed lines are values
    # (titles). The local is not named `dict`, so the builtin stays usable.
    titles = {}
    for key, value in zip(lines[0::2], lines[1::2]):
        titles[key] = value
    print("dictionary contents: \n", titles)
    print("\nKeys in dictionary:\n", titles.keys())
    print("\nValues in dictionary:\n", titles.values())
    return titles
# Execution starts here: build the file-name -> title dictionary from the
# titles file and print its contents, keys and values.
CreateAndFillDictionary()
Python Modify your recommendation program so that it reports the titles of the works rather than their file names. To do...
Python 12.10 LAB: Sorting TV Shows (dictionaries and lists) Write a program that first reads in the name of an input file and then reads the input file using the file.readlines() method. The input file contains an unsorted list of number of seasons followed by the corresponding TV show. Your program should put the contents of the input file into a dictionary where the number of seasons are the keys, and a list of TV shows are the values (since...
Python Modify your program from Learning Journal Unit 7 to read dictionary items from a file and write the inverted dictionary to a file. You will need to decide on the following: How to format each dictionary item as a text string in the input file. How to convert each input string into a dictionary item. How to format each item of your inverted dictionary as a text string in the output file. Create an input file with your original...
I can't get this Python program to read all the unique words in a file. It can only read the number of the unique words in a line. import re import string from collections import Counter # opens user inputted filename ".txt" and (w+) makes new and writes def main(): textname = input("Enter the file to search: ") fh = open(textname, 'r', encoding='utf-8' ) linecount = 0 wordcount = 0 count = {} print("Sumary of the", fh) for line in...
Modify your program from Learning Journal Unit 7 to read dictionary items from a file and write the inverted dictionary to a file. You will need to decide on the following: How to format each dictionary item as a text string in the input file. How to convert each input string into a dictionary item. How to format each item of your inverted dictionary as a text string in the output file. Create an input file with your original three-or-more items and add at...