# Assessment-2B/main.py
#
# File-viewer metadata: 93 lines, no EOL, 3.2 KiB, Python

import json
import string
# Translation table that deletes every ASCII punctuation character.
# maketrans creates a table of characters to change, change with, and remove;
# here nothing is changed and only punctuation is removed.
# Built once at import time because the table never varies between calls.
# Derived from: https://datagy.io/python-remove-punctuation-from-string/
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def sanitiseInput(dirtyQuestion):
    """Sanitise a user inputted question so it can be compared with others.

    Removes every character in string.punctuation, converts the text to
    lower case and strips leading and trailing whitespace.

    Arguments:
    dirtyQuestion -- a question that may contain punctuation and
                     different-cased letters
    Returns:
    String with the user's input without punctuation, all in lower case and
    without surrounding whitespace (whitespace inside the text is kept)
    """
    return dirtyQuestion.translate(_PUNCTUATION_TABLE).lower().strip()
def jaccardSimilarity(userQuestion, questionBank):
    """Check the similarity of a user defined question to one known question.

    Both strings are passed through sanitiseInput and reduced to the set of
    distinct characters they contain. The score is the Jaccard index of the
    two sets: size of the intersection divided by size of the union.

    Arguments:
    userQuestion -- question that the user has entered
    questionBank -- a single known question string to compare against
                    (despite the name, this is one question's text, not the
                    whole bank of questions and answers)
    Returns:
    Float between 0 and 1 of similarity (1 meaning both strings use exactly
    the same set of characters). Returns 0.0 when both strings are empty
    after sanitising, because there is nothing to compare.
    """
    # NOTE(review): the sets hold characters (including the space character),
    # not words, so word order and repeated letters do not affect the score.
    userChars = set(sanitiseInput(userQuestion))
    knownChars = set(sanitiseInput(questionBank))
    allChars = userChars | knownChars
    if not allChars:
        # Both inputs were empty or punctuation-only; dividing by the size of
        # the empty union would raise ZeroDivisionError.
        return 0.0
    # True division already yields a float in Python 3.
    return len(userChars & knownChars) / len(allChars)
def mostLikely(userQuestion, faqPath="faq.json"):
    """Find the known question that is most similar to a user entered question.

    Arguments:
    userQuestion -- question that the user has entered
    faqPath -- path of the JSON file holding the known questions; it is
               iterated entry by entry and each entry is indexed with
               'question' (defaults to "faq.json")
    Returns:
    The entry from the question bank (as parsed from JSON) whose 'question'
    scores highest with jaccardSimilarity; the earliest entry wins a tie
    Raises:
    ValueError -- if the question bank contains no entries
    """
    # The with-block closes the file even if parsing fails. JSON text is
    # UTF-8, so decode it explicitly instead of relying on the platform
    # default encoding.
    with open(faqPath, "r", encoding="utf-8") as fileObject:
        questionBank = json.load(fileObject)

    if not questionBank:
        # max() would raise a ValueError here anyway; say what is wrong.
        raise ValueError("No questions found in " + repr(faqPath))

    # max() returns the first entry with the highest score, which matches
    # looking up the index of the highest score in a list of scores.
    return max(
        questionBank,
        key=lambda faqPair: jaccardSimilarity(userQuestion, faqPair['question']),
    )
def continueQuestions(userQuestion):
    """Answer one question and report whether the caller should ask again.

    If the question still has content after sanitiseInput, the closest known
    question and its answer are printed and the question, exactly as typed,
    is appended to asked_questions_log.txt.

    Arguments:
    userQuestion -- question that the user has entered
    Returns:
    True if the question was answered and the caller should loop again,
    False if the input had no content
    """
    # Guard clause: no content left after sanitising means there is nothing
    # to answer, so signal the caller to stop.
    if not sanitiseInput(userQuestion):
        return False

    bestMatch = mostLikely(userQuestion)
    quotedQuestion = "'" + bestMatch['question'] + "'"
    quotedAnswer = "'" + bestMatch['answer'] + "'"
    print("I think that you asked " + quotedQuestion
          + " and conclude that the answer is " + quotedAnswer + ".\n")

    # Append the question to the log; 'a+' creates the file if it is missing.
    with open("asked_questions_log.txt", "a+") as questionLog:
        questionLog.write(userQuestion + "\n")
    return True
def main():
    """Run the interactive question answering loop.

    Greets the user, then keeps prompting for questions and handing them to
    continueQuestions until it returns False (input with no content) or
    standard input is closed, and finally prints a goodbye message.
    """
    prompt = 'Please enter a question, and press the ENTER key: \n'

    print("Hello, I am a question answering bot.\n")
    # Loop again and again while the user enters a question
    while True:
        try:
            userQuestion = input(prompt)
        except EOFError:
            # stdin was closed (for example Ctrl-D or the end of piped
            # input); leave the loop so the goodbye message is still printed
            # instead of a traceback.
            break
        if not continueQuestions(userQuestion):
            break
    # Exit program gracefully with a message
    print("Goodbye!")
# Start the bot only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()