Co-authored-by: Nick Bland <NickBland@users.noreply.github.com>

This commit is contained in:
David Lawler 2022-03-10 13:18:37 +10:00
parent 8518a1689f
commit b04263b32a
6 changed files with 201 additions and 43 deletions

16
.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": true
}
]
}

Binary file not shown.

View file

@ -6,3 +6,65 @@ test
/usr/local/bin/python3 "/Volumes/GoogleDrive-100976413726208790908/My Drive/UNI/INFS 2048 - System Design and Realisation/Assessment 2B/main.py"
weather
today
birthday
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
#!@#!@#@!%#$@#$@%#$%$$#!$#@!#@weather
weather
Hello
$$%
$@$@$@$@$@$
#$#$#$#$
#$#$#$#
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather

96
main.py
View file

@ -1,43 +1,93 @@
import json
import string
def jaccard_similarity(userQuestion, questionBank):
    """Return the Jaccard similarity of two inputs compared as sets.

    Each argument is converted to a set (for strings this is the set of
    their characters), so ordering and repetition are ignored.

    Arguments:
    userQuestion -- question that the user has entered
    questionBank -- known question to compare against
    Returns:
    Float between 0 and 1 (1 meaning both sets are identical); 0.0 when
    both inputs are empty
    """
    s1 = set(userQuestion)
    s2 = set(questionBank)
    union = s1 | s2
    # Two empty inputs give an empty union; report "no similarity" rather
    # than raising ZeroDivisionError.
    if not union:
        return 0.0
    return len(s1 & s2) / len(union)
def sanitiseInput(dirtyQuestion):
    """Sanitise a user inputted question: remove punctuation, make it lower
    case and strip surrounding whitespace.

    Arguments:
    dirtyQuestion -- a question that may contain punctuation and mixed-case letters
    Returns:
    String with the user's input without punctuation and all in lower case
    """
    # str.maketrans('', '', chars) builds a table that replaces nothing and
    # deletes every character in chars; here chars is string.punctuation.
    # Derived from: https://datagy.io/python-remove-punctuation-from-string/
    punctuationRemover = str.maketrans('', '', string.punctuation)
    return dirtyQuestion.translate(punctuationRemover).lower().strip()
def jaccardSimilarity(userQuestion, questionBank):
    """Check the similarity of a user defined question to a known question.

    Both strings are sanitised first and then compared as sets of
    characters, so word order and repeated letters do not affect the score.

    Arguments:
    userQuestion -- question that the user has entered
    questionBank -- text of the known question to compare against
    Returns:
    Float between 0 and 1 of similarity (1 being the same set of
    characters); 0.0 when both sanitised inputs are empty
    """
    userChars = set(sanitiseInput(userQuestion))
    bankChars = set(sanitiseInput(questionBank))
    allChars = userChars | bankChars
    # Nothing left on either side after sanitising: avoid dividing by zero
    # and treat the pair as not similar.
    if not allChars:
        return 0.0
    return len(userChars & bankChars) / len(allChars)
def mostLikely(userQuestion):
    """Find the known question that is most similar to the user's question.

    The question bank is read from "faq.json" (relative to the current
    working directory) on every call.

    Arguments:
    userQuestion -- question that the user has entered
    Returns:
    The entry of the question bank whose 'question' text scores highest;
    the earliest entry wins when several share the top score
    Raises:
    ValueError if the question bank is an empty list
    """
    # The with-block closes the file even if the JSON cannot be parsed
    with open("faq.json", "r") as fileObject:
        questionBank = json.load(fileObject)

    # Score every known question and keep the entry with the highest score
    return max(
        questionBank,
        key=lambda faqPair: jaccardSimilarity(userQuestion, faqPair['question']),
    )
def continueQuestions(userQuestion):
    """Answer a single question and say whether the caller should ask again.

    Arguments:
    userQuestion -- question that the user has entered
    Returns:
    False if nothing remains of the input after sanitising it, otherwise
    True once the answer has been printed and the question logged
    """
    # Guard: no usable input means the program should stop looping
    if not sanitiseInput(userQuestion):
        return False

    bestMatch = mostLikely(userQuestion)
    askedPart = "I think that you asked " + "'" + bestMatch['question'] + "'"
    answerPart = " and conclude that the answer is " + "'" + bestMatch['answer'] + "'.\n"
    print(askedPart + answerPart)

    # Append the question exactly as typed to the log ('a+' = append & read)
    with open("asked_questions_log.txt", "a+") as logFile:
        logFile.write(userQuestion + "\n")
    return True
def main():
    """Run the interactive question answering loop.

    Greets the user, then keeps prompting for questions and handing them to
    continueQuestions until it returns False (no usable input) or standard
    input is closed, and finally prints a goodbye message.
    """
    print("Hello, I am a question answering bot.\n")

    # Loop again and again while the user enters a question
    while True:
        try:
            userQuestion = input('Please enter a question, and press the ENTER key: \n')
        except EOFError:
            # stdin was closed (e.g. Ctrl-D or piped input ran out): treat it
            # like an empty answer and leave the loop instead of crashing
            break
        if not continueQuestions(userQuestion):
            break

    # Exit program gracefully with a message
    print("Goodbye!")


if __name__ == "__main__":
    main()

View file

@ -1,25 +1,55 @@
import main
import pytest
#def test_CanInstantiatejaccard_similarity():
# co = main.jaccard_similarity(userQuestion="Weather", questionBank="Same as yesterday.")
# assert main.jaccard_similarity(float)
from os.path import exists
def test_likelyQuestion():
    """The single word 'birthday' should match the birthday question."""
    likelyQuestion = main.mostLikely("birthday")
    assert likelyQuestion['question'] == 'What is your birthday?'
def test_likelyAnswer():
    """A weather question should produce the stored weather answer."""
    likelyAnswer = main.mostLikely("weather today")
    assert likelyAnswer['answer'] == 'Same as yesterday.'
def test_detection_of_no_user_input():
    """An empty question must tell the main loop to stop."""
    userQuestion = ''
    assert not main.continueQuestions(userQuestion)
def testDetectionOfUserInput():
    """A real question must tell the main loop to keep going."""
    shouldContinue = main.continueQuestions('weather')
    assert shouldContinue
def testFaqJsonFileExists():
    """The question bank file must be present in the working directory."""
    assert exists('./faq.json')
# Check whether log file gets made after a successful run
def testLogfileExists():
    """Answering a question must leave the question log on disk."""
    # Perform a run here so the test does not rely on an earlier test (or a
    # previous session) having already created the log file.
    assert main.continueQuestions('weather')
    assert exists('./asked_questions_log.txt')
def readFileLines(fileName):
    """Count the lines of a file that are not just a newline character.

    Arguments:
    fileName -- path of the text file to read
    Returns:
    Number of lines that differ from "\\n" (whitespace-only lines count)
    """
    # The with-block closes the file even if reading fails part-way
    with open(fileName, "r") as file:
        return sum(1 for line in file if line != "\n")
def testLogfileLinecountIncreases():
    """Answering one question must add exactly one line to the log."""
    linesBefore = readFileLines("./asked_questions_log.txt")
    main.continueQuestions('weather')
    linesAfter = readFileLines("./asked_questions_log.txt")
    assert linesAfter - linesBefore == 1
def testDetectionIfUserEntersSymbolsOnly():
    """Input made only of punctuation counts as no question at all."""
    shouldContinue = main.continueQuestions('#$%^&*')
    assert not shouldContinue
def testDetectionIfUserEntersSpaceOnly():
    """Input made only of whitespace counts as no question at all."""
    shouldContinue = main.continueQuestions(' ')
    assert not shouldContinue