Co-authored-by: Nick Bland <NickBland@users.noreply.github.com>

This commit is contained in:
David Lawler 2022-03-10 13:18:37 +10:00
parent 8518a1689f
commit b04263b32a
6 changed files with 201 additions and 43 deletions

16
.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": true
}
]
}

Binary file not shown.

View file

@ -6,3 +6,65 @@ test
/usr/local/bin/python3 "/Volumes/GoogleDrive-100976413726208790908/My Drive/UNI/INFS 2048 - System Design and Realisation/Assessment 2B/main.py" /usr/local/bin/python3 "/Volumes/GoogleDrive-100976413726208790908/My Drive/UNI/INFS 2048 - System Design and Realisation/Assessment 2B/main.py"
weather weather
today today
birthday
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
#!@#!@#@!%#$@#$@%#$%$$#!$#@!#@weather
weather
Hello
$$%
$@$@$@$@$@$
#$#$#$#$
#$#$#$#
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather
weather

96
main.py
View file

@ -1,43 +1,93 @@
import json import json
import string
def jaccard_similarity(userQuestion, questionBank): def sanitiseInput(dirtyQuestion):
s1 = set(userQuestion) """Sanitise a user inputted question to make it lower case, remove punctuation
s2 = set(questionBank)
return float(len(s1.intersection(s2)) / len(s1.union(s2)))
def most_likely(userQuestion): Arguments:
dirtyQuestion -- a question that contains punctuation and different-cased letters
Returns:
String with the user's input without punctuation and all in lower case
"""
# Translate the dirty input into a sanitised one by removing all punctuation characters
# maketrans creates a table of characters to change, change with, and remove. In this case, no changes except remove punctuation.
# Derived from: https://datagy.io/python-remove-punctuation-from-string/
newQuestion = dirtyQuestion.translate(str.maketrans('', '', string.punctuation))
newQuestion = newQuestion.lower().strip()
return newQuestion
def jaccardSimilarity(userQuestion, questionBank):
    """Score how similar a user's question is to one known question.

    Both strings are sanitised and then compared as sets of characters using
    the Jaccard index (size of the intersection divided by size of the union).

    Arguments:
        userQuestion -- question that the user has entered
        questionBank -- text of a single known question to compare against
    Returns:
        Float between 0 and 1 of similarity (1 meaning both strings use exactly
        the same set of characters); 0.0 when both inputs are empty after
        sanitising
    """
    s1 = set(sanitiseInput(userQuestion))
    s2 = set(sanitiseInput(questionBank))
    union = s1 | s2
    # Two inputs that sanitise to nothing have an empty union; report no
    # similarity instead of dividing by zero.
    if not union:
        return 0.0
    return len(s1 & s2) / len(union)
def mostLikely(userQuestion):
"""Checks how likely a user entered question is to the list of known questions
Arguments:
userQuestion -- question that the user has entered
Returns:
String of the most likely question from the known user bank
"""
likelihoodScore = [] likelihoodScore = []
fileObject = open("faq.json", "r") fileObject = open("faq.json", "r")
jsonContent = fileObject.read() jsonContent = fileObject.read()
aList = json.loads(jsonContent) questionBank = json.loads(jsonContent)
for json_object in aList: # Using list of known questions, iterate through and determine similarity of all of them
likelihoodScore.append(jaccard_similarity(userQuestion, json_object['question'])) for faqPair in questionBank:
#print(likelihoodScore) likelihoodScore.append(jaccardSimilarity(userQuestion, faqPair['question']))
# Get the question that has the highest likelihood
mostLikelyIndex = likelihoodScore.index(max(likelihoodScore)) mostLikelyIndex = likelihoodScore.index(max(likelihoodScore))
mostLikely = aList[mostLikelyIndex] mostLikely = questionBank[mostLikelyIndex]
fileObject.close() fileObject.close() # Close file to prevent IO errors
return mostLikely return mostLikely
def continueQuestions(userQuestion):
    """Answer one question and report whether the prompt loop should continue.

    Arguments:
        userQuestion -- question that the user has entered
    Returns:
        True once the question has been answered and logged, or False when
        nothing is left of the input after sanitising (the exit signal)
    """
    # Guard clause: input that is empty once sanitised means "stop asking".
    if not sanitiseInput(userQuestion):
        return False

    bestMatch = mostLikely(userQuestion)
    reply = "".join([
        "I think that you asked ", "'", bestMatch['question'], "'",
        " and conclude that the answer is ", "'", bestMatch['answer'], "'.\n",
    ])
    print(reply)

    # Keep a record of the question exactly as typed, one entry per line.
    with open("asked_questions_log.txt", "a+") as logFile:
        logFile.write(userQuestion + "\n")
    return True
def main(): def main():
print("Hello, I am a question answering bot.\n") print("Hello, I am a question answering bot.\n")
userQuestion = '' userQuestion = ''
while True: userQuestion = input('Please enter a question, and press the ENTER key: \n')
userQuestion = input('Please enter a question, and press the ENTER key: \n').strip()
if userQuestion:
likelyQuestion = most_likely(userQuestion)
print ("I think that you asked " + "'" + (likelyQuestion['question']) + "'" + " and conclude that the answer is " + "'" + (likelyQuestion['answer']) + "'.\n")
# Open the file in append & read mode ('a+')
with open("asked_questions_log.txt", "a+") as file_object:
file_object.write("\n")
file_object.write(userQuestion)
# Loop again and again when user enters a question and provide user with results
while continueQuestions(userQuestion):
userQuestion = input('Please enter a question, and press the ENTER key: \n')
else : # Exit program gracefully with a message
print ("Goodbye!") print("Goodbye!")
exit()
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View file

@ -1,25 +1,55 @@
import main import main
import pytest import pytest
from os.path import exists
#def test_CanInstantiatejaccard_similarity():
# co = main.jaccard_similarity(userQuestion="Weather", questionBank="Same as yesterday.")
# assert main.jaccard_similarity(float)
def test_likelyQuestion(): def test_likelyQuestion():
likelyQuestion = main.most_likely("birthday") likelyQuestion = main.mostLikely("birthday")
assert likelyQuestion['question'] == 'What is your birthday?' assert likelyQuestion['question'] == 'What is your birthday?'
def test_likelyAnswer(): def test_likelyAnswer():
likelyAnswer = main.most_likely("weather today") likelyAnswer = main.mostLikely("weather today")
assert likelyAnswer['answer'] == 'Same as yesterday.' assert likelyAnswer['answer'] == 'Same as yesterday.'
def test_detection_of_no_user_input(): def test_detection_of_no_user_input():
# Override the Python built-in input method userQuestion = ''
main.main.input = lambda: '' assert not main.continueQuestions(userQuestion)
# Call the function you would like to test (which uses input)
output = main.main() def testDetectionOfUserInput():
assert output == [ userQuestion = 'weather'
'Hello, I am a question answering bot.', assert main.continueQuestions(userQuestion)
'Please enter a question, and press the ENTER key: ',
'Goodbye!' def testFaqJsonFileExists():
] fileName = exists('./faq.json')
assert fileName
# The question log should be on disk once a question has been answered successfully
def testLogfileExists():
    """Assert that the asked-questions log is present in the working directory."""
    assert exists('./asked_questions_log.txt')
def readFileLines(fileName):
    """Count the non-empty lines in a text file.

    A line counts as empty only when it consists of a single newline
    character; lines holding other whitespace are still counted.

    Arguments:
        fileName -- path of the file to read
    Returns:
        Integer number of non-empty lines in the file
    """
    # The context manager closes the file even if reading fails part-way.
    with open(fileName, "r") as file:
        return sum(1 for line in file if line != "\n")
def testLogfileLinecountIncreases():
    """Answering one question must add exactly one non-empty line to the log."""
    logPath = "./asked_questions_log.txt"
    linesBefore = readFileLines(logPath)
    main.continueQuestions('weather')
    assert readFileLines(logPath) == linesBefore + 1
def testDetectionIfUserEntersSymbolsOnly():
    """Input made up solely of punctuation must be treated as no question."""
    assert not main.continueQuestions('#$%^&*')
def testDetectionIfUserEntersSpaceOnly():
    """Input containing only whitespace must be treated as no question."""
    assert not main.continueQuestions(' ')