93 lines
No EOL
3.2 KiB
Python
93 lines
No EOL
3.2 KiB
Python
import json
|
|
import string
|
|
|
|
def sanitiseInput(dirtyQuestion):
    """Normalise a user-entered question so it can be compared with others.

    Every character listed in string.punctuation is deleted, the remaining
    text is lower-cased, and leading/trailing whitespace is stripped.

    Arguments:
    dirtyQuestion -- a question that may contain punctuation and mixed-case letters

    Returns:
    String with the user's input without punctuation, all in lower case and
    without surrounding whitespace
    """
    # maketrans with empty "from"/"to" arguments maps nothing; its third
    # argument lists the characters to delete, here all punctuation.
    # Derived from: https://datagy.io/python-remove-punctuation-from-string/
    punctuationRemover = str.maketrans('', '', string.punctuation)
    return dirtyQuestion.translate(punctuationRemover).lower().strip()
|
|
|
|
def jaccardSimilarity(userQuestion, questionBank):
    """Score how similar a user question is to one known question.

    Both strings are passed through sanitiseInput and then compared as sets:
    the size of their intersection divided by the size of their union.

    NOTE(review): set() over a string yields its distinct *characters*, so
    this is a character-level comparison, not a word-level one -- confirm
    that this is the intended granularity.

    Arguments:
    userQuestion -- question that the user has entered
    questionBank -- a single known question string to compare against
                    (mostLikely passes one entry's 'question' text here)

    Returns:
    Float between 0 and 1 of similarity (1 being exactly the same). When both
    inputs are empty after sanitising, 1.0 is returned instead of dividing by
    zero, since the two inputs are then identical.
    """
    userChars = set(sanitiseInput(userQuestion))
    knownChars = set(sanitiseInput(questionBank))

    combined = userChars | knownChars
    if not combined:
        # Both inputs sanitised to nothing: the ratio would be 0/0.
        return 1.0

    return len(userChars & knownChars) / len(combined)
|
|
|
|
def mostLikely(userQuestion):
    """Find the known question that is most similar to the user's question.

    The question bank is loaded from "faq.json" on every call and each
    entry's 'question' text is scored with jaccardSimilarity.

    Arguments:
    userQuestion -- question that the user has entered

    Returns:
    The entry from faq.json whose 'question' scores highest (the whole entry,
    not only the question text). On a tie the earliest entry wins.

    Raises:
    ValueError -- if faq.json contains no entries
    """
    # The context manager closes the file even if parsing fails; JSON text is
    # UTF-8 by specification, so do not depend on the platform default.
    with open("faq.json", "r", encoding="utf-8") as fileObject:
        questionBank = json.load(fileObject)

    if not questionBank:
        raise ValueError("faq.json contains no questions")

    # max() returns the first maximal element, matching a lookup by
    # scores.index(max(scores)).
    return max(
        questionBank,
        key=lambda faqPair: jaccardSimilarity(userQuestion, faqPair['question']),
    )
|
|
|
|
def continueQuestions(userQuestion):
    """Answer one question and report whether the caller should keep looping.

    If the question still has content after sanitising, the closest known
    question and its answer are printed and the raw question is appended to
    "asked_questions_log.txt".

    Arguments:
    userQuestion -- question that the user has entered

    Returns:
    True if the question was answered (ask again), False if the input was
    empty after sanitising (time to exit)
    """
    # Nothing left after sanitising means there is no question to answer
    if not sanitiseInput(userQuestion):
        return False

    bestMatch = mostLikely(userQuestion)
    quotedQuestion = "'" + bestMatch['question'] + "'"
    quotedAnswer = "'" + bestMatch['answer'] + "'"
    print("I think that you asked " + quotedQuestion + " and conclude that the answer is " + quotedAnswer + ".\n")

    # Keep a running log of every question asked; the file is opened in
    # append & read mode ('a+') so earlier entries are preserved
    with open("asked_questions_log.txt", "a+") as logFile:
        logFile.write(userQuestion + "\n")

    return True
|
|
|
|
def main():
    """Run the interactive question-answering loop.

    Greets the user, then repeatedly prompts for a question and hands it to
    continueQuestions until that returns False (empty input) or standard
    input is exhausted, and finally prints a goodbye message.
    """
    print("Hello, I am a question answering bot.\n")

    prompt = 'Please enter a question, and press the ENTER key: \n'

    # Loop again and again while the user enters a question
    while True:
        try:
            userQuestion = input(prompt)
        except EOFError:
            # stdin was closed (Ctrl-D or end of piped input): treat it like
            # an empty question so the program still exits cleanly.
            break
        if not continueQuestions(userQuestion):
            break

    # Exit program gracefully with a message
    print("Goodbye!")
|
|
|
|
# Start the interactive loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()