import json
import string

# Translation table that deletes every ASCII punctuation character. Built once
# at import time instead of on every sanitiseInput() call.
# maketrans takes (characters to change, characters to change them to,
# characters to remove); here nothing is changed and punctuation is removed.
# Derived from: https://datagy.io/python-remove-punctuation-from-string/
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# Prompt shown to the user each time a question is requested.
_PROMPT = 'Please enter a question, and press the ENTER key: \n'


def sanitiseInput(dirtyQuestion: str) -> str:
    """Sanitise a user inputted question: remove punctuation, lower-case, trim.

    Arguments:
    dirtyQuestion -- a question that may contain punctuation and mixed-case
                     letters

    Returns:
    The input with ASCII punctuation removed, converted to lower case and
    with leading/trailing whitespace stripped.
    """
    return dirtyQuestion.translate(_PUNCTUATION_TABLE).lower().strip()


def jaccardSimilarity(userQuestion: str, questionBank: str) -> float:
    """Compute the Jaccard similarity between two questions.

    Both strings are sanitised first and then compared as sets of
    *characters* (not words): |intersection| / |union|.

    Arguments:
    userQuestion -- question that the user has entered
    questionBank -- a single known question (string) to compare against

    Returns:
    Float between 0 and 1 of similarity (1 meaning both questions use exactly
    the same set of characters). Returns 0.0 when both questions are empty
    after sanitising, instead of dividing by zero.
    """
    s1 = set(sanitiseInput(userQuestion))
    s2 = set(sanitiseInput(questionBank))
    union = s1 | s2
    if not union:
        # Both inputs were empty after sanitising; nothing to compare.
        return 0.0
    return len(s1 & s2) / len(union)


def mostLikely(userQuestion: str, faqPath: str = "faq.json") -> dict:
    """Find the known question that is most similar to the user's question.

    Arguments:
    userQuestion -- question that the user has entered
    faqPath -- path of the JSON file holding the question bank: a list of
               objects that each have a 'question' key (default "faq.json")

    Returns:
    The entry (dict) from the question bank whose 'question' has the highest
    Jaccard similarity to userQuestion. On ties the earliest entry wins.

    Raises:
    ValueError -- if the question bank is empty
    """
    # The context manager guarantees the file is closed even if parsing fails.
    with open(faqPath, "r", encoding="utf-8") as fileObject:
        questionBank = json.load(fileObject)

    if not questionBank:
        raise ValueError("question bank '" + faqPath + "' contains no questions")

    # max() returns the first entry with the highest score, which matches
    # taking the index of the first maximum in a list of scores.
    return max(
        questionBank,
        key=lambda faqPair: jaccardSimilarity(userQuestion, faqPair['question']),
    )


def continueQuestions(userQuestion: str) -> bool:
    """Answer a question if it has any content, and log it.

    When the question is non-empty after sanitising, the most likely known
    question and its answer are printed and the raw question is appended to
    "asked_questions_log.txt".

    Arguments:
    userQuestion -- question that the user has entered

    Returns:
    Boolean value. False if the input is empty (after sanitising), True if
    the question was handled and the caller should loop to ask for more.
    """
    if not sanitiseInput(userQuestion):
        # No usable input: signal the caller to exit the program.
        return False

    likelyQuestion = mostLikely(userQuestion)
    question = likelyQuestion['question']
    answer = likelyQuestion['answer']
    print(f"I think that you asked '{question}' and conclude that the answer is '{answer}'.\n")

    # Append-only log; explicit UTF-8 so non-ASCII questions can be written
    # regardless of the platform's default encoding.
    with open("asked_questions_log.txt", "a", encoding="utf-8") as file_object:
        file_object.write(userQuestion + "\n")
    return True


def main():
    """Run the interactive question/answer loop until empty input or EOF."""
    print("Hello, I am a question answering bot.\n")

    # Loop again and again while the user enters a question
    while True:
        try:
            userQuestion = input(_PROMPT)
        except EOFError:
            # Input stream closed (e.g. piped input ran out): exit cleanly.
            break
        if not continueQuestions(userQuestion):
            break

    # Exit program gracefully with a message
    print("Goodbye!")


if __name__ == "__main__":
    main()