From b04263b32a19e63f521bf920e26e1c2474007eab Mon Sep 17 00:00:00 2001 From: David Lawler Date: Thu, 10 Mar 2022 13:18:37 +1000 Subject: [PATCH] Co-authored-by: Nick Bland --- .vscode/launch.json | 16 +++ __pycache__/main.cpython-39.pyc | Bin 1444 -> 2785 bytes ...st_questionbot.cpython-39-pytest-7.0.1.pyc | Bin 1899 -> 4217 bytes asked_questions_log.txt | 64 ++++++++++- main.py | 104 +++++++++++++----- test_questionbot.py | 60 +++++++--- 6 files changed, 201 insertions(+), 43 deletions(-) create mode 100644 .vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..306f58e --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,16 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true + } + ] +} \ No newline at end of file diff --git a/__pycache__/main.cpython-39.pyc b/__pycache__/main.cpython-39.pyc index c6cf5bb49cfdf77648cca28e4ce383bdc5f36d15..4b5047315ed4d4d1cb2cc03b3e67813e6a07d9e0 100644 GIT binary patch literal 2785 zcmb`JO>-MH7{{f3u{Vy>7U;CibcRuRX=}(koxZ^^^eqquQfLYco?(n)$?-PpT}P7Z z+T)zeov)A_`34;M9LtdtN3NXsOB<(7%7Fvx*~sTX%zr@|BdPo(aNK!mvaq9n?=2O<(PxQC)5W^pgcvZ(HQ zYF(Dbgu=%bsm=Wfe`K>s?d|BWW0i3}z^Sj|G9m}W&;w6UL5=?`IP?$cJ{{46@Q@so z4vB5Um&Zf6+2UzzVl5eGJuMZB({9fgDcD|5Y7=KEGa2jfUCCm@lB^GtCf9O-DcQ-M zOV;hBP1EDnvQlB}p4#bkWNP$XJcwAC!PgT%i;EcJrPnq$teK^T$Elw7!gwn1Wve9> z{4CnR1WP35r0f7sr0J>Dx!*D>PIp*8Hf?6w(#`*xE>_@rvz~dyGfxs&9IvFWC7kA@ zR?cUP;;Bx!k-4vp%4h7VU0+_pGlt4%r&gPXE*g1g-*szVeeT3^>$9osPuBhmQI?m# z`^(>E3F4!dKh3h8L_SpUbGf*>vU2Cv%^Ry9+*-T2wsQN{os~N)x0k;hF!y@-o6U{o zjm`BfwzhKPHd|y{18ro7J(N1$NnNy_NS?&n3Pm0=9O4xr68~bqD#JK1%b3 zO>baHDd9q@7ig6#5KNjDqsd_n?0bjQZ8Qvk8j&#_(|ymi(L&odp7BTCK`;VhW59-X zShO$08jw-`HXwV@md#z4BOQ0*gsa#LSk{^_CuA#5Pa$U$y3NyFYc_#KGya_8^X){w z!kUG55^e_6Cf{IGfHuK{wYg@94cHRM`^4zJ9S9$F2c`|;pJ`mO?Ps!SP7MzjCl3|!_Y5G#8Y{jKz)qXs+;Ona}u*Aw2ylIj_5fI*yHB6;e0(HR6<{}SU)?Mx= zGR>>8ovy}Qu*LGQm*Q3n^6K=0^Xl|Ua!;?~TyxA7ms#uewP(E9F9mm@8BOZ339tK(@ys@|MA>9;u0t^m(oA&)+6g(t9T-pzzHh~F8WE2X2 zo{YUiO3B_k=q;J@2&lleGzvzgd9q)Im*@}*H!1^|VEDVEMYDF+XGy#(5vU_=I!fGe z+E}cuI@#ocf0wJ>H0!6w1};k{BxyQo{}Gda6&+;O4%^8v?9n7ADPS#?b==!xGEP=4 z%5XG{T6}NG@|9DSN0U!#GkK78k?cT8F7m(`Kwr4fd5L%NM!4MMVVny2BKJG|MIJVj zOv_rNUPoT)XWcB~c9w~)W~OAl(ux!L*g;&MwGJOZY!N~AhBYr^9$)cT)z8``eBr{p z;e1zKcANr8UBQ=c;nbEJG851WjcCP-=mq}_jl9Tn7AFAmVQ3+8N-{#jpgfJKGKHQ1 zCX}fWaGECnmM^Jtf8$Wf6_NpWk%Y1tTJ6E)t8?cSKYI5|FV^NqTOT z=++8gvcK))&C>s&GAHp_=Lu?U z+&ZdK09baqRtyz;s(S8?l6$=v`ee~oaxuxWt|jNv2AGPWQ%=wZS&wnO3#)7D62kg7 zy*9k!R0lP+ne>D#>@1{(V%g0Cn4EeI{cB5=;YH`5F@aDwl5A(myf8zLzn{~}=F|0B z==8{?H4nR}fkw?^v^?xX!ez~O?B!>Q9q{#0rLWIg^csbLo?<9}&DQ)l?a7I?{tyoJ z8C=Lr1fH&YRk}a}UtLE}({@h|`=`fNDaX+0lr?>wvm zXGK6jxH-J~nM{&wfo(9}VN?G|iqKen-p-JEWrrGm@Kqx552UCWQ`QSE)?I~K>$du% z&8Lr^uw6O0%PPYN)m>~4A1s777PPKPD1mHIJkwJiVyyD6ZNkKN^~ z_pFDg(XbYc2KM`9bSPV%i{x0*b{TU1kIhpQmPTW09QzMMxk3T|dt R@LqH-Iup%D=cBoZ{sZEQ4VVA` delta 729 zcmXX^&x_MQ6rNv6r)`?m#obmbSeM1!2ukWjL@y#J3rjD8VlE-g1WbRWoy=k@Evv+% zP?p>TVGI5jp8X>XUcBjl;hWSM-h6M~ym^oLzWKfPt!@X6Mh(IE;U4#&8W;A>^9QgK zNZ=9jkRXD7LLL@O7=T2Wq5?=GCisxZ`w`6$hTNCPss7bvdLhy9e=tB3>={_Fh$b8(kAX*slL`3s~j)Qq&grg*l94;&Q+PrtiGlJ7N zRbG9NCKEsD528?cJL4o(r|~cvEl%`JFaKrU=oWPy_D5x$>z>!jo($KZtG0b=V@^1c zO*jZqeyh?tzXVqSQyB)n$O`2;VZo#K;x3hg<5j-N++o=@``w9|Z+ zUqCy^>ghaU%>ES?8=L895T+1*n!c*&aM~- zj9Uk0kM+!_=7H6-@&Ajb$1#$9hTZ?x7H|NM>uh zv1t0L7QASMe&o}>J*PSq>V>}?oFtjR3u@_ZDx=EcsIrhM>4M9n%4**bbGV@6SLLtc zu-yz~`O`Sw3xjph_%66LU#Z+)U0Ir6TwPdMsI0BtuH3Gyl|Szk^>F#|)@FHgYh$~( zP+3|l-YRZ)qzanF^*}cEqM{%1;#YwmHl*HsnGPsrGl*1i;ghoV%J4#VVpVE&l38sW zKOtCQNE^anc_uES{lA4;ck+4QtNlO}RSdi(jvvYAf#}X``^})}18qP;ATU%C77ill zY=X!lToO4F6c2FEh13fl>lMT_jm(gkC2@hoMG|y(sknq*GCktJ^ry7usr!BgZOnbN zgA=Cy6$%GOmGOifFtE^g#lTXt06u}Ef8Zv!h^I%!cZOV2_F+aDM@-y9&!JvK6xty5a6;cER#!+YR3R2g zEcbKi)aid&FDVccciN2*?~#cU#$dY?1zPxWP436-kZbPO_QH6_4{H%XA*L`a&XG6| z(a(r?X#NPP=SP*L>d45-TM_V{MgR{~P^XAdi+5{LT%&{48d0mQCb9ki4>l7Xv@tvo z(N6I2XDDyOgM2xLgc9AaM#381>3=}NZ)m1Q!Vb-(9x!tj3Dy7!)+iDN@f;|~4oKGB z9)MwR|34lO^d*RQaeDDSglDIQUy|V_#ApCrIZMIIbOGx_|9`Rj7_Q$W&l87FJ^w3c zW1go>Pi%ZZ(>{Yx@ICm4j43zB-|!PVG&T2bC<~d97*#<}dzAdBGa^$CY+2%9nh}u& zGh5w!sl4kyD?O2MG?u-<4e=h{mbkl(FxUbn`>vq9h!03yBS9Dz*GY`S)iXs2)=_uN zpLNGYa>s`M>@&Q018;b!s|AW(17EmI%tQMh`r`OXsqD+A0j~|ykF169UP--B;uf7! z`y&ezxkHp;!bultq(b6tG51ltNB&Ds_jAlTJ>3$tI)fN~h+`HWeba;kMO6`dWuhv= zJ%4ZX*t(9q3Uv?=VD%~;TN)W zCyjIW&>7D4ti+C6L6le_M5Z3bG607(RBZB6bux;hpxII#y+$=hFMklXBXxEMlVdNV zdy%?cW_fdpx%i6vu#I7dGU-VK`x}%Yk2w5G3?gQ*p%wmyf8p6n$Ur@_XQLj?^fK}X zVi@YtjIKvvr+Sn-Ds$*)2L4WS4B1OOLC@`>2L?JkdotstoT$GY!Q1O$<(TGdI7 zQr}0z!qrO(QSh8RtzDcH;*g(co~6+)inJWLtvg!Uu6mNP-l{jD{j5`;ASk3EfpW=6 zn0Q-+a0Q_}jh~-TkBXSlEVe9kSpDe4oEE8TXOZv@#gPHaX!mu3%VtyuKu}fE8Utjw zrVVtsrdWq<3>iPATBqxLaP9iHH$D`1eEvHhUH$hyYWekmJhVLKp$%x>g~T0}b{g@?m0J48 zu9OxbD78u=crF?Wf9g(5Np@gM`J#(VC;eTLjb*8Ye+y}{rT;a8s&ieE3CapOEegsW vDftM`g04%@jz+FtgFu)6?ww}L+hK5zqJyM0uOdbkOxGTK9ki~SckO=ynuPsG delta 986 zcmZ8gO=}ZD7@nEkY&IWFZEZ^rMW)q47fZ3W^&oB0L;bi!X*~sD*>omt>}EI4Oq7I# zfEMu@;zdwsJbD%U5k2?=;vY~vdh+J`Zcu{@^UO2ze(XE%JoAhHC=`N;i7bI{W^<*y zZ}kcb^iJ>U+GQrJQ&Mt;EmAONgd@^0X9YbVp8FjGioNr1wABAZm(saftc@S`ztWF% zrvHk)pLW4P+vrBKRL#NMJAd2%Vy$)=aD^6?LU0h$2pHlyrF>XDkzw<8q@PQrCed;M zVG3ay;UdD!Uzi0poH7zC7z@qA_P+||VRZ>|ExG1 zZ@_=8aJR#tRu;i7HtQ#5X{{&S5z@#*Q;nmpA6NV+_8VF%zZ%sWW^Fr+4}(ze0vQE+ zR%`j548wSp-{(P%2mEORS`5)VNUo+D9r1E()>_vehBDBS%g9JI=C=xtg4dPQnww+E z_ll49_W6ly-t6+0dpC|nxhWSF>cOL}>E)Gr^O~pm&JIVq;pw3hHw+Z#v62NggpF1q z&``BWe6~D|n_FI1WgyEim7GPPoK+4t^0(?uzuc&XrW!%I(HV#H6<;66jZpYX)|G!0 z#^oUNqo5`gibhQ!6ac)GT0oA{E9EfP=;!OBMI~0Xz-*2$Zj9@)Uv#c4p>{QjoUlA| zsagc)<%aH`s???a=gEx+NzKV2oO%;)D|s^tWt)VIV!Z diff --git a/asked_questions_log.txt b/asked_questions_log.txt index 6728411..328b08a 100644 --- a/asked_questions_log.txt +++ b/asked_questions_log.txt @@ -5,4 +5,66 @@ weather test /usr/local/bin/python3 "/Volumes/GoogleDrive-100976413726208790908/My Drive/UNI/INFS 2048 - System Design and Realisation/Assessment 2B/main.py" weather -today \ No newline at end of file +today +birthday +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +#!@#!@#@!%#$@#$@%#$%$$#!$#@!#@weather +weather +Hello +$$% +$@$@$@$@$@$ +#$#$#$#$ +#$#$#$# +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather +weather diff --git a/main.py b/main.py index ec33bab..6939fe8 100644 --- a/main.py +++ b/main.py @@ -1,43 +1,93 @@ import json +import string -def jaccard_similarity(userQuestion, questionBank): - s1 = set(userQuestion) - s2 = set(questionBank) - return float(len(s1.intersection(s2)) / len(s1.union(s2))) +def sanitiseInput(dirtyQuestion): + """Sanitise a user inputted question to make it lower case, remove punctuation -def most_likely(userQuestion): + Arguments: + dirtyQuestion -- a question that contains punctuation and different-cased letter + + Returns: + String with the user's input without punctuation and all in lower cawse + """ + # Translate the dirty input into a sanitised one by removing all punctuation characters + # maketrans creates a table of characters to change, change with, and remove. In this case, no changes except remove punctuation. + # Derived from: https://datagy.io/python-remove-punctuation-from-string/ + newQuestion = dirtyQuestion.translate(str.maketrans('', '', string.punctuation)) + newQuestion = newQuestion.lower().strip() + return newQuestion + +def jaccardSimilarity(userQuestion, questionBank): + """Check the similarity of a user defined question to the bank of questions available + + Arguments: + userQuestion -- question that the user has entered + questionBank -- python json object of questions and answers + + Returns: + Float between 0 and 1 of similarity (1 being exactly the same) + """ + s1 = set(sanitiseInput(userQuestion)) + s2 = set(sanitiseInput(questionBank)) + return float(len(s1.intersection(s2)) / len(s1.union(s2))) + +def mostLikely(userQuestion): + """Checks how likely a user entered question is to the list of known questions + + Arguments: + userQuestion -- question that the user has entered + + Returns: + String of the most likely question from the known user bank + """ likelihoodScore = [] fileObject = open("faq.json", "r") jsonContent = fileObject.read() - aList = json.loads(jsonContent) - for json_object in aList: - likelihoodScore.append(jaccard_similarity(userQuestion, json_object['question'])) - #print(likelihoodScore) - + questionBank = json.loads(jsonContent) + # Using list of known questions, iterate through and determine similarity of all of them + for faqPair in questionBank: + likelihoodScore.append(jaccardSimilarity(userQuestion, faqPair['question'])) + + # Get the question that has the highest likelihood mostLikelyIndex = likelihoodScore.index(max(likelihoodScore)) - mostLikely = aList[mostLikelyIndex] - - fileObject.close() + mostLikely = questionBank[mostLikelyIndex] + + fileObject.close() # Close file to prevent IO errors return mostLikely +def continueQuestions(userQuestion): + """Check whether question has any content in it. + + Arguments: + userQuestion -- question that the user has entered + + Returns: + False if input is empty or True if required to re-loop + """ + if sanitiseInput(userQuestion): + likelyQuestion = mostLikely(userQuestion) + print ("I think that you asked " + "'" + (likelyQuestion['question']) + "'" + " and conclude that the answer is " + "'" + (likelyQuestion['answer']) + "'.\n") + # Open the file in append & read mode ('a+') + with open("asked_questions_log.txt", "a+") as file_object: + file_object.write(userQuestion + "\n") + return True + else: + # return False if there is no input to exit the program + return False + def main(): print("Hello, I am a question answering bot.\n") + userQuestion = '' - while True: - userQuestion = input('Please enter a question, and press the ENTER key: \n').strip() - if userQuestion: - likelyQuestion = most_likely(userQuestion) - print ("I think that you asked " + "'" + (likelyQuestion['question']) + "'" + " and conclude that the answer is " + "'" + (likelyQuestion['answer']) + "'.\n") - # Open the file in append & read mode ('a+') - with open("asked_questions_log.txt", "a+") as file_object: - file_object.write("\n") - file_object.write(userQuestion) - - - else : - print ("Goodbye!") - exit() + userQuestion = input('Please enter a question, and press the ENTER key: \n') + + # Loop again and again when user enters a question and provide user with results + while continueQuestions(userQuestion): + userQuestion = input('Please enter a question, and press the ENTER key: \n') + + # Exit program gracefully with a message + print("Goodbye!") if __name__ == "__main__": main() \ No newline at end of file diff --git a/test_questionbot.py b/test_questionbot.py index b324dd8..715fed0 100644 --- a/test_questionbot.py +++ b/test_questionbot.py @@ -1,25 +1,55 @@ import main import pytest - -#def test_CanInstantiatejaccard_similarity(): -# co = main.jaccard_similarity(userQuestion="Weather", questionBank="Same as yesterday.") -# assert main.jaccard_similarity(float) +from os.path import exists def test_likelyQuestion(): - likelyQuestion = main.most_likely("birthday") + likelyQuestion = main.mostLikely("birthday") assert likelyQuestion['question'] == 'What is your birthday?' def test_likelyAnswer(): - likelyAnswer = main.most_likely("weather today") + likelyAnswer = main.mostLikely("weather today") assert likelyAnswer['answer'] == 'Same as yesterday.' def test_detection_of_no_user_input(): - # Override the Python built-in input method - main.main.input = lambda: '' - # Call the function you would like to test (which uses input) - output = main.main() - assert output == [ - 'Hello, I am a question answering bot.', - 'Please enter a question, and press the ENTER key: ', - 'Goodbye!' -] \ No newline at end of file + userQuestion = '' + assert not main.continueQuestions(userQuestion) + +def testDetectionOfUserInput(): + userQuestion = 'weather' + assert main.continueQuestions(userQuestion) + +def testFaqJsonFileExists(): + fileName = exists('./faq.json') + assert fileName + +# Check whether log file gets made after a successful run + +def testLogfileExists(): + logName = exists('./asked_questions_log.txt') + assert logName + +def readFileLines(fileName): + file = open(fileName, "r") + # Create a count of non-empty lines + nonemptyLines = [line.strip("\n") for line in file if line != "\n"] + lineCount = len(nonemptyLines) + file.close() + return lineCount + +def testLogfileLinecountIncreases(): + oldLineCount = readFileLines("./asked_questions_log.txt") + + userQuestion = 'weather' + main.continueQuestions(userQuestion) + + newLineCount = readFileLines("./asked_questions_log.txt") + + assert newLineCount == oldLineCount + 1 + +def testDetectionIfUserEntersSymbolsOnly(): + userQuestion = '#$%^&*' + assert not main.continueQuestions(userQuestion) + +def testDetectionIfUserEntersSpaceOnly(): + userQuestion = ' ' + assert not main.continueQuestions(userQuestion) \ No newline at end of file