uberspot · maelys-buhler · Apr 13, 2024 · Apr 13, 2024
diff --git a/text2csv.py b/text2csv.py
@@ -13,50 +13,68 @@ def txt_to_csv(path):
   dist2=[]
   dist3=[]
   dist4=[]
+  options=[[],[],[],[]]
   with open(path, errors='ignore',mode="r") as file1:
-      files = file1.readlines()
+      lines = file1.readlines()
       i=0
-      for i in range(len(files)):
-        if files[i][0]=='\n':
+      for i in range(len(lines)):
+        if lines[i][0]=='\n':
+          if len(lines) == i + 1:
+            continue
+          #avoid questions beginning with #, which break the pattern
+          if len(lines[i+1]) >= 3 and lines[i+1][3]=='#':
+            continue   
+
           try:
-            if files[i+1][3]=='#':
-              continue          
-            questions.append(files[i+1][3:len(files)-1])
-            key.append(files[i+2][2:len(files[i+2])-1])
-            if (files[i+3]!="\n"):
-              dist1.append(files[i+3][2:len(files[i+3])-1])
-            else:
-              dist1.append(np.nan)
-              dist2.append(np.nan)
-              dist3.append(np.nan)
-              dist4.append(np.nan)
-              continue
-            if (files[i+4]!="\n"):
-              dist2.append(files[i+4][2:len(files[i+4])-1])
-            else:
-              dist2.append(np.nan)
-              dist3.append(np.nan)
-              dist4.append(np.nan)
-              continue
-            if (files[i+5]!="\n"):
-              dist3.append(files[i+5][2:len(files[i+5])-1])
-            else:
-              dist3.append(np.nan)
-              dist4.append(np.nan)
-              continue
-            if (files[i+6]!="\n"):
-              dist4.append(files[i+6][2:len(files[i+6])-1])
-            else:
-              dist4.append(np.nan)
-          except:
+            #QUESTION'S TEXT
+            j = 1
+            question_text = ""
+            while lines[i+j][0] != '^':   
+              question_text += lines[i+j]
+              j += 1
+            #remove #Q on the beginning
+            question_text = question_text[3:len(question_text)-1]
+            question_text = question_text.replace('\n', ' ')
+            questions.append(question_text)
+
+            #QUESTION'S ANSWER
+            answer_text = ""
+            while lines[i+j][0] != 'A':   
+              answer_text += lines[i+j]
+              j += 1
+            #remove ^ on the beginning
+            answer_text = answer_text[2:len(answer_text)-1]
+            answer_text = answer_text.replace('\n', ' ')
+            key.append(answer_text)
+
+            #QUESTION'S OPTIONS
+            options_letters = ['B', 'C', 'D', '\n']
+            nb_options_found = 0
+            option_text = ""
+            while lines[i+j][0] != '#' and nb_options_found < 4:
+              if lines[i+j][0] == options_letters[nb_options_found] or lines[i+j][0] == '\n':
+                #remove letter on the beginning             
+                option_text = option_text[2:len(option_text)-1]
+                option_text = option_text.replace('\n', ' ')
+                options[nb_options_found].append(option_text)
+                option_text = ""
+                nb_options_found += 1
+              else:
+                option_text += lines[i+j]
+                j += 1
+            for k in range(0, len(options)):
+              if len(options[k]) != len(options[0]):
+                options[k].append(np.nan)
+          except IndexError:
             pass
+
   bank={}
   bank["Questions"]=questions
   bank["Correct"]=key
-  bank["A"]=dist1
-  bank["B"]=dist2
-  bank["C"]=dist3
-  bank["D"]=dist4
+  bank["A"]=options[0]
+  bank["B"]=options[1]
+  bank["C"]=options[2]
+  bank["D"]=options[3]
   df=pd.DataFrame(bank)
   return df
 
@@ -72,5 +90,6 @@ def parse_files(sourcePath='/content/drive/MyDrive/Colab Notebooks/Data_trivial/
      data=txt_to_csv(path)
      data.to_csv(destination+files+'.csv')
 
-print(' Input SourcePath and Destination Path to trverse through the files and convert them into csv \n  Requirement Python 3.x , Numpy , os , Pandas \n  or run this in Google Colab as it is')
-parse_files(sourcePath=input('SourcePath'),destination=input('Destination Path'))
+if __name__ == "__main__":  # run the interactive conversion only when executed as a script, not when imported
+    print(' Input SourcePath and Destination Path to trverse through the files and convert them into csv \n  Requirement Python 3.x , Numpy , os , Pandas \n  or run this in Google Colab as it is')
+    parse_files(sourcePath=input('SourcePath'),destination=input('Destination Path'))  # both paths are read interactively via input()