From 6d18c036775b648c51085693635146dad7198a3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=BChler=20Ma=C3=ABlys?= <maelys.buhler@he-arc.ch>
Date: Sat, 13 Apr 2024 10:53:29 +0200
Subject: [PATCH 1/2] Modify text2csv.py to handle questions spanning multiple
 lines

---
 text2csv.py | 91 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 55 insertions(+), 36 deletions(-)

diff --git a/text2csv.py b/text2csv.py
index f1b20b7..1887b3d 100644
--- a/text2csv.py
+++ b/text2csv.py
@@ -13,50 +13,68 @@ def txt_to_csv(path):
   dist2=[]
   dist3=[]
   dist4=[]
+  options=[[],[],[],[]]
   with open(path, errors='ignore',mode="r") as file1:
       files = file1.readlines()
       i=0
       for i in range(len(files)):
         if files[i][0]=='\n':
+          if len(files) == i + 1:
+            continue
+          #avoid questions begginning with #, which break the pattern
+          if len(files[i+1]) >= 3 and files[i+1][3]=='#':
+            continue   
+          
           try:
-            if files[i+1][3]=='#':
-              continue          
-            questions.append(files[i+1][3:len(files)-1])
-            key.append(files[i+2][2:len(files[i+2])-1])
-            if (files[i+3]!="\n"):
-              dist1.append(files[i+3][2:len(files[i+3])-1])
-            else:
-              dist1.append(np.nan)
-              dist2.append(np.nan)
-              dist3.append(np.nan)
-              dist4.append(np.nan)
-              continue
-            if (files[i+4]!="\n"):
-              dist2.append(files[i+4][2:len(files[i+4])-1])
-            else:
-              dist2.append(np.nan)
-              dist3.append(np.nan)
-              dist4.append(np.nan)
-              continue
-            if (files[i+5]!="\n"):
-              dist3.append(files[i+5][2:len(files[i+5])-1])
-            else:
-              dist3.append(np.nan)
-              dist4.append(np.nan)
-              continue
-            if (files[i+6]!="\n"):
-              dist4.append(files[i+6][2:len(files[i+6])-1])
-            else:
-              dist4.append(np.nan)
-          except:
+            #QUESTION'S TEXT
+            j = 1
+            question_text = ""
+            while files[i+j][0] != '^':   
+              question_text += files[i+j]
+              j += 1
+            #remove #Q on the beginning
+            question_text = question_text[3:len(question_text)-1]
+            question_text = question_text.replace('\n', ' ')
+            questions.append(question_text)
+        
+            #QUESTION'S ANSWER
+            answer_text = ""
+            while files[i+j][0] != 'A':   
+              answer_text += files[i+j]
+              j += 1
+            #remove ^ on the beginning
+            answer_text = answer_text[2:len(answer_text)-1]
+            answer_text = answer_text.replace('\n', ' ')
+            key.append(answer_text)
+
+            #QUESTION'S OPTIONS
+            options_letters = ['B', 'C', 'D', '\n']
+            nb_options_found = 0
+            option_text = ""
+            while files[i+j][0] != '#' and nb_options_found < 4:
+              if files[i+j][0] == options_letters[nb_options_found] or files[i+j][0] == '\n':
+                #remove letter on the beginning             
+                option_text = option_text[2:len(option_text)-1]
+                option_text = option_text.replace('\n', ' ')
+                options[nb_options_found].append(option_text)
+                option_text = ""
+                nb_options_found += 1
+              else:
+                option_text += files[i+j]
+                j += 1
+            for k in range(0, len(options)):
+              if len(options[k]) != len(options[0]):
+                options[k].append(np.nan)
+          except IndexError:
             pass
+
   bank={}
   bank["Questions"]=questions
   bank["Correct"]=key
-  bank["A"]=dist1
-  bank["B"]=dist2
-  bank["C"]=dist3
-  bank["D"]=dist4
+  bank["A"]=options[0]
+  bank["B"]=options[1]
+  bank["C"]=options[2]
+  bank["D"]=options[3]
   df=pd.DataFrame(bank)
   return df
 
@@ -72,5 +90,6 @@ def parse_files(sourcePath='/content/drive/MyDrive/Colab Notebooks/Data_trivial/
      data=txt_to_csv(path)
      data.to_csv(destination+files+'.csv')
 
-print(' Input SourcePath and Destination Path to trverse through the files and convert them into csv \n  Requirement Python 3.x , Numpy , os , Pandas \n  or run this in Google Colab as it is')
-parse_files(sourcePath=input('SourcePath'),destination=input('Destination Path'))
\ No newline at end of file
+if __name__ == "__main__":
+    print(' Input SourcePath and Destination Path to trverse through the files and convert them into csv \n  Requirement Python 3.x , Numpy , os , Pandas \n  or run this in Google Colab as it is')
+    parse_files(sourcePath=input('SourcePath'),destination=input('Destination Path'))
\ No newline at end of file

From 06c8e6b409b481874defd20d5ea545185ec73928 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=BChler=20Ma=C3=ABlys?= <maelys.buhler@he-arc.ch>
Date: Sat, 13 Apr 2024 11:11:25 +0200
Subject: [PATCH 2/2] Rename variable `files` to `lines`

---
 text2csv.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/text2csv.py b/text2csv.py
index 1887b3d..3394e00 100644
--- a/text2csv.py
+++ b/text2csv.py
@@ -15,22 +15,22 @@ def txt_to_csv(path):
   dist4=[]
   options=[[],[],[],[]]
   with open(path, errors='ignore',mode="r") as file1:
-      files = file1.readlines()
+      lines = file1.readlines()
       i=0
-      for i in range(len(files)):
-        if files[i][0]=='\n':
-          if len(files) == i + 1:
+      for i in range(len(lines)):
+        if lines[i][0]=='\n':
+          if len(lines) == i + 1:
             continue
           #avoid questions begginning with #, which break the pattern
-          if len(files[i+1]) >= 3 and files[i+1][3]=='#':
+          if len(lines[i+1]) >= 3 and lines[i+1][3]=='#':
             continue   
           
           try:
             #QUESTION'S TEXT
             j = 1
             question_text = ""
-            while files[i+j][0] != '^':   
-              question_text += files[i+j]
+            while lines[i+j][0] != '^':   
+              question_text += lines[i+j]
               j += 1
             #remove #Q on the beginning
             question_text = question_text[3:len(question_text)-1]
@@ -39,8 +39,8 @@ def txt_to_csv(path):
         
             #QUESTION'S ANSWER
             answer_text = ""
-            while files[i+j][0] != 'A':   
-              answer_text += files[i+j]
+            while lines[i+j][0] != 'A':   
+              answer_text += lines[i+j]
               j += 1
             #remove ^ on the beginning
             answer_text = answer_text[2:len(answer_text)-1]
@@ -51,8 +51,8 @@ def txt_to_csv(path):
             options_letters = ['B', 'C', 'D', '\n']
             nb_options_found = 0
             option_text = ""
-            while files[i+j][0] != '#' and nb_options_found < 4:
-              if files[i+j][0] == options_letters[nb_options_found] or files[i+j][0] == '\n':
+            while lines[i+j][0] != '#' and nb_options_found < 4:
+              if lines[i+j][0] == options_letters[nb_options_found] or lines[i+j][0] == '\n':
                 #remove letter on the beginning             
                 option_text = option_text[2:len(option_text)-1]
                 option_text = option_text.replace('\n', ' ')
@@ -60,7 +60,7 @@ def txt_to_csv(path):
                 option_text = ""
                 nb_options_found += 1
               else:
-                option_text += files[i+j]
+                option_text += lines[i+j]
                 j += 1
             for k in range(0, len(options)):
               if len(options[k]) != len(options[0]):