From 6d18c036775b648c51085693635146dad7198a3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=BChler=20Ma=C3=ABlys?= Date: Sat, 13 Apr 2024 10:53:29 +0200 Subject: [PATCH 1/2] modify text2csv.py to work with question on multiple lines --- text2csv.py | 91 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 36 deletions(-) diff --git a/text2csv.py b/text2csv.py index f1b20b7..1887b3d 100644 --- a/text2csv.py +++ b/text2csv.py @@ -13,50 +13,68 @@ def txt_to_csv(path): dist2=[] dist3=[] dist4=[] + options=[[],[],[],[]] with open(path, errors='ignore',mode="r") as file1: files = file1.readlines() i=0 for i in range(len(files)): if files[i][0]=='\n': + if len(files) == i + 1: + continue + #avoid questions begginning with #, which break the pattern + if len(files[i+1]) >= 3 and files[i+1][3]=='#': + continue + try: - if files[i+1][3]=='#': - continue - questions.append(files[i+1][3:len(files)-1]) - key.append(files[i+2][2:len(files[i+2])-1]) - if (files[i+3]!="\n"): - dist1.append(files[i+3][2:len(files[i+3])-1]) - else: - dist1.append(np.nan) - dist2.append(np.nan) - dist3.append(np.nan) - dist4.append(np.nan) - continue - if (files[i+4]!="\n"): - dist2.append(files[i+4][2:len(files[i+4])-1]) - else: - dist2.append(np.nan) - dist3.append(np.nan) - dist4.append(np.nan) - continue - if (files[i+5]!="\n"): - dist3.append(files[i+5][2:len(files[i+5])-1]) - else: - dist3.append(np.nan) - dist4.append(np.nan) - continue - if (files[i+6]!="\n"): - dist4.append(files[i+6][2:len(files[i+6])-1]) - else: - dist4.append(np.nan) - except: + #QUESTION'S TEXT + j = 1 + question_text = "" + while files[i+j][0] != '^': + question_text += files[i+j] + j += 1 + #remove #Q on the beginning + question_text = question_text[3:len(question_text)-1] + question_text = question_text.replace('\n', ' ') + questions.append(question_text) + + #QUESTION'S ANSWER + answer_text = "" + while files[i+j][0] != 'A': + answer_text += files[i+j] + j += 1 + #remove ^ on the beginning + answer_text = answer_text[2:len(answer_text)-1] + answer_text = answer_text.replace('\n', ' ') + key.append(answer_text) + + #QUESTION'S OPTIONS + options_letters = ['B', 'C', 'D', '\n'] + nb_options_found = 0 + option_text = "" + while files[i+j][0] != '#' and nb_options_found < 4: + if files[i+j][0] == options_letters[nb_options_found] or files[i+j][0] == '\n': + #remove letter on the beginning + option_text = option_text[2:len(option_text)-1] + option_text = option_text.replace('\n', ' ') + options[nb_options_found].append(option_text) + option_text = "" + nb_options_found += 1 + else: + option_text += files[i+j] + j += 1 + for k in range(0, len(options)): + if len(options[k]) != len(options[0]): + options[k].append(np.nan) + except IndexError: pass + bank={} bank["Questions"]=questions bank["Correct"]=key - bank["A"]=dist1 - bank["B"]=dist2 - bank["C"]=dist3 - bank["D"]=dist4 + bank["A"]=options[0] + bank["B"]=options[1] + bank["C"]=options[2] + bank["D"]=options[3] df=pd.DataFrame(bank) return df @@ -72,5 +90,6 @@ def parse_files(sourcePath='/content/drive/MyDrive/Colab Notebooks/Data_trivial/ data=txt_to_csv(path) data.to_csv(destination+files+'.csv') -print(' Input SourcePath and Destination Path to trverse through the files and convert them into csv \n Requirement Python 3.x , Numpy , os , Pandas \n or run this in Google Colab as it is') -parse_files(sourcePath=input('SourcePath'),destination=input('Destination Path')) \ No newline at end of file +if __name__ == "__main__": + print(' Input SourcePath and Destination Path to trverse through the files and convert them into csv \n Requirement Python 3.x , Numpy , os , Pandas \n or run this in Google Colab as it is') + parse_files(sourcePath=input('SourcePath'),destination=input('Destination Path')) \ No newline at end of file From 06c8e6b409b481874defd20d5ea545185ec73928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=BChler=20Ma=C3=ABlys?= Date: Sat, 13 Apr 2024 11:11:25 +0200 Subject: [PATCH 2/2] change names of variable files to lines --- text2csv.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/text2csv.py b/text2csv.py index 1887b3d..3394e00 100644 --- a/text2csv.py +++ b/text2csv.py @@ -15,22 +15,22 @@ def txt_to_csv(path): dist4=[] options=[[],[],[],[]] with open(path, errors='ignore',mode="r") as file1: - files = file1.readlines() + lines = file1.readlines() i=0 - for i in range(len(files)): - if files[i][0]=='\n': - if len(files) == i + 1: + for i in range(len(lines)): + if lines[i][0]=='\n': + if len(lines) == i + 1: continue #avoid questions begginning with #, which break the pattern - if len(files[i+1]) >= 3 and files[i+1][3]=='#': + if len(lines[i+1]) >= 3 and lines[i+1][3]=='#': continue try: #QUESTION'S TEXT j = 1 question_text = "" - while files[i+j][0] != '^': - question_text += files[i+j] + while lines[i+j][0] != '^': + question_text += lines[i+j] j += 1 #remove #Q on the beginning question_text = question_text[3:len(question_text)-1] @@ -39,8 +39,8 @@ def txt_to_csv(path): #QUESTION'S ANSWER answer_text = "" - while files[i+j][0] != 'A': - answer_text += files[i+j] + while lines[i+j][0] != 'A': + answer_text += lines[i+j] j += 1 #remove ^ on the beginning answer_text = answer_text[2:len(answer_text)-1] @@ -51,8 +51,8 @@ def txt_to_csv(path): options_letters = ['B', 'C', 'D', '\n'] nb_options_found = 0 option_text = "" - while files[i+j][0] != '#' and nb_options_found < 4: - if files[i+j][0] == options_letters[nb_options_found] or files[i+j][0] == '\n': + while lines[i+j][0] != '#' and nb_options_found < 4: + if lines[i+j][0] == options_letters[nb_options_found] or lines[i+j][0] == '\n': #remove letter on the beginning option_text = option_text[2:len(option_text)-1] option_text = option_text.replace('\n', ' ') @@ -60,7 +60,7 @@ def txt_to_csv(path): option_text = "" nb_options_found += 1 else: - option_text += files[i+j] + option_text += lines[i+j] j += 1 for k in range(0, len(options)): if len(options[k]) != len(options[0]):