diff --git a/text2csv.py b/text2csv.py index f1b20b7..3394e00 100644 --- a/text2csv.py +++ b/text2csv.py @@ -13,50 +13,68 @@ def txt_to_csv(path): dist2=[] dist3=[] dist4=[] + options=[[],[],[],[]] with open(path, errors='ignore',mode="r") as file1: - files = file1.readlines() + lines = file1.readlines() i=0 - for i in range(len(files)): - if files[i][0]=='\n': + for i in range(len(lines)): + if lines[i][0]=='\n': + if len(lines) == i + 1: + continue + #avoid questions begginning with #, which break the pattern + if len(lines[i+1]) >= 3 and lines[i+1][3]=='#': + continue + try: - if files[i+1][3]=='#': - continue - questions.append(files[i+1][3:len(files)-1]) - key.append(files[i+2][2:len(files[i+2])-1]) - if (files[i+3]!="\n"): - dist1.append(files[i+3][2:len(files[i+3])-1]) - else: - dist1.append(np.nan) - dist2.append(np.nan) - dist3.append(np.nan) - dist4.append(np.nan) - continue - if (files[i+4]!="\n"): - dist2.append(files[i+4][2:len(files[i+4])-1]) - else: - dist2.append(np.nan) - dist3.append(np.nan) - dist4.append(np.nan) - continue - if (files[i+5]!="\n"): - dist3.append(files[i+5][2:len(files[i+5])-1]) - else: - dist3.append(np.nan) - dist4.append(np.nan) - continue - if (files[i+6]!="\n"): - dist4.append(files[i+6][2:len(files[i+6])-1]) - else: - dist4.append(np.nan) - except: + #QUESTION'S TEXT + j = 1 + question_text = "" + while lines[i+j][0] != '^': + question_text += lines[i+j] + j += 1 + #remove #Q on the beginning + question_text = question_text[3:len(question_text)-1] + question_text = question_text.replace('\n', ' ') + questions.append(question_text) + + #QUESTION'S ANSWER + answer_text = "" + while lines[i+j][0] != 'A': + answer_text += lines[i+j] + j += 1 + #remove ^ on the beginning + answer_text = answer_text[2:len(answer_text)-1] + answer_text = answer_text.replace('\n', ' ') + key.append(answer_text) + + #QUESTION'S OPTIONS + options_letters = ['B', 'C', 'D', '\n'] + nb_options_found = 0 + option_text = "" + while lines[i+j][0] != '#' and nb_options_found < 4: + if lines[i+j][0] == options_letters[nb_options_found] or lines[i+j][0] == '\n': + #remove letter on the beginning + option_text = option_text[2:len(option_text)-1] + option_text = option_text.replace('\n', ' ') + options[nb_options_found].append(option_text) + option_text = "" + nb_options_found += 1 + else: + option_text += lines[i+j] + j += 1 + for k in range(0, len(options)): + if len(options[k]) != len(options[0]): + options[k].append(np.nan) + except IndexError: pass + bank={} bank["Questions"]=questions bank["Correct"]=key - bank["A"]=dist1 - bank["B"]=dist2 - bank["C"]=dist3 - bank["D"]=dist4 + bank["A"]=options[0] + bank["B"]=options[1] + bank["C"]=options[2] + bank["D"]=options[3] df=pd.DataFrame(bank) return df @@ -72,5 +90,6 @@ def parse_files(sourcePath='/content/drive/MyDrive/Colab Notebooks/Data_trivial/ data=txt_to_csv(path) data.to_csv(destination+files+'.csv') -print(' Input SourcePath and Destination Path to trverse through the files and convert them into csv \n Requirement Python 3.x , Numpy , os , Pandas \n or run this in Google Colab as it is') -parse_files(sourcePath=input('SourcePath'),destination=input('Destination Path')) \ No newline at end of file +if __name__ == "__main__": + print(' Input SourcePath and Destination Path to trverse through the files and convert them into csv \n Requirement Python 3.x , Numpy , os , Pandas \n or run this in Google Colab as it is') + parse_files(sourcePath=input('SourcePath'),destination=input('Destination Path')) \ No newline at end of file