Merge pull request #4 from Transconnectome/danny
added the plotting and extraction of the meaningful connections mask thing
dyhan316 authored Sep 25, 2022
2 parents 7fe6e02 + dc6cc34 commit d3b6ca3
Showing 152 changed files with 117,754 additions and 4,689 deletions.
2,167 changes: 0 additions & 2,167 deletions Brain_Connectivity/.ipynb_checkpoints/TVB_version-checkpoint.ipynb

This file was deleted.

5,133 changes: 5,133 additions & 0 deletions Brain_Connectivity/.ipynb_checkpoints/[IMP])_TVB_version-checkpoint.ipynb

Large diffs are not rendered by default.

Binary file added Brain_Connectivity/SC_mask.csv.npy
Binary file not shown.
Binary file added Brain_Connectivity/SC_mask.npy
Binary file not shown.
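The added `SC_mask.npy` files are not shown in the diff, but a "meaningful connections" mask of this kind is typically a boolean array saved with `np.save` and applied element-wise to a structural-connectivity (SC) matrix. A minimal sketch on toy data — the actual mask shape, threshold, and extraction rule used in this commit are unknown, and the demo filename is chosen to avoid clashing with the real one:

```python
import numpy as np

# Toy structural-connectivity matrix (the real SC_mask.npy contents are not shown).
rng = np.random.default_rng(0)
sc = rng.random((5, 5))

# Hypothetical "meaningful connections" mask: keep edges above a threshold.
mask = sc > 0.5

np.save("SC_mask_demo.npy", mask)      # .npy round-trip, as in the committed files
loaded = np.load("SC_mask_demo.npy")

masked_sc = np.where(loaded, sc, 0.0)  # zero out the non-meaningful connections
```

The round-trip preserves the boolean dtype, so the loaded array can be used directly for indexing or `np.where` without re-casting.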
1,988 changes: 0 additions & 1,988 deletions Brain_Connectivity/TVB_version.ipynb

This file was deleted.

5,133 changes: 5,133 additions & 0 deletions Brain_Connectivity/[IMP])_TVB_version.ipynb

Large diffs are not rendered by default.

Binary file added __MACOSX/._abcd_cbcl01.txt
Binary file not shown.
Binary file added __MACOSX/._abcd_cbcls01.txt
Binary file not shown.
Binary file added __MACOSX/._abcd_tbss01.txt
Binary file not shown.
39,768 changes: 39,768 additions & 0 deletions pheno_data/abcd_cbcl01.txt

Large diffs are not rendered by default.

39,768 changes: 39,768 additions & 0 deletions pheno_data/abcd_cbcls01.txt

Large diffs are not rendered by default.

22,292 changes: 22,292 additions & 0 deletions pheno_data/abcd_tbss01.txt

Large diffs are not rendered by default.

1,166 changes: 1,166 additions & 0 deletions possible_extraction_files_PAST_STUFF/Danny_version_4.ipynb

Large diffs are not rendered by default.

34 changes: 34 additions & 0 deletions possible_extraction_files_PAST_STUFF/ExploringData.py
@@ -0,0 +1,34 @@
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set()

data = pd.read_csv("DATA.csv", index_col='subjectkey')  # import the data, with subjectkey as the index

'''====step0: split into feature matrix / target arrays (splitting into train/test sets comes later)===='''
target = data['sex']
features = data.drop(['sex'], axis=1)  # drop the column, so axis=1

'''====step0-1: check the per-column means to see whether the features are normalized===='''

col_means = features.mean(axis=0)  # per-column averages

# Plot or inspect the distribution of these means to see whether the values all
# fall within a common range; if the scales differ, normalization is needed.

plt.hist(col_means.values, bins=10)
plt.show()
# The values differ by orders of magnitude, so normalization is needed.

'''====step0-2: normalize to tame the scales a bit===='''
# The values are all >= 0, so just squash them with tanh()
features = np.tanh(features)  # normalize the inputs using tanh

# run these when the output files are needed
features.to_csv("input.csv")  # saves to the cwd with the given file name and extension (.csv)
target.to_csv("output.csv")



61 changes: 61 additions & 0 deletions possible_extraction_files_PAST_STUFF/Extract Data_2.py
@@ -0,0 +1,61 @@
import pandas as pd
import numpy as np

'''===step0: import the data===='''

phenotype = pd.read_csv("demo.total.csv")
con_count = pd.read_csv("con_aparc_count.csv")
con_fa = pd.read_csv("con_aparc_fa.csv")
con_length = pd.read_csv("con_aparc_length.csv")
con_rd = pd.read_csv("con_aparc_rd.csv")
# To use an absolute Windows path, prefix the string with r (raw string); see
# https://stackoverflow.com/questions/37400974/unicode-error-unicodeescape-codec-cant-decode-bytes-in-position-2-3-trunca#

wholedata = [phenotype, con_count, con_fa, con_length, con_rd]  # for use in for loops


'''====step1: set the index to the subject key===='''
for i in wholedata:
    i.set_index('subjectkey', inplace=True)  # CAREFUL: inplace=True is needed!
    # note: i = i.set_index('subjectkey') would NOT work here — it would only
    # rebind the loop variable, not the DataFrame stored in the list

'''
**below: check that the subject keys were successfully made into the index
for i in wholedata:
    print(i.head())
'''
# extra step 0: the phenotype subject keys are formatted differently, so fix them
phenotype.index = phenotype.index.str.replace("_", "")  # strip the underscores


data_sex = phenotype['sex']  # extract only the phenotype column we need


wholedata[0] = data_sex  # replace the full phenotype frame with the sex Series
# The Series already carries "sex" as its name, so there is no need to set a
# column name explicitly (a Series has a name rather than column names).

'''=====step2: concatenate the data=====(along axis=1, keeping only the intersection)'''
total = pd.concat(wholedata, axis=1, join="inner")  # join="inner" keeps only keys present in every frame

# total is the final result we want!

'''=====step3: check for NaN values and remove samples that contain them====='''
print(np.count_nonzero(total.isnull()))  # we can see that lots of null values exist!

total.dropna(inplace=True)  # inplace=True so that total itself is changed
# axis=0 by default, how='any' by default; for reference see:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html

print(np.count_nonzero(total.isnull()))  # prints zero, as expected (no null values)

total.to_csv("DATA.csv")  # saves to the cwd with the given file name and extension (.csv)




# todo: read https://intellij-support.jetbrains.com/hc/en-us/community/posts/360004715019-My-python-file-will-execute-code-from-another-python-file-and-when-I-deleted-the-file-the-code-was-excuted-from-other-files-it-gave-me-an-error

# todo: call the school's tech support to get Microsoft Word working again (need to finish writing the paper)

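The set_index / key-cleanup / inner-concat / dropna pipeline in `Extract Data_2.py` can be sketched end-to-end on toy frames (subject keys and column names here are made up; the real files are the `demo.total.csv` and `con_aparc_*.csv` inputs):

```python
import numpy as np
import pandas as pd

# Phenotype keys carry underscores ("A_1"), connectivity keys do not ("A1").
pheno = pd.DataFrame({"subjectkey": ["A_1", "B_2", "C_3"], "sex": ["F", "M", "F"]})
counts = pd.DataFrame({"subjectkey": ["A1", "B2"], "edge0": [10.0, np.nan]})

frames = [pheno, counts]
for f in frames:
    f.set_index("subjectkey", inplace=True)  # inplace=True mutates the listed frames

# Normalize the phenotype keys ("A_1" -> "A1") so the indices match.
pheno.index = pheno.index.str.replace("_", "", regex=False)

total = pd.concat([pheno["sex"], counts], axis=1, join="inner")  # keep only shared keys
total = total.dropna()                                           # drop rows with any NaN
print(total.index.tolist())  # → ['A1']
```

The inner join first narrows the index to keys present in every frame (here `A1` and `B2`), and `dropna` then removes `B2` because its connectivity value is missing, leaving a complete-case table.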

