Skip to content

Commit

Permalink
Merge pull request #1032 from VisLab/clean-up
Browse files Browse the repository at this point in the history
Allow tabular files to be empty (column header only)
  • Loading branch information
VisLab authored Oct 12, 2024
2 parents 385c88d + a527eee commit 3df3f18
Show file tree
Hide file tree
Showing 18 changed files with 353 additions and 26 deletions.
70 changes: 46 additions & 24 deletions hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,32 +449,54 @@ def get_column_refs(self):
return []

def _open_dataframe_file(self, file, has_column_names, input_type):
pandas_header = 0
if not has_column_names:
pandas_header = None
""" Set the _dataframe property of BaseInput. """
pandas_header = 0 if has_column_names else None

# If file is already a DataFrame
if isinstance(file, pd.DataFrame):
self._dataframe = file.astype(str)
self._has_column_names = self._dataframe_has_names(self._dataframe)
elif not file:
raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file passed to BaseInput.", file)
elif input_type in self.TEXT_EXTENSION:
try:
self._dataframe = pd.read_csv(file, delimiter='\t', header=pandas_header,
dtype=str, keep_default_na=True, na_values=("", "null"))
except Exception as e:
raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, str(e), self.name) from e
# Convert nan values to a known value
self._dataframe = self._dataframe.fillna("n/a")
elif input_type in self.EXCEL_EXTENSION:
try:
self._loaded_workbook = openpyxl.load_workbook(file)
loaded_worksheet = self.get_worksheet(self._worksheet_name)
self._dataframe = self._get_dataframe_from_worksheet(loaded_worksheet, has_column_names)
except Exception as e:
raise HedFileError(HedExceptions.GENERIC_ERROR, str(e), self.name) from e
else:
raise HedFileError(HedExceptions.INVALID_EXTENSION, "", file)
return

if self._dataframe.size == 0:
raise HedFileError(HedExceptions.INVALID_DATAFRAME, "Invalid dataframe(malformed datafile, etc)", file)
# Check for empty file or None
if not file:
raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file specification passed to BaseInput.", file)

# Handle Excel file input
if input_type in self.EXCEL_EXTENSION:
self._load_excel_file(file, has_column_names)
return

# Handle unsupported file extensions
if input_type not in self.TEXT_EXTENSION:
raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unsupported file extension for text files.",
self.name)

# Handle text file input (CSV/TSV)
self._load_text_file(file, pandas_header)

def _load_excel_file(self, file, has_column_names):
    """ Read an Excel workbook and fill self._dataframe from the selected worksheet. """
    try:
        workbook = openpyxl.load_workbook(file)
        self._loaded_workbook = workbook
        worksheet = self.get_worksheet(self._worksheet_name)
        self._dataframe = self._get_dataframe_from_worksheet(worksheet, has_column_names)
    except Exception as e:
        # Wrap any loader failure in the project's file-format error.
        message = f"Failed to load Excel file: {str(e)}"
        raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, message, self.name) from e

def _load_text_file(self, file, pandas_header):
""" Load an text file"""
if isinstance(file, str) and os.path.exists(file) and os.path.getsize(file) == 0:
self._dataframe = pd.DataFrame() # Handle empty file
return

try:
self._dataframe = pd.read_csv(file, delimiter='\t', header=pandas_header, skip_blank_lines=True,
dtype=str, keep_default_na=True, na_values=("", "null"))
# Replace NaN values with a known value
self._dataframe = self._dataframe.fillna("n/a")
except pd.errors.EmptyDataError:
self._dataframe = pd.DataFrame() # Handle case where file has no data
except Exception as e:
raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, f"Failed to load text file: {str(e)}",
self.name) from e
6 changes: 6 additions & 0 deletions tests/data/bids_tests/eeg_ds003645s_empty/CHANGES
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
1.0.0 2021-05-11
- First release
Revision history for Face Recognition experiment by Wakeman-Henson

version 1.0 - April 2021
- Initial release of EEG data in this experiment for HED education purposes
24 changes: 24 additions & 0 deletions tests/data/bids_tests/eeg_ds003645s_empty/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
**Introduction:**
This dataset consists of the MEEG (sMRI+MEG+EEG) portion of the multi-subject, multi-modal face processing dataset (ds000117). This dataset was originally acquired and shared by Daniel Wakeman and Richard Henson (https://pubmed.ncbi.nlm.nih.gov/25977808/). The data has been repackaged in EEGLAB format and has undergone minimal preprocessing as well as reorganization and annotation of the dataset events.

**Overview of the experiment:**
Eighteen participants completed two recording sessions spaced three months apart – one session recorded fMRI and the other simultaneously recorded MEG and EEG data. During each session, participants performed the same simple perceptual task, responding to presented photographs of famous, unfamiliar, and scrambled faces by pressing one of two keyboard keys to indicate a subjective yes or no decision as to the relative spatial symmetry of the viewed face. Famous faces were feature-matched to unfamiliar faces; half the faces were female. The two sessions (MEEG, fMRI) had different organizations of event timing and presentation because of technological requirements of the respective imaging modalities. Each individual face was presented twice during the session. For half of the presented faces, the second presentation followed immediately after the first. For the other half, the second presentation was delayed by 5-15 face presentations.

**Preprocessing:**
The preprocessing, which was performed using the `wh_extracteeg_BIDS.m` located in the code directory, includes the following steps:
* Ignore MRI data except for sMRI.
* Extract EEG channels out of the MEG/EEG fif data
* Add fiducials
* Rename EOG and EKG channels
* Extract events from event channel
* Remove spurious events 5, 6, 7, 13, 14, 15, 17, 18 and 19
* Remove spurious event 24 for subject 3 run 4
* Rename events taking into account button assigned to each subject
* Correct event latencies (events have a shift of 34 ms)
* Resample data to 250 Hz (this step is performed because this dataset is used in a tutorial for EEGLAB and needs to be lightweight)
* Remove event fields `urevent` and `duration`
* Save as EEGLAB .set format

**Data curators:**
Ramon Martinez, Dung Truong, Scott Makeig, Arnaud Delorme (UCSD, La Jolla, CA, USA), Kay Robbins (UTSA, San Antonio, TX, USA)

24 changes: 24 additions & 0 deletions tests/data/bids_tests/eeg_ds003645s_empty/dataset_description.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"Name": "Face processing MEEG dataset with HED annotation",
"BIDSVersion": "1.9.0",
"HEDVersion": "8.2.0",
"License": "CC0",
"Authors": [
"Daniel G. Wakeman",
"Richard N Henson",
"Dung Truong (curation)",
"Kay Robbins (curation)",
"Scott Makeig (curation)",
"Arno Delorme (curation)"
],
"ReferencesAndLinks": [
"Wakeman, D., Henson, R. (2015). A multi-subject, multi-modal human neuroimaging dataset. Sci Data 2, 150001. https://doi.org/10.1038/sdata.2015.1",
"Robbins, K., Truong, D., Appelhoff, S., Delorme, A., & Makeig, S. (2021). Capturing the nature of events and event context using Hierarchical Event Descriptors (HED). In press for NeuroImage Special Issue Practice in MEEG. NeuroImage 245 (2021) 118766. Online: https://www.sciencedirect.com/science/article/pii/S1053811921010387.",
"Robbins, K., Truong, D., Jones, A., Callanan, I., & Makeig, S. (2021). Building FAIR functionality: Annotating events in time series data using Hierarchical Event Descriptors (HED). Neuroinformatics Special Issue Building the NeuroCommons. Neuroinformatics https://doi.org/10.1007/s12021-021-09537-4. Online: https://link.springer.com/article/10.1007/s12021-021-09537-4."
],
"Funding": [
"Experiment was supported by the UK Medical Research Council (MC_A060_5PR10) and Elekta Ltd.",
"Curation was supported by: Army Research Laboratory W911NF-10-2-0022, NIH R01 EB023297-03, NIH R01 NS047293-l4, and NIH R24 MH120037-01."
],
"DatasetDOI": "10.18112/openneuro.ds003645.v1.0.0"
}
17 changes: 17 additions & 0 deletions tests/data/bids_tests/eeg_ds003645s_empty/participants.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"participant_id": {
"LongName": "Participant identifier",
"Description": "Unique subject identifier"
},
"gender": {
"Description": "Sex of the subject",
"Levels": {
"M": "male",
"F": "female"
}
},
"age": {
"Description": "Age of the subject",
"Units": "years"
}
}
3 changes: 3 additions & 0 deletions tests/data/bids_tests/eeg_ds003645s_empty/participants.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
participant_id age gender
sub-002 31 M
sub-003 25 M
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"TaskName": "FacePerception",
"TaskDescription": "Subjects viewed stimuli on a screen during six, 7.5 minute runs. The stimuli were photographs of either a famous face (known to most of British or a scrambled face, and appeared for a random duration between 800 and 1,000 ms. Subjects were instructed to fixate centrally throughout the experiment. To ensure attention to each stimulus, participants were asked to press one of two keys with either their left or right index finger (assignment counter-balanced across participants). Their key-press was based on how symmetric they regarded each image: pressing one or the other key depending whether they thought the image was 'more' or 'less symmetric' than average.",
"InstitutionAddress": "15 Chaucer Road, Cambridge, UK",
"InstitutionName": "MRC Cognition & Brain Sciences Unit",
"EEGReference": "nose",
"EEGGround": "left collar bone",
"SamplingFrequency": 250,
"PowerLineFrequency": 50,
"SoftwareFilters": {
"LowPassFilter": {
"cutoff": "350 (Hz)"
}
},
"EEGPlacementScheme": "extended 10-10% system",
"CapManufacturer": "Easycap",
"EEGChannelCount": 70,
"EOGChannelCount": 2,
"RecordingType": "continuous",
"MiscChannelCount": 309,
"RecordingDuration": 494,
"ECGChannelCount": 0,
"EMGChannelCount": 0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
onset duration sample event_type face_type rep_status trial rep_lag value stim_file
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"TaskName": "FacePerception",
"TaskDescription": "Subjects viewed stimuli on a screen during six, 7.5 minute runs. The stimuli were photographs of either a famous face (known to most of British or a scrambled face, and appeared for a random duration between 800 and 1,000 ms. Subjects were instructed to fixate centrally throughout the experiment. To ensure attention to each stimulus, participants were asked to press one of two keys with either their left or right index finger (assignment counter-balanced across participants). Their key-press was based on how symmetric they regarded each image: pressing one or the other key depending whether they thought the image was 'more' or 'less symmetric' than average.",
"InstitutionAddress": "15 Chaucer Road, Cambridge, UK",
"InstitutionName": "MRC Cognition & Brain Sciences Unit",
"EEGReference": "nose",
"EEGGround": "left collar bone",
"SamplingFrequency": 250,
"PowerLineFrequency": 50,
"SoftwareFilters": {
"LowPassFilter": {
"cutoff": "350 (Hz)"
}
},
"EEGPlacementScheme": "extended 10-10% system",
"CapManufacturer": "Easycap",
"EEGChannelCount": 70,
"EOGChannelCount": 2,
"RecordingType": "continuous",
"MiscChannelCount": 309,
"RecordingDuration": 494,
"ECGChannelCount": 0,
"EMGChannelCount": 0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"TaskName": "FacePerception",
"TaskDescription": "Subjects viewed stimuli on a screen during six, 7.5 minute runs. The stimuli were photographs of either a famous face (known to most of British or a scrambled face, and appeared for a random duration between 800 and 1,000 ms. Subjects were instructed to fixate centrally throughout the experiment. To ensure attention to each stimulus, participants were asked to press one of two keys with either their left or right index finger (assignment counter-balanced across participants). Their key-press was based on how symmetric they regarded each image: pressing one or the other key depending whether they thought the image was 'more' or 'less symmetric' than average.",
"InstitutionAddress": "15 Chaucer Road, Cambridge, UK",
"InstitutionName": "MRC Cognition & Brain Sciences Unit",
"EEGReference": "nose",
"EEGGround": "left collar bone",
"SamplingFrequency": 250,
"PowerLineFrequency": 50,
"SoftwareFilters": {
"LowPassFilter": {
"cutoff": "350 (Hz)"
}
},
"EEGPlacementScheme": "extended 10-10% system",
"CapManufacturer": "Easycap",
"EEGChannelCount": 70,
"EOGChannelCount": 2,
"RecordingType": "continuous",
"MiscChannelCount": 309,
"RecordingDuration": 494,
"ECGChannelCount": 0,
"EMGChannelCount": 0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@


Loading

0 comments on commit 3df3f18

Please sign in to comment.