Skip to content

Commit

Permalink
Fix TFDS builders that try to access GCS even though the data is local.
Browse files — browse the repository at this point in the history
PiperOrigin-RevId: 666405348
  • Loading branch information
The TensorFlow Datasets Authors committed Aug 28, 2024
1 parent 858fbe5 commit 5b24415
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
12 changes: 8 additions & 4 deletions tensorflow_datasets/datasets/pg19/pg19_dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,11 @@ def _split_generators(self, dl_manager):
del dl_manager # Unused

metadata_dict = dict()
metadata_path = os.path.join(_DATA_DIR, 'metadata.csv')
if self._data_dir:
data_dir = self._data_dir
else:
data_dir = _DATA_DIR
metadata_path = os.path.join(data_dir, 'metadata.csv')
metadata = tf.io.gfile.GFile(metadata_path).read().splitlines()

for row in metadata:
Expand All @@ -62,21 +66,21 @@ def _split_generators(self, dl_manager):
name=tfds.Split.TRAIN,
gen_kwargs={
'metadata': metadata_dict,
'filepath': os.path.join(_DATA_DIR, 'train'),
'filepath': os.path.join(data_dir, 'train'),
},
),
tfds.core.SplitGenerator(
name=tfds.Split.VALIDATION,
gen_kwargs={
'metadata': metadata_dict,
'filepath': os.path.join(_DATA_DIR, 'validation'),
'filepath': os.path.join(data_dir, 'validation'),
},
),
tfds.core.SplitGenerator(
name=tfds.Split.TEST,
gen_kwargs={
'metadata': metadata_dict,
'filepath': os.path.join(_DATA_DIR, 'test'),
'filepath': os.path.join(data_dir, 'test'),
},
),
]
Expand Down
2 changes: 2 additions & 0 deletions tensorflow_datasets/robotics/dataset_importer_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ def get_relative_dataset_location(self):
pass

def get_dataset_location(self):
  """Returns the location this builder's dataset is read from.

  A truthy `self._data_dir` takes precedence and is returned unchanged.
  Otherwise the location is `str(self._GCS_BUCKET)` joined with the value of
  `self.get_relative_dataset_location()`.
  """
  local_dir = self._data_dir
  if not local_dir:
    # No data directory is set: fall back to the bucket-relative location.
    bucket_root = str(self._GCS_BUCKET)
    return os.path.join(bucket_root, self.get_relative_dataset_location())
  return local_dir
Expand Down

0 comments on commit 5b24415

Please sign in to comment.