GH-31303: [Python] Remove the legacy ParquetDataset custom python-based implementation #39112

Merged

Changes from all commits (29 commits)
8070c7a
Initial commit
AlenkaF Dec 5, 2023
e0228fb
Remove additional code connected to ParquetDatasetPiece
AlenkaF Dec 6, 2023
94ad4e9
Merge _ParquetDatasetV2 and ParquetDataset
AlenkaF Dec 6, 2023
e5ff41d
Remove metadata_collector duplicate in write_to_dataset
AlenkaF Dec 6, 2023
31ad7d7
Linter
AlenkaF Dec 6, 2023
d9cc412
Remove PartitionSet, ParquetPartitions and few helper methods
AlenkaF Dec 7, 2023
abf58b9
Remove partition_filename_cb
AlenkaF Dec 7, 2023
8e72811
Keep use_legacy_dataset but deprecate it
AlenkaF Dec 7, 2023
863b15e
Lint .rst file
AlenkaF Dec 7, 2023
3e784b4
Update test marks
AlenkaF Dec 7, 2023
e13768e
Clean up the docstrings
AlenkaF Dec 11, 2023
bf0ce99
Remove a test for unsupported keywords and update docstrings
AlenkaF Dec 11, 2023
c9a7924
Change how we deal with unsupported keywords in ParquetDataset
AlenkaF Dec 11, 2023
b6799cf
Some more changes to the docstrings
AlenkaF Dec 11, 2023
45a2409
Update ParquetDataset docstrings
AlenkaF Dec 11, 2023
4573479
Fix docstring examples
AlenkaF Dec 11, 2023
88c340e
Remove metadata from read_table
AlenkaF Dec 12, 2023
0cbd03d
One more metadata case to remove in read_table
AlenkaF Dec 12, 2023
5687a1d
Remove dataset marks and some unused tempdir parameters
AlenkaF Dec 20, 2023
abe8ab0
Removed pandas marks
AlenkaF Dec 20, 2023
5b1f6ed
Add test for duplicate column selection in read_table and remove coup…
AlenkaF Dec 21, 2023
27c9f78
Use public attribute
AlenkaF Dec 21, 2023
cbd91cd
Keep issue reference for test_empty_directory
AlenkaF Dec 21, 2023
360d762
Leave out text in docstring of ParquetDataset
AlenkaF Dec 21, 2023
33bf492
Add back one pandas mark that should have been removed
AlenkaF Dec 21, 2023
77b4ecb
Add ValueError for metadata in FileSystem.read_parquet
AlenkaF Dec 21, 2023
4c89276
xfail test_read_multiple_parquet_files
AlenkaF Dec 21, 2023
481a85c
Add more xfails for _ensure_filesystem error
AlenkaF Dec 21, 2023
a2e75a4
Linter fixed
AlenkaF Dec 21, 2023
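
Note on the commit "Keep use_legacy_dataset but deprecate it" above: after this PR the keyword is still accepted, but only to warn; the legacy code path it used to select is gone. A minimal sketch of the expected behaviour (the exact warning class and message are assumptions, not verified against the merged code):

```python
import warnings

import pyarrow.parquet as pq

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Passing the deprecated keyword should still work but emit a warning;
    # it no longer switches between implementations.
    dataset = pq.ParquetDataset("dataset_name/", use_legacy_dataset=False)
    # Expected: a FutureWarning (assumed) telling users to drop the keyword.
    print([str(w.message) for w in caught])
```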
38 changes: 11 additions & 27 deletions docs/source/python/parquet.rst
@@ -511,36 +511,20 @@ from a remote filesystem into a pandas dataframe you may need to run
 ``sort_index`` to maintain row ordering (as long as the ``preserve_index``
 option was enabled on write).
 
-.. note::
-
-   The ParquetDataset is being reimplemented based on the new generic Dataset
-   API (see the :ref:`dataset` docs for an overview). This is not yet the
-   default, but can already be enabled by passing the ``use_legacy_dataset=False``
-   keyword to :class:`ParquetDataset` or :func:`read_table`::
-
-      pq.ParquetDataset('dataset_name/', use_legacy_dataset=False)
-
-   Enabling this gives the following new features:
-
-   - Filtering on all columns (using row group statistics) instead of only on
-     the partition keys.
-   - More fine-grained partitioning: support for a directory partitioning scheme
-     in addition to the Hive-like partitioning (e.g. "/2019/11/15/" instead of
-     "/year=2019/month=11/day=15/"), and the ability to specify a schema for
-     the partition keys.
-   - General performance improvement and bug fixes.
-
-   It also has the following changes in behaviour:
-
-   - The partition keys need to be explicitly included in the ``columns``
-     keyword when you want to include them in the result while reading a
-     subset of the columns
-
-   This new implementation is already enabled in ``read_table``, and in the
-   future, this will be turned on by default for ``ParquetDataset``. The new
-   implementation does not yet cover all existing ParquetDataset features (e.g.
-   specifying the ``metadata``, or the ``pieces`` property API). Feedback is
-   very welcome.
+Other features:
+
+- Filtering on all columns (using row group statistics) instead of only on
+  the partition keys.
+- Fine-grained partitioning: support for a directory partitioning scheme
+  in addition to the Hive-like partitioning (e.g. "/2019/11/15/" instead of
+  "/year=2019/month=11/day=15/"), and the ability to specify a schema for
+  the partition keys.
+
+Note:
+
+- The partition keys need to be explicitly included in the ``columns``
+  keyword when you want to include them in the result while reading a
+  subset of the columns
 
 
 Using with Spark
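
For illustration, the features described in the updated docs above as a minimal sketch (the dataset path and column names here are hypothetical, not taken from the docs):

```python
import pyarrow as pa
import pyarrow.dataset as ds
import pyarrow.parquet as pq

# Filtering on any column via row group statistics, not just partition keys:
dataset = pq.ParquetDataset("dataset_name/", filters=[("value", ">", 100)])

# Directory partitioning ("/2019/11/15/") with an explicit partition schema,
# instead of the Hive-style "/year=2019/month=11/day=15/":
part = ds.partitioning(pa.schema([("year", pa.int16()),
                                  ("month", pa.int8()),
                                  ("day", pa.int8())]))
dataset = pq.ParquetDataset("dataset_name/", partitioning=part)

# Partition keys must be listed explicitly in ``columns`` to appear in the
# result when reading a subset of the columns:
table = dataset.read(columns=["value", "year"])
```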
29 changes: 0 additions & 29 deletions python/benchmarks/parquet.py
@@ -29,35 +29,6 @@
     pq = None
 
 
-class ParquetManifestCreation(object):
-    """Benchmark creating a parquet manifest."""
-
-    size = 10 ** 6
-    tmpdir = None
-
-    param_names = ('num_partitions', 'num_threads')
-    params = [(10, 100, 1000), (1, 8)]
-
-    def setup(self, num_partitions, num_threads):
-        if pq is None:
-            raise NotImplementedError("Parquet support not enabled")
-
-        self.tmpdir = tempfile.mkdtemp('benchmark_parquet')
-        rnd = np.random.RandomState(42)
-        num1 = rnd.randint(0, num_partitions, size=self.size)
-        num2 = rnd.randint(0, 1000, size=self.size)
-        output_df = pd.DataFrame({'num1': num1, 'num2': num2})
-        output_table = pa.Table.from_pandas(output_df)
-        pq.write_to_dataset(output_table, self.tmpdir, ['num1'])
-
-    def teardown(self, num_partitions, num_threads):
-        if self.tmpdir is not None:
-            shutil.rmtree(self.tmpdir)
-
-    def time_manifest_creation(self, num_partitions, num_threads):
-        pq.ParquetManifest(self.tmpdir, metadata_nthreads=num_threads)
-
-
 class ParquetWriteBinary(object):
 
     def setup(self):
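
The removed benchmark exercised ``pq.ParquetManifest``, which belongs to the legacy implementation deleted by this PR. A rough sketch of discovering the same partitioned directory with the generic Dataset API instead (assuming a Hive-partitioned layout like the one the old benchmark wrote with ``write_to_dataset``):

```python
import pyarrow.dataset as ds

# Dataset discovery enumerates the files and fragments of the directory,
# which is roughly the work the old manifest-creation benchmark measured.
dataset = ds.dataset("benchmark_parquet", format="parquet",
                     partitioning="hive")  # path stands in for the tmpdir
print(dataset.files)  # file paths found during discovery
```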