Skip to content

Commit

Permalink
Merge pull request #142 from aakankshaduggal/drop-index-0-columns
Browse files Browse the repository at this point in the history
Drop `__index_level_0__` columns
  • Loading branch information
markmc authored Jul 16, 2024
2 parents ac409fc + a3681ee commit 5c52d0f
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions src/instructlab/sdg/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ def _drop_duplicates(self, dataset, cols):
Drop duplicates from the dataset based on the columns provided.
"""
df = dataset.to_pandas()
- df.drop_duplicates(subset=cols, inplace=True)
- return Dataset.from_pandas(df)
+ df = df.drop_duplicates(subset=cols).reset_index(drop=True)
+ ds = Dataset.from_pandas(df)
+ return ds

def generate(self, dataset) -> Dataset:
"""
Expand Down

0 comments on commit 5c52d0f

Please sign in to comment.