Skip to content

Commit

Permalink
fix data loading
Browse files Browse the repository at this point in the history
  • Loading branch information
ThomasFaria authored Dec 10, 2024
1 parent c1b5075 commit 8332082
Showing 1 changed file with 1 addition and 6 deletions.
7 changes: 1 addition & 6 deletions src/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import fasttext
import mlflow
import pandas as pd
-import pyarrow.parquet as pq
from sklearn.model_selection import train_test_split
from preprocessor import Preprocessor
from constants import TEXT_FEATURE, Y, DATA_PATH, LABEL_PREFIX
Expand All @@ -18,11 +17,7 @@ def load_data():
"""
Load data for training and test.
"""
-    fs = s3fs.S3FileSystem(
-        client_kwargs={"endpoint_url": "https://minio.lab.sspcloud.fr"},
-        anon=True
-    )
-    df = pq.ParquetDataset(DATA_PATH, filesystem=fs).read_pandas().to_pandas()
+    df = pd.read_parquet(f"https://minio.lab.sspcloud.fr/{DATA_PATH}")
return df.sample(frac=0.1)


Expand Down

0 comments on commit 8332082

Please sign in to comment.