-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataset.py
29 lines (21 loc) · 922 Bytes
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import pandas as pd
import torch
from torch.utils.data.dataset import random_split
from torch.utils.data import TensorDataset
from math import ceil
def get_data(train_path="new_train.csv", test_path="new_test.csv"):
    """Load the train/test CSV files and wrap them in tensor datasets.

    The training file is read, its ``ID_code`` and ``target`` columns are
    separated from the remaining (feature) columns, and the result is split
    at random into an 80% training subset and a 20% validation subset.
    The test file is read the same way, except it has no ``target`` column.

    Args:
        train_path: Path of the training CSV. Must contain the columns
            ``ID_code`` and ``target``; every other column is treated as a
            feature and converted to ``float32``.
        test_path: Path of the test CSV. Must contain ``ID_code``; every
            other column is treated as a feature and converted to
            ``float32``.

    Returns:
        A tuple ``(train_ds, val_ds, test_ds, test_ids)``:

        * ``train_ds`` / ``val_ds`` -- random subsets of a ``TensorDataset``
          yielding ``(features, target)`` pairs.
        * ``test_ds`` -- ``TensorDataset`` yielding ``(features, label)``
          pairs, where ``label`` is always ``0.0`` (see note below).
        * ``test_ids`` -- the ``ID_code`` column of the test file.
    """
    # --- training / validation data -------------------------------------
    train_frame = pd.read_csv(train_path)
    train_targets = torch.tensor(
        train_frame["target"].values, dtype=torch.float32
    )
    train_features = torch.tensor(
        train_frame.drop(["ID_code", "target"], axis=1).values,
        dtype=torch.float32,
    )
    full_ds = TensorDataset(train_features, train_targets)

    # Derive the validation size from the training size so the two lengths
    # always sum to len(full_ds), which random_split requires.
    n_train = int(0.8 * len(full_ds))
    n_val = len(full_ds) - n_train
    train_ds, val_ds = random_split(full_ds, [n_train, n_val])

    # --- test data ------------------------------------------------------
    test_frame = pd.read_csv(test_path)
    test_ids = test_frame["ID_code"]
    test_features = torch.tensor(
        test_frame.drop(["ID_code"], axis=1).values, dtype=torch.float32
    )
    # The test file has no targets. Previously the *training* targets were
    # attached here, which leaked training labels into the test set and
    # failed with a size mismatch whenever the two files had different row
    # counts. Zero placeholders of the right length keep every item a
    # (features, label) pair for callers that unpack two values.
    test_placeholder_labels = torch.zeros(
        len(test_frame), dtype=torch.float32
    )
    test_ds = TensorDataset(test_features, test_placeholder_labels)

    return train_ds, val_ds, test_ds, test_ids