Building a Dataset that can be split with train_test_split


# Create a custom dataset that wraps pre-computed features and labels
from torch.utils.data import Dataset, DataLoader

class TextClassificationDataset(Dataset):
    def __init__(self, X, y):
        self.X = X  # feature vectors, one per sample
        self.y = y  # class labels, one per sample

    def __len__(self):
        # The number of samples is the number of labels
        return len(self.y)

    def __getitem__(self, index):
        # Return a single (features, label) pair
        return self.X[index], self.y[index]

dataset = TextClassificationDataset(features, labels)
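
For context, here is a minimal sketch of what features and labels might look like, assuming the text has already been vectorized in an earlier step; the shapes and random data below are illustrative assumptions, not part of the original post.

# Illustrative placeholders only -- the post assumes `features` and `labels`
# were produced by an earlier preprocessing/vectorization step.
import torch

num_samples, num_features, num_classes = 1000, 300, 4      # assumed sizes
features = torch.randn(num_samples, num_features)           # e.g. sentence embeddings
labels = torch.randint(0, num_classes, (num_samples,))      # integer class ids

dataset = TextClassificationDataset(features, labels)
print(len(dataset))         # 1000
print(dataset[0][0].shape)  # torch.Size([300])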

# Split the data into training and testing sets and create data loaders
from sklearn.model_selection import train_test_split

# train_test_split works on any indexable object with a length, so the
# Dataset can be split directly; the result is two Python lists of
# (features, label) pairs. stratify=labels keeps the class proportions
# the same in both splits.
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42, stratify=labels)

batch_size = 32
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)   # shuffle training batches each epoch
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)    # keep test order fixed
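
As an alternative to splitting the Dataset object itself (which copies the samples into Python lists), the same stratified split can be done on index arrays and wrapped with torch.utils.data.Subset. This is a sketch of that variation, not the approach used in the original post.

# Split indices instead of samples, then index lazily through Subset
import numpy as np
from torch.utils.data import Subset
from sklearn.model_selection import train_test_split

indices = np.arange(len(dataset))
train_idx, test_idx = train_test_split(indices, test_size=0.2, random_state=42, stratify=labels)

train_loader = DataLoader(Subset(dataset, train_idx), batch_size=batch_size, shuffle=True)
test_loader = DataLoader(Subset(dataset, test_idx), batch_size=batch_size, shuffle=False)

Either way, each loader yields batches of (features, labels) tensors ready for a training or evaluation loop.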

Reposted from blog.csdn.net/qq_38735017/article/details/132632047