预处理2

import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
import warnings
warnings.filterwarnings('ignore')
# Directories that hold the raw per-file CSV inputs for each split.
train_path = './train'
test_path = './test'

# os.listdir returns entries in arbitrary order and also reports stray
# non-CSV entries (editor/OS metadata, checkpoint folders).  Keep only CSV
# files and sort them so the later merge is reproducible and never hands a
# non-CSV path to pd.read_csv.
train_files = sorted(
    name for name in os.listdir(train_path) if name.lower().endswith('.csv')
)
test_files = sorted(
    name for name in os.listdir(test_path) if name.lower().endswith('.csv')
)
print(len(train_files), len(test_files))

# The bare expressions below only render output in an interactive/notebook
# session; when run as a plain script they are evaluated and discarded.
train_files[:3]
test_files[:3]

# Load one training file to inspect its layout before merging everything.
df = pd.read_csv(f'{train_path}/6966.csv')
df.head()
df['type'].unique()
df.shape
# Read every training CSV (with a progress bar) and stack the frames into one.
ret = [pd.read_csv(f'{train_path}/{file}') for file in tqdm(train_files)]
# NOTE(review): concat keeps each file's own row index, so index labels repeat
# across files in the merged frame — confirm downstream code does not rely on
# a unique index.
df = pd.concat(ret)

# Overwrite the header with short column names.
# NOTE(review): assumes every training file has exactly these seven columns in
# this order — confirm against the raw data.
df.columns = ['ship','x','y','v','d','time','type']

# Persist the merged training data under key 'df'.  `key` is passed by keyword
# because pandas deprecates (and later removes) passing it positionally.
df.to_hdf('./train.h5', key='df', mode='w')
# Read every test CSV (with a progress bar) and stack the frames into one.
ret = [pd.read_csv(f'{test_path}/{file}') for file in tqdm(test_files)]
# NOTE(review): concat keeps each file's own row index, so index labels repeat
# across files in the merged frame — confirm downstream code does not rely on
# a unique index.
df = pd.concat(ret)

# Overwrite the header with short column names; the test split is given six
# names here (no 'type' column).
# NOTE(review): assumes every test file has exactly these six columns in this
# order — confirm against the raw data.
df.columns = ['ship','x','y','v','d','time']

# Persist the merged test data under key 'df'.  `key` is passed by keyword
# because pandas deprecates (and later removes) passing it positionally.
df.to_hdf('./test.h5', key='df', mode='w')

# Notebook-style previews of the merged test frame; no effect in a script.
df.shape
df.head()
发布了8 篇原创文章 · 获赞 0 · 访问量 85

猜你喜欢

转载自blog.csdn.net/fang156239305/article/details/103916029
今日推荐