1. Organización de datos:
Formación de generación de datos de validación:
python create_data.py nuscenes_data_prep --data_path=NUSCENES_TRAINVAL_DATASET_ROOT --version="v1.0-trainval" --max_sweeps=10
python create_data.py nuscenes_data_prep --data_path=NUSCENES_TEST_DATASET_ROOT --version="v1.0-test" --max_sweeps=10 --dataset_name="NuscenesDataset"
Si es necesario modificar los datos personalizados de acuerdo con la categoría, convierta los datos en un archivo bin, empaquételos en un archivo pkl y luego ejecute el comando anterior.
Tales como: (1) my_common.py empaqueta etiquetas y nubes de puntos en pkl.
(2) python my_create_data.py mydata --data_path=datapath
2. Modificar el archivo de configuración
Abra second.pytorch/second/configs/car.lite.config para editar la categoría y la ruta de datos de entrenamiento.
train_input_reader: {
...
database_sampler {
database_info_path: "/path/to/dataset_dbinfos_train.pkl"
...
}
dataset: {
dataset_class_name: "DATASET_NAME"
kitti_info_path: "/path/to/dataset_infos_train.pkl"
kitti_root_path: "DATASET_ROOT"
}
}
...
eval_input_reader: {
...
dataset: {
dataset_class_name: "DATASET_NAME"
kitti_info_path: "/path/to/dataset_infos_val.pkl"
kitti_root_path: "DATASET_ROOT"
}
}
3. Empieza a entrenar
GPU única:
python ./pytorch/train.py train --config_path=./configs/car.fhd.config --model_dir=/path/to/model_dir
Varias GPU:
CUDA_VISIBLE_DEVICES=0,1,3 python ./pytorch/train.py train --config_path=./configs/car.fhd.config --model_dir=/path/to/model_dir --multi_gpu=True
Entrenamiento de coma flotante de 16 bits:
Modifique el archivo de configuración y establezca enable_mixed_precision en verdadero.
(1) Si desea entrenar un nuevo modelo, asegúrese de que "/path/to/model_dir" no exista. Si model_dir no existe, se creará un nuevo directorio; de lo contrario, se leerá el punto de control que contiene.
(2) El proceso de entrenamiento usa el tamaño de lote = 6 como el valor predeterminado de 1080 Ti. Si la memoria de la GPU es menor, debe reducir el tamaño del lote.
(3) Actualmente solo se admite el entrenamiento con una sola GPU, pero entrenar un modelo lleva solo 20 horas en una sola 1080Ti; con superconvergencia, bastan 50 épocas para alcanzar 78,3 AP (coche, Medium 3D) en el conjunto de validación de KITTI.
4. Verificar
El resultado de la detección se guarda como result.pkl de forma predeterminada; puede configurar --pickle_result=False para guardar el resultado en el formato de etiquetas de KITTI.
python ./pytorch/train.py evaluate --config_path=./configs/car.fhd.config --model_dir=/path/to/model_dir --measure_time=True --batch_size=1
5. Entrenamiento de datos personalizados
Debe modificar o reescribir second.data.kitti_dataset para registrar funciones en el modo @register_dataset
Debe modificar eval.py durante el entrenamiento, principalmente para personalizar la categoría de datos, etc.
from pathlib import Path
import pickle
import time
from functools import partial
import numpy as np
from second.core import box_np_ops
from second.core import preprocess as prep
from second.data import kitti_common as kitti
from second.utils.eval import get_coco_eval_result, get_official_eval_result
from second.data.dataset import Dataset, register_dataset
from second.utils.progress_bar import progress_bar_iter as prog_bar
@register_dataset
class KittiDataset(Dataset):
    """KITTI 3D object detection dataset.

    Loads a pickled list of per-image "info" dicts (produced by
    create_kitti_info_file), serves point clouds / calibration /
    annotations per sample, converts network detections to KITTI-format
    annos and runs the official + COCO-style evaluations.
    """
    # Each velodyne point is stored as (x, y, z, reflectance).
    NumPointFeatures = 4
    def __init__(self,
                 root_path,
                 info_path,
                 class_names=None,
                 prep_func=None,
                 num_point_features=None):
        """Load the info pickle and remember the preprocessing hooks.

        Args:
            root_path: dataset root; relative velodyne/image paths in the
                infos are resolved against it.
            info_path: path to a kitti_infos_*.pkl file (required).
            class_names: detection class names, used to map predicted
                label indices back to names.
            prep_func: preprocessing callable applied in __getitem__.
            num_point_features: unused here; presumably kept for interface
                compatibility with other registered datasets.
        """
        assert info_path is not None
        with open(info_path, 'rb') as f:
            infos = pickle.load(f)
        self._root_path = Path(root_path)
        self._kitti_infos = infos
        print("remain number of infos:", len(self._kitti_infos))
        self._class_names = class_names
        self._prep_func = prep_func
    def __len__(self):
        # One sample per info entry.
        return len(self._kitti_infos)
    def convert_detection_to_kitti_annos(self, detection):
        """Convert lidar-space detections into KITTI camera-frame annos.

        Each detection dict must carry "box3d_lidar", "label_preds" and
        "scores" (torch tensors) plus "metadata". 3D boxes are converted
        to the camera frame and projected to the image plane to produce
        2D bboxes; boxes entirely outside the image are dropped.
        """
        class_names = self._class_names
        det_image_idxes = [det["metadata"]["image_idx"] for det in detection]
        gt_image_idxes = [
            info["image"]["image_idx"] for info in self._kitti_infos
        ]
        annos = []
        for i in range(len(detection)):
            det_idx = det_image_idxes[i]
            det = detection[i]
            # info = self._kitti_infos[gt_image_idxes.index(det_idx)]
            # NOTE(review): indexing by position assumes `detection` is in
            # the same order as self._kitti_infos; the commented line above
            # looked the info up by image idx instead — confirm ordering
            # before evaluating shuffled/partial detection lists.
            info = self._kitti_infos[i]
            calib = info["calib"]
            rect = calib["R0_rect"]
            Trv2c = calib["Tr_velo_to_cam"]
            P2 = calib["P2"]
            final_box_preds = det["box3d_lidar"].detach().cpu().numpy()
            label_preds = det["label_preds"].detach().cpu().numpy()
            scores = det["scores"].detach().cpu().numpy()
            if final_box_preds.shape[0] != 0:
                # Shift z from box center to box bottom before the camera
                # conversion (KITTI camera boxes use a bottom-center origin,
                # see camera_box_origin below).
                final_box_preds[:, 2] -= final_box_preds[:, 5] / 2
                box3d_camera = box_np_ops.box_lidar_to_camera(
                    final_box_preds, rect, Trv2c)
                locs = box3d_camera[:, :3]
                dims = box3d_camera[:, 3:6]
                angles = box3d_camera[:, 6]
                camera_box_origin = [0.5, 1.0, 0.5]
                box_corners = box_np_ops.center_to_corner_box3d(
                    locs, dims, angles, camera_box_origin, axis=1)
                box_corners_in_image = box_np_ops.project_to_image(
                    box_corners, P2)
                # box_corners_in_image: [N, 8, 2]
                # 2D bbox = axis-aligned envelope of the 8 projected corners.
                minxy = np.min(box_corners_in_image, axis=1)
                maxxy = np.max(box_corners_in_image, axis=1)
                bbox = np.concatenate([minxy, maxxy], axis=1)
            anno = kitti.get_start_result_anno()
            num_example = 0
            box3d_lidar = final_box_preds
            for j in range(box3d_lidar.shape[0]):
                image_shape = info["image"]["image_shape"]
                # Skip boxes that fall completely outside the image.
                if bbox[j, 0] > image_shape[1] or bbox[j, 1] > image_shape[0]:
                    continue
                if bbox[j, 2] < 0 or bbox[j, 3] < 0:
                    continue
                # Clip the 2D box to the image bounds.
                bbox[j, 2:] = np.minimum(bbox[j, 2:], image_shape[::-1])
                bbox[j, :2] = np.maximum(bbox[j, :2], [0, 0])
                anno["bbox"].append(bbox[j])
                # convert center format to kitti format
                # box3d_lidar[j, 2] -= box3d_lidar[j, 5] / 2
                anno["alpha"].append(
                    -np.arctan2(-box3d_lidar[j, 1], box3d_lidar[j, 0]) +
                    box3d_camera[j, 6])
                anno["dimensions"].append(box3d_camera[j, 3:6])
                anno["location"].append(box3d_camera[j, :3])
                anno["rotation_y"].append(box3d_camera[j, 6])
                anno["name"].append(class_names[int(label_preds[j])])
                anno["truncated"].append(0.0)
                anno["occluded"].append(0)
                anno["score"].append(scores[j])
                num_example += 1
            if num_example != 0:
                # Stack the per-box lists into arrays, one anno per image.
                anno = {n: np.stack(v) for n, v in anno.items()}
                annos.append(anno)
            else:
                annos.append(kitti.empty_result_anno())
            num_example = annos[-1]["name"].shape[0]
            annos[-1]["metadata"] = det["metadata"]
        return annos
    def evaluation(self, detections, output_dir):
        """Evaluate `detections` with the official KITTI and COCO metrics.

        Returns None when the infos carry no ground-truth "annos" (e.g.
        the test split). `output_dir` is accepted for interface
        compatibility but not used here.

        When you want to eval your own dataset, you MUST set correct
        the z axis and box z center.
        If you want to eval by my KITTI eval function, you must
        provide the correct format annotations.
        ground_truth_annotations format:
        {
            bbox: [N, 4], if you fill fake data, MUST HAVE >25 HEIGHT!!!!!!
            alpha: [N], you can use -10 to ignore it.
            occluded: [N], you can use zero.
            truncated: [N], you can use zero.
            name: [N]
            location: [N, 3] center of 3d box.
            dimensions: [N, 3] dim of 3d box.
            rotation_y: [N] angle.
        }
        all fields must be filled, but some fields can fill
        zero.
        """
        if "annos" not in self._kitti_infos[0]:
            return None
        gt_annos = [info["annos"] for info in self._kitti_infos]
        dt_annos = self.convert_detection_to_kitti_annos(detections)
        # firstly convert standard detection to kitti-format dt annos
        z_axis = 1  # KITTI camera format use y as regular "z" axis.
        z_center = 1.0  # KITTI camera box's center is [0.5, 1, 0.5]
        # for regular raw lidar data, z_axis = 2, z_center = 0.5.
        result_official_dict = get_official_eval_result(
            gt_annos,
            dt_annos,
            self._class_names,
            z_axis=z_axis,
            z_center=z_center)
        result_coco = get_coco_eval_result(
            gt_annos,
            dt_annos,
            self._class_names,
            z_axis=z_axis,
            z_center=z_center)
        return {
            "results": {
                "official": result_official_dict["result"],
                "coco": result_coco["result"],
            },
            "detail": {
                "eval.kitti": {
                    "official": result_official_dict["detail"],
                    "coco": result_coco["detail"]
                }
            },
        }
    def __getitem__(self, idx):
        """Fetch sample `idx`, run the prep function, return the example."""
        input_dict = self.get_sensor_data(idx)
        example = self._prep_func(input_dict=input_dict)
        example["metadata"] = {}
        if "image_idx" in input_dict["metadata"]:
            example["metadata"] = input_dict["metadata"]
        if "anchors_mask" in example:
            example["anchors_mask"] = example["anchors_mask"].astype(np.uint8)
        return example
    def get_sensor_data(self, query):
        """Return the raw sensor dict for one sample.

        `query` is either an int index or a dict like
        {"lidar": {"idx": i}, "cam": {...}} — the "cam" key requests the
        raw image bytes as well. Prefers the frustum-cropped "*_reduced"
        point cloud when it exists on disk. Ground-truth boxes (when
        present) are converted to the lidar frame with a [0.5, 0.5, 0.5]
        center origin.
        """
        read_image = False
        idx = query
        if isinstance(query, dict):
            read_image = "cam" in query
            assert "lidar" in query
            idx = query["lidar"]["idx"]
        info = self._kitti_infos[idx]
        res = {
            "lidar": {
                "type": "lidar",
                "points": None,
            },
            "metadata": {
                "image_idx": info["image"]["image_idx"],
                "image_shape": info["image"]["image_shape"],
            },
            "calib": None,
            "cam": {}
        }
        pc_info = info["point_cloud"]
        velo_path = Path(pc_info['velodyne_path'])
        if not velo_path.is_absolute():
            velo_path = Path(self._root_path) / pc_info['velodyne_path']
        # Prefer the reduced cloud (points inside the camera frustum only)
        # generated by create_reduced_point_cloud, when present.
        velo_reduced_path = velo_path.parent.parent / (
            velo_path.parent.stem + '_reduced') / velo_path.name
        if velo_reduced_path.exists():
            velo_path = velo_reduced_path
        points = np.fromfile(
            str(velo_path), dtype=np.float32,
            count=-1).reshape([-1, self.NumPointFeatures])
        res["lidar"]["points"] = points
        image_info = info["image"]
        image_path = image_info['image_path']
        if read_image:
            image_path = self._root_path / image_path
            with open(str(image_path), 'rb') as f:
                image_str = f.read()
            res["cam"] = {
                "type": "camera",
                "data": image_str,
                "datatype": image_path.suffix[1:],
            }
        calib = info["calib"]
        calib_dict = {
            'rect': calib['R0_rect'],
            'Trv2c': calib['Tr_velo_to_cam'],
            'P2': calib['P2'],
        }
        res["calib"] = calib_dict
        if 'annos' in info:
            annos = info['annos']
            # we need other objects to avoid collision when sample
            annos = kitti.remove_dontcare(annos)
            locs = annos["location"]
            dims = annos["dimensions"]
            rots = annos["rotation_y"]
            gt_names = annos["name"]
            # rots = np.concatenate([np.zeros([locs.shape[0], 2], dtype=np.float32), rots], axis=1)
            gt_boxes = np.concatenate([locs, dims, rots[..., np.newaxis]],
                                      axis=1).astype(np.float32)
            calib = info["calib"]
            gt_boxes = box_np_ops.box_camera_to_lidar(
                gt_boxes, calib["R0_rect"], calib["Tr_velo_to_cam"])
            # only center format is allowed. so we need to convert
            # kitti [0.5, 0.5, 0] center to [0.5, 0.5, 0.5]
            box_np_ops.change_box3d_center_(gt_boxes, [0.5, 0.5, 0],
                                            [0.5, 0.5, 0.5])
            res["lidar"]["annotations"] = {
                'boxes': gt_boxes,
                'names': gt_names,
            }
            res["cam"]["annotations"] = {
                'boxes': annos["bbox"],
                'names': gt_names,
            }
        return res
def convert_to_kitti_info_version2(info):
    """Upgrade a v1 kitti info dict to the v2 nested layout, in place.

    v1 stores flat keys ("img_shape", "calib/R0_rect", ...); v2 groups
    them under "image", "calib" and "point_cloud" sub-dicts. Infos that
    already carry all three v2 keys are left untouched.
    """
    already_v2 = ("image" in info and "calib" in info
                  and "point_cloud" in info)
    if already_v2:
        return
    info["image"] = {
        'image_shape': info["img_shape"],
        'image_idx': info['image_idx'],
        'image_path': info['img_path'],
    }
    info["calib"] = {
        "R0_rect": info['calib/R0_rect'],
        "Tr_velo_to_cam": info['calib/Tr_velo_to_cam'],
        "P2": info['calib/P2'],
    }
    info["point_cloud"] = {
        "velodyne_path": info['velodyne_path'],
    }
def kitti_anno_to_label_file(annos, folder):
    """Dump KITTI-format annotation dicts to per-image .txt label files.

    One file per anno, named from its metadata image idx, one formatted
    result line per detected box.
    """
    out_dir = Path(folder)
    for anno in annos:
        image_idx = anno["metadata"]["image_idx"]
        lines = []
        for j in range(anno["bbox"].shape[0]):
            lines.append(kitti.kitti_result_line({
                'name': anno["name"][j],
                'alpha': anno["alpha"][j],
                'bbox': anno["bbox"][j],
                'location': anno["location"][j],
                'dimensions': anno["dimensions"][j],
                'rotation_y': anno["rotation_y"][j],
                'score': anno["score"][j],
            }))
        label_file = out_dir / f"{kitti.get_image_index_str(image_idx)}.txt"
        label_file.write_text('\n'.join(lines))
def _read_imageset_file(path):
with open(path, 'r') as f:
lines = f.readlines()
return [int(line) for line in lines]
def _calculate_num_points_in_gt(data_path,
                                infos,
                                relative_path,
                                remove_outside=True,
                                num_features=4):
    """Annotate each info with the lidar point count inside every gt box.

    Adds annos["num_points_in_gt"], an int32 array with one entry per
    annotation; DontCare entries (which come after all real objects in
    KITTI annos) get -1.
    """
    for info in infos:
        pc_info = info["point_cloud"]
        image_info = info["image"]
        calib = info["calib"]
        if relative_path:
            velo_file = str(Path(data_path) / pc_info["velodyne_path"])
        else:
            velo_file = pc_info["velodyne_path"]
        points = np.fromfile(
            velo_file, dtype=np.float32, count=-1).reshape([-1, num_features])
        rect = calib['R0_rect']
        Trv2c = calib['Tr_velo_to_cam']
        P2 = calib['P2']
        if remove_outside:
            # Drop points outside the camera frustum before counting.
            points = box_np_ops.remove_outside_points(
                points, rect, Trv2c, P2, image_info["image_shape"])
        # points = points[points[:, 0] > 0]
        annos = info['annos']
        num_obj = sum(n != 'DontCare' for n in annos['name'])
        # annos = kitti.filter_kitti_anno(annos, ['DontCare'])
        gt_boxes_camera = np.concatenate(
            [
                annos['location'][:num_obj],
                annos['dimensions'][:num_obj],
                annos['rotation_y'][:num_obj][..., np.newaxis],
            ],
            axis=1)
        gt_boxes_lidar = box_np_ops.box_camera_to_lidar(
            gt_boxes_camera, rect, Trv2c)
        inside = box_np_ops.points_in_rbbox(points[:, :3], gt_boxes_lidar)
        counts = inside.sum(0)
        # Pad with -1 for the trailing DontCare annotations.
        num_ignored = len(annos['dimensions']) - num_obj
        counts = np.concatenate([counts, -np.ones([num_ignored])])
        annos["num_points_in_gt"] = counts.astype(np.int32)
def create_kitti_info_file(data_path, save_path=None, relative_path=True):
    """Generate kitti_infos_{train,val,trainval,test}.pkl files.

    Image-id splits are read from the ImageSets folder next to this file.
    Train/val infos additionally get per-gt-box lidar point counts via
    _calculate_num_points_in_gt.

    Args:
        data_path: KITTI dataset root.
        save_path: output directory; defaults to data_path.
        relative_path: store file paths relative to data_path in the infos.
    """
    imageset_folder = Path(__file__).resolve().parent / "ImageSets"
    train_img_ids = _read_imageset_file(str(imageset_folder / "train.txt"))
    val_img_ids = _read_imageset_file(str(imageset_folder / "val.txt"))
    test_img_ids = _read_imageset_file(str(imageset_folder / "test.txt"))
    print("Generate info. this may take several minutes.")
    if save_path is None:
        save_path = Path(data_path)
    else:
        save_path = Path(save_path)

    def _save(infos, tag):
        # BUG FIX: the original print statements emitted the literal text
        # "(unknown)" instead of the actual output path (f-strings with no
        # placeholder).
        filename = save_path / f'kitti_infos_{tag}.pkl'
        print(f"Kitti info {tag} file is saved to {filename}")
        with open(filename, 'wb') as f:
            pickle.dump(infos, f)

    kitti_infos_train = kitti.get_kitti_image_info(
        data_path,
        training=True,
        velodyne=True,
        calib=True,
        image_ids=train_img_ids,
        relative_path=relative_path)
    _calculate_num_points_in_gt(data_path, kitti_infos_train, relative_path)
    _save(kitti_infos_train, 'train')
    kitti_infos_val = kitti.get_kitti_image_info(
        data_path,
        training=True,
        velodyne=True,
        calib=True,
        image_ids=val_img_ids,
        relative_path=relative_path)
    _calculate_num_points_in_gt(data_path, kitti_infos_val, relative_path)
    _save(kitti_infos_val, 'val')
    _save(kitti_infos_train + kitti_infos_val, 'trainval')
    # Test split has no labels, so no label_info and no point counting.
    kitti_infos_test = kitti.get_kitti_image_info(
        data_path,
        training=False,
        label_info=False,
        velodyne=True,
        calib=True,
        image_ids=test_img_ids,
        relative_path=relative_path)
    _save(kitti_infos_test, 'test')
def _create_reduced_point_cloud(data_path,
                                info_path,
                                save_path=None,
                                back=False):
    """Crop every point cloud listed in `info_path` to the camera frustum.

    Reduced clouds are written to "<velodyne_dir>_reduced/" next to the
    originals, or into `save_path` when given. With back=True the cloud is
    mirrored along x first and "_back" is appended to each output filename.

    Bug fixes vs. the original:
      * the back=True + save_path=None branch did `Path += str`, which
        raises TypeError — the suffix is now appended to a plain string;
      * the output file was opened in text mode ('w'); ndarray.tofile
        needs a binary-mode file, so we open with 'wb'.
    """
    with open(info_path, 'rb') as f:
        kitti_infos = pickle.load(f)
    for info in prog_bar(kitti_infos):
        pc_info = info["point_cloud"]
        image_info = info["image"]
        calib = info["calib"]
        v_path = Path(data_path) / pc_info['velodyne_path']
        points_v = np.fromfile(
            str(v_path), dtype=np.float32, count=-1).reshape([-1, 4])
        rect = calib['R0_rect']
        P2 = calib['P2']
        Trv2c = calib['Tr_velo_to_cam']
        if back:
            # Mirror the cloud so the region behind the car can be cropped
            # with the same frustum test.
            points_v[:, 0] = -points_v[:, 0]
        points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2,
                                                    image_info["image_shape"])
        if save_path is None:
            save_dir = v_path.parent.parent / (
                v_path.parent.stem + "_reduced")
        else:
            save_dir = Path(save_path)
        # Robustness: make sure the output directory exists.
        save_dir.mkdir(parents=True, exist_ok=True)
        save_filename = str(save_dir / v_path.name)
        if back:
            save_filename += "_back"
        with open(save_filename, 'wb') as f:
            points_v.tofile(f)
def create_reduced_point_cloud(data_path,
                               train_info_path=None,
                               val_info_path=None,
                               test_info_path=None,
                               save_path=None,
                               with_back=False):
    """Create frustum-reduced point clouds for the train/val/test splits.

    Any info path left as None defaults to the standard pickle name under
    `data_path`. With with_back=True a second, mirrored pass is run for
    every split as well.
    """
    root = Path(data_path)
    if train_info_path is None:
        train_info_path = root / 'kitti_infos_train.pkl'
    if val_info_path is None:
        val_info_path = root / 'kitti_infos_val.pkl'
    if test_info_path is None:
        test_info_path = root / 'kitti_infos_test.pkl'
    split_info_paths = (train_info_path, val_info_path, test_info_path)
    for info_path in split_info_paths:
        _create_reduced_point_cloud(data_path, info_path, save_path)
    if with_back:
        for info_path in split_info_paths:
            _create_reduced_point_cloud(
                data_path, info_path, save_path, back=True)
if __name__ == "__main__":
    # BUG FIX: `fire` was used here without being imported anywhere in the
    # file. Import it locally so the CLI entry point works, and only when
    # run as a script so importing this module does not require fire.
    import fire
    fire.Fire()
Modifique en second.utils.eval: class_to_name = { 0: 'Coche', 1: 'Peatón', 2: 'Ciclista', 3: 'Furgoneta', 4: 'Persona sentada', 5: 'coche', 6: 'tractor', 7: 'tráiler' }
def get_official_eval_result(gt_annos,
                             dt_annos,
                             current_classes,
                             difficultys=[0, 1, 2],
                             z_axis=1,
                             z_center=1.0):
    """Compute the official KITTI AP metrics (bbox / bev / 3d, plus AOS).

    gt_annos and dt_annos must contains following keys:
    [bbox, location, dimensions, rotation_y, score]

    Returns a dict with "result" (printable summary string) and "detail"
    (per-class mAP values keyed by "metric@overlap").
    """
    # Per-difficulty IoU thresholds; one column per class in class_to_name.
    overlap_mod = np.array([[0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7],
                            [0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7],
                            [0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7]])
    overlap_easy = np.array([[0.7, 0.5, 0.5, 0.7, 0.5, 0.5, 0.5, 0.5],
                             [0.5, 0.25, 0.25, 0.5, 0.25, 0.5, 0.5, 0.5],
                             [0.5, 0.25, 0.25, 0.5, 0.25, 0.5, 0.5, 0.5]])
    # Shape [num_minoverlap, metric, num_class] = [2, 3, 8].
    # (FIX: the original comment said [2, 3, 5], wrong for 8 classes.)
    min_overlaps = np.stack([overlap_mod, overlap_easy], axis=0)
    class_to_name = {
        0: 'Car',
        1: 'Pedestrian',
        2: 'Cyclist',
        3: 'Van',
        4: 'Person_sitting',
        5: 'car',
        6: 'tractor',
        7: 'trailer',
    }
    name_to_class = {v: n for n, v in class_to_name.items()}
    if not isinstance(current_classes, (list, tuple)):
        current_classes = [current_classes]
    # Accept class names or indices; normalize to indices.
    current_classes_int = []
    for curcls in current_classes:
        if isinstance(curcls, str):
            current_classes_int.append(name_to_class[curcls])
        else:
            current_classes_int.append(curcls)
    current_classes = current_classes_int
    min_overlaps = min_overlaps[:, :, current_classes]
    result = ''
    # check whether alpha is valid (-10 marks "no orientation" detections)
    compute_aos = False
    for anno in dt_annos:
        if anno['alpha'].shape[0] != 0:
            if anno['alpha'][0] != -10:
                compute_aos = True
            break
    metrics = do_eval_v3(
        gt_annos,
        dt_annos,
        current_classes,
        min_overlaps,
        compute_aos,
        difficultys,
        z_axis=z_axis,
        z_center=z_center)
    detail = {}
    for j, curcls in enumerate(current_classes):
        # mAP threshold array: [num_minoverlap, metric, class]
        # mAP result: [num_class, num_diff, num_minoverlap]
        class_name = class_to_name[curcls]
        detail[class_name] = {}
        for i in range(min_overlaps.shape[0]):
            mAPbbox = get_mAP(metrics["bbox"]["precision"][j, :, i])
            mAPbev = get_mAP(metrics["bev"]["precision"][j, :, i])
            mAP3d = get_mAP(metrics["3d"]["precision"][j, :, i])
            detail[class_name][f"bbox@{min_overlaps[i, 0, j]:.2f}"] = mAPbbox.tolist()
            detail[class_name][f"bev@{min_overlaps[i, 1, j]:.2f}"] = mAPbev.tolist()
            detail[class_name][f"3d@{min_overlaps[i, 2, j]:.2f}"] = mAP3d.tolist()
            result += print_str(
                (f"{class_to_name[curcls]} "
                 "AP(Average Precision)@{:.2f}, {:.2f}, {:.2f}:".format(*min_overlaps[i, :, j])))
            mAPbbox = ", ".join(f"{v:.2f}" for v in mAPbbox)
            mAPbev = ", ".join(f"{v:.2f}" for v in mAPbev)
            mAP3d = ", ".join(f"{v:.2f}" for v in mAP3d)
            result += print_str(f"bbox AP:{mAPbbox}")
            result += print_str(f"bev AP:{mAPbev}")
            result += print_str(f"3d AP:{mAP3d}")
            if compute_aos:
                mAPaos = get_mAP(metrics["bbox"]["orientation"][j, :, i])
                # FIX: was f"aos" — a redundant f-string with no placeholder.
                detail[class_name]["aos"] = mAPaos.tolist()
                mAPaos = ", ".join(f"{v:.2f}" for v in mAPaos)
                result += print_str(f"aos AP:{mAPaos}")
    return {
        "result": result,
        "detail": detail,
    }