1. Data organization:
Training validation data generation:
python create_data.py nuscenes_data_prep --data_path=NUSCENES_TRAINVAL_DATASET_ROOT --version="v1.0-trainval" --max_sweeps=10
python create_data.py nuscenes_data_prep --data_path=NUSCENES_TEST_DATASET_ROOT --version="v1.0-test" --max_sweeps=10 --dataset_name="NuscenesDataset"
To train on custom data, convert the point clouds into .bin files and pack them, together with the labels, into a .pkl file organized by your categories; then run the command above.
Such as: (1) my_common.py packs labels and point clouds into pkl.
(2) python my_create_data.py mydata --data_path=datapath
2. Modify the config file
Open second.pytorch/second/configs/car.lite.config to edit the category and training data path
train_input_reader: {
...
database_sampler {
database_info_path: "/path/to/dataset_dbinfos_train.pkl"
...
}
dataset: {
dataset_class_name: "DATASET_NAME"
kitti_info_path: "/path/to/dataset_infos_train.pkl"
kitti_root_path: "DATASET_ROOT"
}
}
...
eval_input_reader: {
...
dataset: {
dataset_class_name: "DATASET_NAME"
kitti_info_path: "/path/to/dataset_infos_val.pkl"
kitti_root_path: "DATASET_ROOT"
}
}
3. Start training
Single GPU:
python ./pytorch/train.py train --config_path=./configs/car.fhd.config --model_dir=/path/to/model_dir
Multiple GPUs:
CUDA_VISIBLE_DEVICES=0,1,3 python ./pytorch/train.py train --config_path=./configs/car.fhd.config --model_dir=/path/to/model_dir --multi_gpu=True
Floating point 16-bit training:
Modify the configuration file and set enable_mixed_precision to true.
(1) If you want to train a new model, please make sure that "/path/to/model_dir" does not exist. If the model_dir does not exist, a new directory will be created, otherwise the checkpoint in it will be read.
(2) Training defaults to batch_size=6, which is sized for a 1080Ti. If your GPU has less memory, reduce the batch size.
(3) Only single-GPU training is currently supported, but training a model takes only about 20 hours on a single 1080Ti; using super convergence, 50 epochs are enough to reach 78.3 AP on car moderate 3D on the KITTI validation set.
4. Verify
The detection result is saved as result.pkl by default; set --pickle_result=False to save the result in KITTI label format instead.
python ./pytorch/train.py evaluate --config_path=./configs/car.fhd.config --model_dir=/path/to/model_dir --measure_time=True --batch_size=1
5. Training custom data
You need to modify or rewrite second.data.kitti_dataset to register functions in @register_dataset mode
You need to modify eval.py during training, mainly to customize the data category, etc.
from pathlib import Path
import pickle
import time
from functools import partial
import numpy as np
from second.core import box_np_ops
from second.core import preprocess as prep
from second.data import kitti_common as kitti
from second.utils.eval import get_coco_eval_result, get_official_eval_result
from second.data.dataset import Dataset, register_dataset
from second.utils.progress_bar import progress_bar_iter as prog_bar
@register_dataset
class KittiDataset(Dataset):
    """KITTI 3D-detection dataset.

    Loads a pickled info list (one entry per frame holding image, calib
    and point-cloud metadata plus optional annotations), serves
    preprocessed training examples, converts network detections back to
    KITTI camera-frame annotations, and runs the official/COCO KITTI
    evaluation.
    """
    # Number of features per lidar point: x, y, z, intensity.
    NumPointFeatures = 4

    def __init__(self,
                 root_path,
                 info_path,
                 class_names=None,
                 prep_func=None,
                 num_point_features=None):
        """
        Args:
            root_path: dataset root; relative paths in the infos are
                resolved against it.
            info_path: path to a kitti_infos_*.pkl file (required).
            class_names: detection class names, indexed by label id.
            prep_func: preprocessing function applied in __getitem__.
            num_point_features: unused; kept for interface compatibility.
        """
        assert info_path is not None
        with open(info_path, 'rb') as f:
            infos = pickle.load(f)
        self._root_path = Path(root_path)
        self._kitti_infos = infos
        print("remain number of infos:", len(self._kitti_infos))
        self._class_names = class_names
        self._prep_func = prep_func

    def __len__(self):
        return len(self._kitti_infos)

    def convert_detection_to_kitti_annos(self, detection):
        """Convert lidar-frame detections into KITTI camera-frame annos.

        Args:
            detection: list of dicts with "box3d_lidar", "label_preds",
                "scores" tensors and a "metadata" dict carrying
                "image_idx".

        Returns:
            A list of KITTI annotation dicts (bbox/alpha/dimensions/
            location/rotation_y/name/score/...), one per detection.
        """
        class_names = self._class_names
        det_image_idxes = [det["metadata"]["image_idx"] for det in detection]
        gt_image_idxes = [
            info["image"]["image_idx"] for info in self._kitti_infos
        ]
        annos = []
        for i in range(len(detection)):
            det_idx = det_image_idxes[i]
            det = detection[i]
            # Bug fix: look the info up by image_idx instead of assuming
            # the detections are ordered exactly like the infos.
            info = self._kitti_infos[gt_image_idxes.index(det_idx)]
            calib = info["calib"]
            rect = calib["R0_rect"]
            Trv2c = calib["Tr_velo_to_cam"]
            P2 = calib["P2"]
            final_box_preds = det["box3d_lidar"].detach().cpu().numpy()
            label_preds = det["label_preds"].detach().cpu().numpy()
            scores = det["scores"].detach().cpu().numpy()
            if final_box_preds.shape[0] != 0:
                # Shift the lidar box z from center to bottom before the
                # camera-frame conversion.
                final_box_preds[:, 2] -= final_box_preds[:, 5] / 2
                box3d_camera = box_np_ops.box_lidar_to_camera(
                    final_box_preds, rect, Trv2c)
                locs = box3d_camera[:, :3]
                dims = box3d_camera[:, 3:6]
                angles = box3d_camera[:, 6]
                camera_box_origin = [0.5, 1.0, 0.5]
                box_corners = box_np_ops.center_to_corner_box3d(
                    locs, dims, angles, camera_box_origin, axis=1)
                box_corners_in_image = box_np_ops.project_to_image(
                    box_corners, P2)
                # box_corners_in_image: [N, 8, 2]
                minxy = np.min(box_corners_in_image, axis=1)
                maxxy = np.max(box_corners_in_image, axis=1)
                bbox = np.concatenate([minxy, maxxy], axis=1)
            anno = kitti.get_start_result_anno()
            num_example = 0
            box3d_lidar = final_box_preds
            for j in range(box3d_lidar.shape[0]):
                image_shape = info["image"]["image_shape"]
                # Skip boxes that project completely outside the image.
                if bbox[j, 0] > image_shape[1] or bbox[j, 1] > image_shape[0]:
                    continue
                if bbox[j, 2] < 0 or bbox[j, 3] < 0:
                    continue
                # Clip the 2d box to the image boundary.
                bbox[j, 2:] = np.minimum(bbox[j, 2:], image_shape[::-1])
                bbox[j, :2] = np.maximum(bbox[j, :2], [0, 0])
                anno["bbox"].append(bbox[j])
                # convert center format to kitti format
                # box3d_lidar[j, 2] -= box3d_lidar[j, 5] / 2
                anno["alpha"].append(
                    -np.arctan2(-box3d_lidar[j, 1], box3d_lidar[j, 0]) +
                    box3d_camera[j, 6])
                anno["dimensions"].append(box3d_camera[j, 3:6])
                anno["location"].append(box3d_camera[j, :3])
                anno["rotation_y"].append(box3d_camera[j, 6])
                anno["name"].append(class_names[int(label_preds[j])])
                anno["truncated"].append(0.0)
                anno["occluded"].append(0)
                anno["score"].append(scores[j])
                num_example += 1
            if num_example != 0:
                anno = {n: np.stack(v) for n, v in anno.items()}
                annos.append(anno)
            else:
                annos.append(kitti.empty_result_anno())
            num_example = annos[-1]["name"].shape[0]
            annos[-1]["metadata"] = det["metadata"]
        return annos

    def evaluation(self, detections, output_dir):
        """Run the official KITTI and COCO-style evaluations.

        When you want to eval your own dataset, you MUST set correct
        the z axis and box z center.
        If you want to eval by my KITTI eval function, you must
        provide the correct format annotations.
        ground_truth_annotations format:
        {
            bbox: [N, 4], if you fill fake data, MUST HAVE >25 HEIGHT!!!!!!
            alpha: [N], you can use -10 to ignore it.
            occluded: [N], you can use zero.
            truncated: [N], you can use zero.
            name: [N]
            location: [N, 3] center of 3d box.
            dimensions: [N, 3] dim of 3d box.
            rotation_y: [N] angle.
        }
        all fields must be filled, but some fields can fill
        zero.

        Returns None when the infos carry no ground-truth annotations
        (e.g. the test split).
        """
        if "annos" not in self._kitti_infos[0]:
            return None
        gt_annos = [info["annos"] for info in self._kitti_infos]
        # First convert the standard detections to kitti-format dt annos.
        dt_annos = self.convert_detection_to_kitti_annos(detections)
        z_axis = 1  # KITTI camera format use y as regular "z" axis.
        z_center = 1.0  # KITTI camera box's center is [0.5, 1, 0.5]
        # for regular raw lidar data, z_axis = 2, z_center = 0.5.
        result_official_dict = get_official_eval_result(
            gt_annos,
            dt_annos,
            self._class_names,
            z_axis=z_axis,
            z_center=z_center)
        result_coco = get_coco_eval_result(
            gt_annos,
            dt_annos,
            self._class_names,
            z_axis=z_axis,
            z_center=z_center)
        return {
            "results": {
                "official": result_official_dict["result"],
                "coco": result_coco["result"],
            },
            "detail": {
                "eval.kitti": {
                    "official": result_official_dict["detail"],
                    "coco": result_coco["detail"]
                }
            },
        }

    def __getitem__(self, idx):
        """Return one preprocessed training example."""
        input_dict = self.get_sensor_data(idx)
        example = self._prep_func(input_dict=input_dict)
        example["metadata"] = {}
        if "image_idx" in input_dict["metadata"]:
            example["metadata"] = input_dict["metadata"]
        if "anchors_mask" in example:
            example["anchors_mask"] = example["anchors_mask"].astype(np.uint8)
        return example

    def get_sensor_data(self, query):
        """Load raw sensor data (and annotations, when present) for a frame.

        Args:
            query: either an int index, or a dict with query["lidar"]["idx"]
                (a "cam" key requests the image bytes too).
        """
        read_image = False
        idx = query
        if isinstance(query, dict):
            read_image = "cam" in query
            assert "lidar" in query
            idx = query["lidar"]["idx"]
        info = self._kitti_infos[idx]
        res = {
            "lidar": {
                "type": "lidar",
                "points": None,
            },
            "metadata": {
                "image_idx": info["image"]["image_idx"],
                "image_shape": info["image"]["image_shape"],
            },
            "calib": None,
            "cam": {}
        }
        pc_info = info["point_cloud"]
        velo_path = Path(pc_info['velodyne_path'])
        if not velo_path.is_absolute():
            velo_path = Path(self._root_path) / pc_info['velodyne_path']
        # Prefer the frustum-cropped "_reduced" cloud when it exists.
        velo_reduced_path = velo_path.parent.parent / (
            velo_path.parent.stem + '_reduced') / velo_path.name
        if velo_reduced_path.exists():
            velo_path = velo_reduced_path
        points = np.fromfile(
            str(velo_path), dtype=np.float32,
            count=-1).reshape([-1, self.NumPointFeatures])
        res["lidar"]["points"] = points
        image_info = info["image"]
        image_path = image_info['image_path']
        if read_image:
            image_path = self._root_path / image_path
            with open(str(image_path), 'rb') as f:
                image_str = f.read()
            res["cam"] = {
                "type": "camera",
                "data": image_str,
                "datatype": image_path.suffix[1:],
            }
        calib = info["calib"]
        calib_dict = {
            'rect': calib['R0_rect'],
            'Trv2c': calib['Tr_velo_to_cam'],
            'P2': calib['P2'],
        }
        res["calib"] = calib_dict
        if 'annos' in info:
            annos = info['annos']
            # we need other objects to avoid collision when sample
            annos = kitti.remove_dontcare(annos)
            locs = annos["location"]
            dims = annos["dimensions"]
            rots = annos["rotation_y"]
            gt_names = annos["name"]
            gt_boxes = np.concatenate([locs, dims, rots[..., np.newaxis]],
                                      axis=1).astype(np.float32)
            calib = info["calib"]
            gt_boxes = box_np_ops.box_camera_to_lidar(
                gt_boxes, calib["R0_rect"], calib["Tr_velo_to_cam"])
            # only center format is allowed. so we need to convert
            # kitti [0.5, 0.5, 0] center to [0.5, 0.5, 0.5]
            box_np_ops.change_box3d_center_(gt_boxes, [0.5, 0.5, 0],
                                            [0.5, 0.5, 0.5])
            res["lidar"]["annotations"] = {
                'boxes': gt_boxes,
                'names': gt_names,
            }
            res["cam"]["annotations"] = {
                'boxes': annos["bbox"],
                'names': gt_names,
            }
        return res
def convert_to_kitti_info_version2(info):
    """Upgrade a KITTI info dict from the flat v1 layout to the nested
    v2 layout, in place. Does nothing when all v2 keys already exist.
    """
    already_v2 = all(
        key in info for key in ("image", "calib", "point_cloud"))
    if already_v2:
        return
    info["image"] = {
        'image_shape': info["img_shape"],
        'image_idx': info['image_idx'],
        'image_path': info['img_path'],
    }
    info["calib"] = {
        "R0_rect": info['calib/R0_rect'],
        "Tr_velo_to_cam": info['calib/Tr_velo_to_cam'],
        "P2": info['calib/P2'],
    }
    info["point_cloud"] = {
        "velodyne_path": info['velodyne_path'],
    }
def kitti_anno_to_label_file(annos, folder):
    """Write one KITTI-format label .txt file per annotation dict.

    Each file is named from the anno's metadata image_idx and contains
    one line per box.
    """
    out_dir = Path(folder)
    for anno in annos:
        image_idx = anno["metadata"]["image_idx"]
        label_lines = []
        for j in range(anno["bbox"].shape[0]):
            # Pick the j-th entry of every per-box field.
            fields = {
                key: anno[key][j]
                for key in ('name', 'alpha', 'bbox', 'location',
                            'dimensions', 'rotation_y', 'score')
            }
            label_lines.append(kitti.kitti_result_line(fields))
        label_file = out_dir / f"{kitti.get_image_index_str(image_idx)}.txt"
        with open(label_file, 'w') as f:
            f.write('\n'.join(label_lines))
def _read_imageset_file(path):
with open(path, 'r') as f:
lines = f.readlines()
return [int(line) for line in lines]
def _calculate_num_points_in_gt(data_path,
                                infos,
                                relative_path,
                                remove_outside=True,
                                num_features=4):
    """Annotate each info with the lidar point count inside every gt box.

    Writes annos["num_points_in_gt"] in place; ignored (DontCare) boxes
    get a count of -1.
    """
    for info in infos:
        pc_info = info["point_cloud"]
        image_info = info["image"]
        calib = info["calib"]
        velo_path = pc_info["velodyne_path"]
        if relative_path:
            velo_path = str(Path(data_path) / velo_path)
        points_v = np.fromfile(velo_path, dtype=np.float32, count=-1)
        points_v = points_v.reshape([-1, num_features])
        rect = calib['R0_rect']
        Trv2c = calib['Tr_velo_to_cam']
        P2 = calib['P2']
        if remove_outside:
            # Drop lidar points that project outside the camera image.
            points_v = box_np_ops.remove_outside_points(
                points_v, rect, Trv2c, P2, image_info["image_shape"])
        annos = info['annos']
        # Real objects come first; trailing entries are DontCare.
        num_obj = sum(1 for name in annos['name'] if name != 'DontCare')
        gt_boxes_camera = np.concatenate([
            annos['location'][:num_obj],
            annos['dimensions'][:num_obj],
            annos['rotation_y'][:num_obj][..., np.newaxis],
        ], axis=1)
        gt_boxes_lidar = box_np_ops.box_camera_to_lidar(
            gt_boxes_camera, rect, Trv2c)
        indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)
        num_points_in_gt = indices.sum(0)
        num_ignored = len(annos['dimensions']) - num_obj
        num_points_in_gt = np.concatenate(
            [num_points_in_gt, -np.ones([num_ignored])])
        annos["num_points_in_gt"] = num_points_in_gt.astype(np.int32)
def create_kitti_info_file(data_path, save_path=None, relative_path=True):
    """Generate kitti_infos_{train,val,trainval,test}.pkl files.

    Args:
        data_path: KITTI dataset root directory.
        save_path: output directory for the pkl files; defaults to
            data_path.
        relative_path: store file paths relative to data_path inside
            the infos.
    """
    imageset_folder = Path(__file__).resolve().parent / "ImageSets"
    train_img_ids = _read_imageset_file(str(imageset_folder / "train.txt"))
    val_img_ids = _read_imageset_file(str(imageset_folder / "val.txt"))
    test_img_ids = _read_imageset_file(str(imageset_folder / "test.txt"))
    print("Generate info. this may take several minutes.")
    if save_path is None:
        save_path = Path(data_path)
    else:
        save_path = Path(save_path)
    kitti_infos_train = kitti.get_kitti_image_info(
        data_path,
        training=True,
        velodyne=True,
        calib=True,
        image_ids=train_img_ids,
        relative_path=relative_path)
    _calculate_num_points_in_gt(data_path, kitti_infos_train, relative_path)
    filename = save_path / 'kitti_infos_train.pkl'
    # Bug fix: these messages printed the literal text "(unknown)"
    # instead of the output path.
    print(f"Kitti info train file is saved to {filename}")
    with open(filename, 'wb') as f:
        pickle.dump(kitti_infos_train, f)
    kitti_infos_val = kitti.get_kitti_image_info(
        data_path,
        training=True,
        velodyne=True,
        calib=True,
        image_ids=val_img_ids,
        relative_path=relative_path)
    _calculate_num_points_in_gt(data_path, kitti_infos_val, relative_path)
    filename = save_path / 'kitti_infos_val.pkl'
    print(f"Kitti info val file is saved to {filename}")
    with open(filename, 'wb') as f:
        pickle.dump(kitti_infos_val, f)
    filename = save_path / 'kitti_infos_trainval.pkl'
    print(f"Kitti info trainval file is saved to {filename}")
    with open(filename, 'wb') as f:
        pickle.dump(kitti_infos_train + kitti_infos_val, f)
    # The test split has no labels, so skip label_info and point counts.
    kitti_infos_test = kitti.get_kitti_image_info(
        data_path,
        training=False,
        label_info=False,
        velodyne=True,
        calib=True,
        image_ids=test_img_ids,
        relative_path=relative_path)
    filename = save_path / 'kitti_infos_test.pkl'
    print(f"Kitti info test file is saved to {filename}")
    with open(filename, 'wb') as f:
        pickle.dump(kitti_infos_test, f)
def _create_reduced_point_cloud(data_path,
                                info_path,
                                save_path=None,
                                back=False):
    """Save point clouds cropped to the camera frustum ("reduced" clouds).

    Args:
        data_path: dataset root used to resolve velodyne paths.
        info_path: pkl file with the infos to process.
        save_path: output directory; defaults to a sibling
            "<velodyne>_reduced" directory next to the source clouds.
        back: mirror the cloud front-to-back first and append "_back"
            to the output file name.
    """
    with open(info_path, 'rb') as f:
        kitti_infos = pickle.load(f)
    for info in prog_bar(kitti_infos):
        pc_info = info["point_cloud"]
        image_info = info["image"]
        calib = info["calib"]
        v_path = Path(data_path) / pc_info['velodyne_path']
        points_v = np.fromfile(
            str(v_path), dtype=np.float32, count=-1).reshape([-1, 4])
        rect = calib['R0_rect']
        P2 = calib['P2']
        Trv2c = calib['Tr_velo_to_cam']
        if back:
            # Mirror the cloud so the rear hemisphere faces the camera.
            points_v[:, 0] = -points_v[:, 0]
        points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2,
                                                    image_info["image_shape"])
        if save_path is None:
            save_filename = v_path.parent.parent / (
                v_path.parent.stem + "_reduced") / v_path.name
        else:
            save_filename = Path(save_path) / v_path.name
        # Bug fix: the old code did `Path += str`, which raises TypeError;
        # build the suffix on a plain string instead.
        save_filename = str(save_filename)
        if back:
            save_filename += "_back"
        # Bug fix: open in binary mode; text mode can corrupt the raw
        # float32 data on some platforms.
        with open(save_filename, 'wb') as f:
            points_v.tofile(f)
def create_reduced_point_cloud(data_path,
                               train_info_path=None,
                               val_info_path=None,
                               test_info_path=None,
                               save_path=None,
                               with_back=False):
    """Create frustum-reduced point clouds for the train/val/test splits.

    Missing info paths default to the standard kitti_infos_*.pkl files
    under data_path. When with_back is set, a mirrored "_back" version
    is generated for each split as well.
    """
    root = Path(data_path)
    if train_info_path is None:
        train_info_path = root / 'kitti_infos_train.pkl'
    if val_info_path is None:
        val_info_path = root / 'kitti_infos_val.pkl'
    if test_info_path is None:
        test_info_path = root / 'kitti_infos_test.pkl'
    split_info_paths = (train_info_path, val_info_path, test_info_path)
    for info_path in split_info_paths:
        _create_reduced_point_cloud(data_path, info_path, save_path)
    if with_back:
        for info_path in split_info_paths:
            _create_reduced_point_cloud(
                data_path, info_path, save_path, back=True)
if __name__ == "__main__":
    # Bug fix: `fire` was used without being imported anywhere in the
    # file; import it lazily so library users don't need it installed.
    import fire

    fire.Fire()
Modify second.utils.eval: class_to_name = {
0: 'Car',
1: 'Pedestrian',
2: 'Cyclist',
3: 'Van',
4: 'Person_sitting',
5: 'car',
6: 'tractor',
7: 'trailer',
}
def get_official_eval_result(gt_annos,
                             dt_annos,
                             current_classes,
                             difficultys=[0, 1, 2],
                             z_axis=1,
                             z_center=1.0):
    """Compute KITTI official-style AP metrics (bbox / bev / 3d, plus
    aos when orientation predictions are present).

    gt_annos and dt_annos must contain the following keys:
    [bbox, location, dimensions, rotation_y, score]

    Returns a dict with a formatted "result" string and a "detail" dict
    of per-class mAP lists keyed by metric@threshold.
    """
    # IoU thresholds: rows are the metrics (bbox, bev, 3d); columns
    # follow the class ids in class_to_name below.
    overlap_mod = np.array([[0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7],
                            [0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7],
                            [0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7]])
    overlap_easy = np.array([[0.7, 0.5, 0.5, 0.7, 0.5, 0.5, 0.5, 0.5],
                             [0.5, 0.25, 0.25, 0.5, 0.25, 0.5, 0.5, 0.5],
                             [0.5, 0.25, 0.25, 0.5, 0.25, 0.5, 0.5, 0.5]])
    # shape [2, 3, 8]: [num_minoverlap, metric, num_class]
    # (the original comment said [2, 3, 5], which was wrong)
    min_overlaps = np.stack([overlap_mod, overlap_easy], axis=0)
    class_to_name = {
        0: 'Car',
        1: 'Pedestrian',
        2: 'Cyclist',
        3: 'Van',
        4: 'Person_sitting',
        5: 'car',
        6: 'tractor',
        7: 'trailer',
    }
    name_to_class = {v: n for n, v in class_to_name.items()}
    # Accept class names or ids, scalar or sequence.
    if not isinstance(current_classes, (list, tuple)):
        current_classes = [current_classes]
    current_classes_int = []
    for curcls in current_classes:
        if isinstance(curcls, str):
            current_classes_int.append(name_to_class[curcls])
        else:
            current_classes_int.append(curcls)
    current_classes = current_classes_int
    # Keep only the threshold columns for the requested classes.
    min_overlaps = min_overlaps[:, :, current_classes]
    result = ''
    # check whether alpha is valid; -10 marks "no orientation predicted",
    # checked on the first non-empty detection only
    compute_aos = False
    for anno in dt_annos:
        if anno['alpha'].shape[0] != 0:
            if anno['alpha'][0] != -10:
                compute_aos = True
            break
    metrics = do_eval_v3(
        gt_annos,
        dt_annos,
        current_classes,
        min_overlaps,
        compute_aos,
        difficultys,
        z_axis=z_axis,
        z_center=z_center)
    detail = {}
    for j, curcls in enumerate(current_classes):
        # mAP threshold array: [num_minoverlap, metric, class]
        # mAP result: [num_class, num_diff, num_minoverlap]
        class_name = class_to_name[curcls]
        detail[class_name] = {}
        for i in range(min_overlaps.shape[0]):
            mAPbbox = get_mAP(metrics["bbox"]["precision"][j, :, i])
            mAPbev = get_mAP(metrics["bev"]["precision"][j, :, i])
            mAP3d = get_mAP(metrics["3d"]["precision"][j, :, i])
            detail[class_name][f"bbox@{min_overlaps[i, 0, j]:.2f}"] = mAPbbox.tolist()
            detail[class_name][f"bev@{min_overlaps[i, 1, j]:.2f}"] = mAPbev.tolist()
            detail[class_name][f"3d@{min_overlaps[i, 2, j]:.2f}"] = mAP3d.tolist()
            result += print_str(
                (f"{class_to_name[curcls]} "
                 "AP(Average Precision)@{:.2f}, {:.2f}, {:.2f}:".format(*min_overlaps[i, :, j])))
            # Rebind the mAP arrays to their display strings for the report.
            mAPbbox = ", ".join(f"{v:.2f}" for v in mAPbbox)
            mAPbev = ", ".join(f"{v:.2f}" for v in mAPbev)
            mAP3d = ", ".join(f"{v:.2f}" for v in mAP3d)
            result += print_str(f"bbox AP:{mAPbbox}")
            result += print_str(f"bev AP:{mAPbev}")
            result += print_str(f"3d AP:{mAP3d}")
            if compute_aos:
                mAPaos = get_mAP(metrics["bbox"]["orientation"][j, :, i])
                detail[class_name][f"aos"] = mAPaos.tolist()
                mAPaos = ", ".join(f"{v:.2f}" for v in mAPaos)
                result += print_str(f"aos AP:{mAPaos}")
    return {
        "result": result,
        "detail": detail,
    }