classificação
Tomemos como exemplo um modelo com 10 categorias, na versão para GPU.
Primeiro, converta o arquivo de pesos .pth treinado com PyTorch em um arquivo ONNX:
import torch
import pointnet_cls
# Export settings: the dummy input below is shaped (1, channels, point_num).
point_num = 1024
class_num = 10
normal_channel = False  # True -> 6 input channels, False -> 3

# Use the GPU when one is available and fall back to the CPU otherwise, so
# no lines have to be commented out by hand on a CPU-only machine.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = pointnet_cls.get_model(class_num, normal_channel)
model = model.to(device)
model.eval()

# map_location lets a checkpoint that was saved on a GPU load on a CPU host.
checkpoint = torch.load('./cls.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

# Random dummy input; it is only passed to the exporter as the example input.
x = (torch.rand(1, 6, point_num) if normal_channel else torch.rand(1, 3, point_num))
x = x.to(device)

export_onnx_file = "./cls.onnx"
torch.onnx.export(model,
                  x,
                  export_onnx_file,
                  opset_version=11)
Inferência em Python:
import numpy as np
import onnxruntime
# Number of points per cloud: the loaded points are truncated to this count
# and reshaped to (1, point_num, 3) before being fed to the model.
point_num = 1024
def pc_normalize(pc):
    """Center a point cloud on its centroid and scale it into the unit sphere.

    Args:
        pc: 2-D array with one point per row.

    Returns:
        A new array of the same shape whose rows have zero mean and whose
        farthest row lies at distance 1 from the origin. The input array is
        not modified.
    """
    centroid = np.mean(pc, axis=0)
    pc = pc - centroid
    # Largest distance of any point from the centroid.
    m = np.max(np.sqrt(np.sum(pc**2, axis=1)))
    if m == 0:
        # Every point coincides with the centroid: return the centered
        # (all-zero) cloud instead of dividing by zero and producing NaN.
        return pc
    pc = pc / m
    return pc
if __name__ == '__main__':
    # Load the comma-separated cloud and keep the xyz columns of the first
    # point_num rows.
    cloud_path = './bed_0610.txt'
    cloud = np.loadtxt(cloud_path, delimiter=',').astype(np.float32)
    point_set = cloud[:, 0:3][0:point_num, :]
    point_set[:, 0:3] = pc_normalize(point_set[:, 0:3])

    # (1, point_num, 3) -> (1, 3, point_num), the layout used for the export.
    points = np.reshape(point_set, (1, point_num, 3)).swapaxes(2, 1)

    onnx_session = onnxruntime.InferenceSession(
        "cls.onnx",
        providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])

    input_name = [node.name for node in onnx_session.get_inputs()]
    output_name = [node.name for node in onnx_session.get_outputs()]

    # Every model input receives the same point tensor.
    input_feed = {name: points for name in input_name}

    pred = onnx_session.run(None, input_feed)[0]
    print(np.argmax(pred))
Inferência em C++:
#include <iostream>
#include <vector>
#include <fstream>
#include <onnxruntime_cxx_api.h>
// Number of points per cloud: main() reads this many points and the model
// input tensor is shaped {1, 3, point_num}.
const int point_num = 1024;
// Number of categories of the example model (not referenced by the code shown here).
const int class_num = 10;
// Centers a flat xyz point buffer on its centroid and scales it so that the
// farthest point lies at distance 1 from the origin.
//
// points: interleaved coordinates {x0, y0, z0, x1, y1, z1, ...}; modified in
//         place. Every complete xyz triple in the buffer is processed, so the
//         function does not depend on a fixed point count.
void pc_normalize(std::vector<float>& points)
{
    const size_t count = points.size() / 3;
    if (count == 0)
        return; // nothing to normalize

    float mean_x = 0, mean_y = 0, mean_z = 0;
    for (size_t i = 0; i < count; ++i)
    {
        mean_x += points[3 * i];
        mean_y += points[3 * i + 1];
        mean_z += points[3 * i + 2];
    }
    mean_x /= static_cast<float>(count);
    mean_y /= static_cast<float>(count);
    mean_z /= static_cast<float>(count);

    // Center the cloud and track the largest squared distance in one pass;
    // the square root is taken once at the end instead of twice per point.
    float max_sq = 0;
    for (size_t i = 0; i < count; ++i)
    {
        points[3 * i] -= mean_x;
        points[3 * i + 1] -= mean_y;
        points[3 * i + 2] -= mean_z;
        const float sq = points[3 * i] * points[3 * i]
            + points[3 * i + 1] * points[3 * i + 1]
            + points[3 * i + 2] * points[3 * i + 2];
        if (sq > max_sq)
            max_sq = sq;
    }

    const float m = std::sqrt(max_sq);
    if (m == 0)
        return; // all points coincide; leave the centered zeros, avoid 0/0

    for (size_t i = 0; i < count; ++i)
    {
        points[3 * i] /= m;
        points[3 * i + 1] /= m;
        points[3 * i + 2] /= m;
    }
}
void classfier(std::vector<float> & points)
{
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "cls");
Ort::SessionOptions session_options;
session_options.SetIntraOpNumThreads(1);
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
OrtCUDAProviderOptions cuda_option;
cuda_option.device_id = 0;
cuda_option.arena_extend_strategy = 0;
cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
cuda_option.gpu_mem_limit = SIZE_MAX;
cuda_option.do_copy_in_default_stream = 1;
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
session_options.AppendExecutionProvider_CUDA(cuda_option);
const wchar_t* model_path = L"cls.onnx";
Ort::Session session(env, model_path, session_options);
Ort::AllocatorWithDefaultOptions allocator;
size_t num_input_nodes = session.GetInputCount();
std::vector<const char*> input_node_names = {
"input.1" };
std::vector<const char*> output_node_names = {
"212" };
const size_t input_tensor_size = 1 * 3 * point_num ;
std::vector<float> input_tensor_values(input_tensor_size);
for (size_t i = 0; i < 3; i++)
{
for (size_t j = 0; j < point_num; j++)
{
input_tensor_values[point_num * i + j] = points[3 * j + i];
}
}
std::vector<int64_t> input_node_dims = {
1, 3, point_num };
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), input_node_dims.size());
std::vector<Ort::Value> ort_inputs;
ort_inputs.push_back(std::move(input_tensor));
std::vector<Ort::Value> output_tensors = session.Run(Ort::RunOptions{
nullptr }, input_node_names.data(), ort_inputs.data(), input_node_names.size(), output_node_names.data(), output_node_names.size());
const float* rawOutput = output_tensors[0].GetTensorData<float>();
std::vector<int64_t> outputShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
size_t count = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();
std::vector<float> output(rawOutput, rawOutput + count);
int predict_label = std::max_element(output.begin(), output.end()) - output.begin();
std::cout << predict_label << std::endl;
}
// Reads point_num comma-separated "x,y,z,nx,ny,nz" records from
// bed_0610.txt, normalizes the xyz part and classifies the cloud.
// Returns 0 on success and 1 when the file cannot be opened or does not
// contain point_num complete records.
int main()
{
    std::ifstream infile("bed_0610.txt");
    if (!infile)
    {
        std::cerr << "Failed to open bed_0610.txt" << std::endl;
        return 1;
    }

    std::vector<float> points;
    points.reserve(3 * static_cast<size_t>(point_num));
    float x, y, z, nx, ny, nz;
    char ch; // swallows the comma between two values
    for (size_t i = 0; i < static_cast<size_t>(point_num); i++)
    {
        // Stop at the first incomplete record instead of pushing stale values.
        if (!(infile >> x >> ch >> y >> ch >> z >> ch >> nx >> ch >> ny >> ch >> nz))
            break;
        points.push_back(x);
        points.push_back(y);
        points.push_back(z);
    }
    infile.close();

    if (points.size() != 3 * static_cast<size_t>(point_num))
    {
        std::cerr << "Expected " << point_num << " points but read " << points.size() / 3 << std::endl;
        return 1;
    }

    pc_normalize(points);
    classfier(points);
    return 0;
}
Segmentação de partes
Tomemos como exemplo um modelo com 16 categorias e 50 partes, na versão para GPU.
Primeiro, converta o arquivo de pesos .pth treinado com PyTorch em um arquivo ONNX:
import torch
import torch
import pointnet_part_seg
# Number of points in the dummy point-cloud input used for the export.
point_num = 2048
# Length of the one-hot category vector passed as the second model input.
class_num = 16
# First argument of pointnet_part_seg.get_model (presumably the number of
# part labels; confirm against that module).
part_num = 50
# Selects a 6-channel (True) or 3-channel (False) dummy input.
normal_channel = False
def to_categorical(y, class_num):
    """One-hot encode a tensor of class indices.

    Args:
        y: Integer tensor of class indices, of any shape.
        class_num: Number of classes, i.e. the length of each one-hot vector.

    Returns:
        Float tensor of shape ``y.shape + (class_num,)`` located on the same
        device as ``y``.
    """
    # Build the identity matrix on y's own device and index it directly. This
    # avoids the NumPy round-trip and keeps the result on the right GPU even
    # when y does not live on the default CUDA device.
    return torch.eye(class_num, device=y.device)[y.long()]
# Use the GPU when one is available and fall back to the CPU otherwise, so
# no lines have to be commented out by hand on a CPU-only machine.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = pointnet_part_seg.get_model(part_num, normal_channel)
model = model.to(device)
model.eval()

# map_location lets a checkpoint that was saved on a GPU load on a CPU host.
checkpoint = torch.load('./part_seg.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

# Random dummy point cloud; it is only passed to the exporter as example input.
x = (torch.rand(1, 6, point_num) if normal_channel else torch.rand(1, 3, point_num))
x = x.to(device)
# Dummy category index of shape (1, 1); randint(0, 1) always yields 0.
label = torch.randint(0, 1, (1, 1))
label = label.to(device)

export_onnx_file = "./part_seg.onnx"
torch.onnx.export(model,
                  (x, to_categorical(label, class_num)),
                  export_onnx_file,
                  opset_version=11)
Inferência em Python:
import numpy as np
import onnxruntime
# Number of points sampled from the cloud and fed to the model.
point_num = 2048
# Length of the one-hot category vector built by to_categorical.
class_num = 16
def to_categorical(y, class_num):
    """Return the float32 one-hot encoding of the class indices in ``y``.

    Each index selects the matching row of a ``class_num`` x ``class_num``
    identity matrix.
    """
    identity = np.eye(class_num)
    one_hot = identity[y,]
    return np.asarray(one_hot, dtype=np.float32)
def pc_normalize(pc):
    """Center a point cloud on its centroid and scale it into the unit sphere.

    Args:
        pc: 2-D array with one point per row.

    Returns:
        A new array of the same shape whose rows have zero mean and whose
        farthest row lies at distance 1 from the origin. The input array is
        not modified.
    """
    centroid = np.mean(pc, axis=0)
    pc = pc - centroid
    # Largest distance of any point from the centroid.
    m = np.max(np.sqrt(np.sum(pc ** 2, axis=1)))
    if m == 0:
        # Every point coincides with the centroid: return the centered
        # (all-zero) cloud instead of dividing by zero and producing NaN.
        return pc
    pc = pc / m
    return pc
if __name__ == '__main__':
    # Load the cloud, normalize its xyz columns and draw point_num points
    # with replacement.
    cloud = np.loadtxt('85a15c26a6e9921ae008cc4902bfe3cd.txt').astype(np.float32)
    point_set = cloud[:, 0:3]
    point_set[:, 0:3] = pc_normalize(point_set[:, 0:3])
    choice = np.random.choice(point_set.shape[0], point_num, replace=True)
    point_set = point_set[choice, :][:, 0:3]
    pts = point_set

    # (1, point_num, 3) -> (1, 3, point_num)
    points = np.reshape(point_set, (1, point_num, 3)).swapaxes(2, 1)
    label = np.array([[0]], dtype=np.int32)

    onnx_session = onnxruntime.InferenceSession(
        "part_seg.onnx",
        providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    input_name = [node.name for node in onnx_session.get_inputs()]
    output_name = [node.name for node in onnx_session.get_outputs()]

    # First input: the points; second input: the one-hot category vector.
    input_feed = {
        input_name[0]: points,
        input_name[1]: to_categorical(label, class_num),
    }
    pred = onnx_session.run(None, input_feed)[0]

    # Per-point arg-max over the scores of the single batch element.
    part_ids = np.argmax(pred[0, :, :], 1).astype(np.int32)

    # Append the predicted id as a fourth column and write one point per row.
    pts = np.concatenate(
        (pts.reshape(point_num, 3), part_ids.reshape(point_num, 1)), axis=1)
    np.savetxt('pred.txt', pts, fmt='%.06f')
Inferência em C++:
#include <algorithm>
#include <cmath>
#include <ctime>
#include <fstream>
#include <iostream>
#include <random>
#include <vector>
#include <onnxruntime_cxx_api.h>
// Number of points kept after resampling; the point input tensor is {1, 3, point_num}.
const int point_num = 2048;
// Length of the one-hot category vector passed as the second model input.
const int class_num = 16;
// Number of scores per point read from the model output.
const int parts_num = 50;
// Centers a flat xyz point buffer on its centroid and scales it so that the
// farthest point lies at distance 1 from the origin.
//
// points: interleaved coordinates {x0, y0, z0, x1, y1, z1, ...}; modified in
//         place. Every complete xyz triple in the buffer is processed.
//
// main() calls this on the whole cloud read from the file, before it is
// resampled to point_num points, so the loops run over the real buffer size
// rather than point_num (which would skip points or read out of bounds).
void pc_normalize(std::vector<float>& points)
{
    const size_t count = points.size() / 3;
    if (count == 0)
        return; // nothing to normalize

    float mean_x = 0, mean_y = 0, mean_z = 0;
    for (size_t i = 0; i < count; ++i)
    {
        mean_x += points[3 * i];
        mean_y += points[3 * i + 1];
        mean_z += points[3 * i + 2];
    }
    mean_x /= static_cast<float>(count);
    mean_y /= static_cast<float>(count);
    mean_z /= static_cast<float>(count);

    // Center the cloud and track the largest squared distance in one pass;
    // the square root is taken once at the end instead of twice per point.
    float max_sq = 0;
    for (size_t i = 0; i < count; ++i)
    {
        points[3 * i] -= mean_x;
        points[3 * i + 1] -= mean_y;
        points[3 * i + 2] -= mean_z;
        const float sq = points[3 * i] * points[3 * i]
            + points[3 * i + 1] * points[3 * i + 1]
            + points[3 * i + 2] * points[3 * i + 2];
        if (sq > max_sq)
            max_sq = sq;
    }

    const float m = std::sqrt(max_sq);
    if (m == 0)
        return; // all points coincide; leave the centered zeros, avoid 0/0

    for (size_t i = 0; i < count; ++i)
    {
        points[3 * i] /= m;
        points[3 * i + 1] /= m;
        points[3 * i + 2] /= m;
    }
}
// Replaces `points` with point_num points drawn uniformly at random, with
// replacement, from the input cloud.
//
// points: interleaved xyz coordinates; holds exactly 3 * point_num floats on
//         return. An empty cloud is left untouched because there is nothing
//         to sample from.
void resample(std::vector<float>& points)
{
    const size_t source_count = points.size() / 3;
    if (source_count == 0)
        return;

    // rand() is only guaranteed to cover 15 bits, so `rand() % n` could never
    // select points past index RAND_MAX in a large cloud. A Mersenne Twister
    // with a uniform distribution covers the whole range.
    std::mt19937 engine(std::random_device{}());
    std::uniform_int_distribution<size_t> pick(0, source_count - 1);

    const size_t target_count = static_cast<size_t>(point_num);
    std::vector<float> temp_points(3 * target_count);
    for (size_t i = 0; i < target_count; i++)
    {
        const size_t src = pick(engine);
        temp_points[3 * i] = points[3 * src];
        temp_points[3 * i + 1] = points[3 * src + 1];
        temp_points[3 * i + 2] = points[3 * src + 2];
    }
    points.swap(temp_points);
}
std::vector<int> classfier(std::vector<float> & points, std::vector<float> & labels)
{
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "part_seg");
Ort::SessionOptions session_options;
session_options.SetIntraOpNumThreads(1);
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
OrtCUDAProviderOptions cuda_option;
cuda_option.device_id = 0;
cuda_option.arena_extend_strategy = 0;
cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
cuda_option.gpu_mem_limit = SIZE_MAX;
cuda_option.do_copy_in_default_stream = 1;
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
session_options.AppendExecutionProvider_CUDA(cuda_option);
const wchar_t* model_path = L"part_seg.onnx";
Ort::Session session(env, model_path, session_options);
Ort::AllocatorWithDefaultOptions allocator;
size_t num_input_nodes = session.GetInputCount();
std::vector<const char*> input_node_names = {
"input.1" , "1"};
std::vector<const char*> output_node_names = {
"277" };
const size_t input_tensor_size0 = 1 * 3 * point_num;
std::vector<float> input_tensor_values0(input_tensor_size0);
for (size_t i = 0; i < 3; i++)
{
for (size_t j = 0; j < point_num; j++)
{
input_tensor_values0[point_num * i + j] = points[3 * j + i];
}
}
std::vector<int64_t> input_node_dims0 = {
1, 3, point_num };
auto memory_info0 = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor0 = Ort::Value::CreateTensor<float>(memory_info0, input_tensor_values0.data(), input_tensor_size0, input_node_dims0.data(), input_node_dims0.size());
const size_t input_tensor_size1 = 1 * 1 * class_num;
std::vector<float> input_tensor_values1(input_tensor_size0);
for (size_t i = 0; i < class_num; i++)
{
input_tensor_values1[i] = labels[i];
}
std::vector<int64_t> input_node_dims1 = {
1, 1, class_num };
auto memory_info1 = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor1 = Ort::Value::CreateTensor<float>(memory_info1, input_tensor_values1.data(), input_tensor_size1, input_node_dims1.data(), input_node_dims1.size());
std::vector<Ort::Value> ort_inputs;
ort_inputs.push_back(std::move(input_tensor0));
ort_inputs.push_back(std::move(input_tensor1));
std::vector<Ort::Value> output_tensors = session.Run(Ort::RunOptions{
nullptr }, input_node_names.data(), ort_inputs.data(), input_node_names.size(), output_node_names.data(), output_node_names.size());
const float* rawOutput = output_tensors[0].GetTensorData<float>();
std::vector<int64_t> outputShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
size_t count = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();
std::vector<float> prob(rawOutput, rawOutput + count);
std::vector<std::vector<float>> outputs(point_num, std::vector<float>(parts_num, 0));
for (size_t i = 0; i < point_num; i++)
{
for (size_t j = 0; j < parts_num; j++)
{
outputs[i][j] = prob[i * parts_num + j];
//std::cout <<outputs[i][j] << " ";
}
//std::cout << std::endl;
}
std::vector<int> max_index(point_num, 0);
for (size_t i = 0; i < point_num; i++)
{
max_index[i]= std::max_element(outputs[i].begin(), outputs[i].end()) - outputs[i].begin();
//std::cout << max_index[i] << " ";
}
return max_index;
}
// Reads whitespace-separated "x y z nx ny nz label" records, normalizes and
// resamples the cloud, runs part segmentation and writes "x y z part" rows
// to pred.txt.
// Returns 0 on success and 1 when a file cannot be opened or no point is read.
int main()
{
    std::ifstream infile("85a15c26a6e9921ae008cc4902bfe3cd.txt");
    if (!infile)
    {
        std::cerr << "Failed to open 85a15c26a6e9921ae008cc4902bfe3cd.txt" << std::endl;
        return 1;
    }

    std::vector<float> points;
    float x, y, z, nx, ny, nz, label;
    while (infile >> x >> y >> z >> nx >> ny >> nz >> label)
    {
        points.push_back(x);
        points.push_back(y);
        points.push_back(z);
    }
    infile.close();

    if (points.empty())
    {
        std::cerr << "No points read from 85a15c26a6e9921ae008cc4902bfe3cd.txt" << std::endl;
        return 1;
    }

    // One-hot category vector with category 0 selected.
    std::vector<float> labels(class_num, 0.0f);
    labels[0] = 1.0f;

    pc_normalize(points);
    resample(points);
    std::vector<int> result = classfier(points, labels);

    // std::ofstream opens for writing by default. The previous
    // std::fstream(..., 'w') passed a character where an openmode is expected.
    std::ofstream outfile("pred.txt");
    if (!outfile)
    {
        std::cerr << "Failed to open pred.txt for writing" << std::endl;
        return 1;
    }
    for (size_t i = 0; i < static_cast<size_t>(point_num); i++)
    {
        outfile << points[3 * i] << " " << points[3 * i + 1] << " " << points[3 * i + 2] << " " << result[i] << '\n';
    }
    outfile.close();
    return 0;
}
Segmentação semântica
Tomemos como exemplo um modelo com 13 categorias, na versão para GPU.
Primeiro, converta o arquivo de pesos .pth treinado com PyTorch em um arquivo ONNX:
import torch
import pointnet_sem_seg
# Export settings: the dummy input below is shaped (1, 9, point_num).
point_num = 4096
class_num = 13

# Use the GPU when one is available and fall back to the CPU otherwise, so
# no lines have to be commented out by hand on a CPU-only machine.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = pointnet_sem_seg.get_model(class_num)
model = model.to(device)
model.eval()

# map_location lets a checkpoint that was saved on a GPU load on a CPU host.
checkpoint = torch.load('sem_seg.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

# Random dummy input; it is only passed to the exporter as the example input.
x = torch.rand(1, 9, point_num)
x = x.to(device)

export_onnx_file = "./sem_seg.onnx"
torch.onnx.export(model,
                  x,
                  export_onnx_file,
                  opset_version=11)
Código de inferência em Python:
import numpy as np
import onnxruntime
# Number of points per model input; block samples are padded to a multiple of it.
point_num = 4096
# Number of vote counters kept for every point of the room.
class_num = 13
# Step between neighbouring block origins along x and y.
stride = 0.5
# Side length of each square block in the xy plane.
block_size = 1.0
if __name__ == '__main__':
    data = np.load('Area_1_conferenceRoom_1.npy')
    points = data[:, :6]
    coord_min, coord_max = np.amin(points, axis=0)[:3], np.amax(points, axis=0)[:3]

    # Number of sliding-window positions per axis. Clamp to at least one so a
    # room narrower than block_size is still covered by a single block (the
    # unclamped formula yields 0 or a negative count there).
    grid_x = max(1, int(np.ceil(float(coord_max[0] - coord_min[0] - block_size) / stride) + 1))
    grid_y = max(1, int(np.ceil(float(coord_max[1] - coord_min[1] - block_size) / stride) + 1))

    # Gather the per-block arrays in lists and concatenate once at the end.
    # Growing an array with vstack/hstack inside the loop re-copies everything
    # collected so far on every iteration.
    data_blocks, index_blocks = [], []
    for index_y in range(grid_y):
        for index_x in range(grid_x):
            # Block bounds; the last block of a row/column is shifted back so
            # that it ends exactly at the room boundary.
            s_x = coord_min[0] + index_x * stride
            e_x = min(s_x + block_size, coord_max[0])
            s_x = e_x - block_size
            s_y = coord_min[1] + index_y * stride
            e_y = min(s_y + block_size, coord_max[1])
            s_y = e_y - block_size
            point_idxs = np.where((points[:, 0] >= s_x) & (points[:, 0] <= e_x) & (points[:, 1] >= s_y) & (points[:, 1] <= e_y))[0]
            if point_idxs.size == 0:
                continue

            # Pad the block with randomly repeated points up to a multiple of
            # point_num; sample with replacement only when more padding than
            # available points is needed.
            num_batch = int(np.ceil(point_idxs.size / point_num))
            point_size = int(num_batch * point_num)
            pad = point_size - point_idxs.size
            replace = pad > point_idxs.size
            point_idxs_repeat = np.random.choice(point_idxs, pad, replace=replace)
            point_idxs = np.concatenate((point_idxs, point_idxs_repeat))
            np.random.shuffle(point_idxs)

            data_batch = points[point_idxs, :]  # fancy indexing -> a copy
            # Room-level coordinates divided by the room maximum per axis.
            normlized_xyz = np.zeros((point_size, 3))
            normlized_xyz[:, 0] = data_batch[:, 0] / coord_max[0]
            normlized_xyz[:, 1] = data_batch[:, 1] / coord_max[1]
            normlized_xyz[:, 2] = data_batch[:, 2] / coord_max[2]
            # Shift x/y so the block centre is the origin; scale columns 3..5 by 1/255.
            data_batch[:, 0] = data_batch[:, 0] - (s_x + block_size / 2.0)
            data_batch[:, 1] = data_batch[:, 1] - (s_y + block_size / 2.0)
            data_batch[:, 3:6] /= 255.0
            data_batch = np.concatenate((data_batch, normlized_xyz), axis=1)

            data_blocks.append(data_batch)
            index_blocks.append(point_idxs)

    data_room = np.concatenate(data_blocks, axis=0)
    data_room = data_room.reshape((-1, point_num, data_room.shape[1]))
    index_room = np.concatenate(index_blocks).reshape((-1, point_num))

    onnx_session = onnxruntime.InferenceSession("sem_seg.onnx", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    input_name = [node.name for node in onnx_session.get_inputs()]

    vote_label_pool = np.zeros((points.shape[0], class_num))
    num_blocks = data_room.shape[0]
    for sbatch in range(num_blocks):
        # Progress: current block index and total block count.
        print(sbatch, num_blocks)

        # One block per run: (1, point_num, 9) -> (1, 9, point_num), float32.
        batch_data = data_room[sbatch:sbatch + 1, ...]
        batch_input = np.ascontiguousarray(batch_data.swapaxes(2, 1), dtype=np.float32)
        input_feed = {name: batch_input for name in input_name}

        seg_pred = onnx_session.run(None, input_feed)[0]
        batch_pred_label = np.argmax(seg_pred, 2)

        # np.add.at accumulates every (point, label) pair, including point
        # indices that occur more than once in a block; a plain fancy-indexed
        # "+= 1" would count such duplicates only once.
        np.add.at(vote_label_pool, (index_room[sbatch], batch_pred_label[0]), 1)

    pred = np.argmax(vote_label_pool, 1)
    # "with" closes the file even if a write fails.
    with open('pred.txt', 'w') as fout:
        for i in range(points.shape[0]):
            fout.write('%f %f %f %d\n' % (points[i, 0], points[i, 1], points[i, 2], pred[i]))
Inferência em C++:
#include <iostream>
#include <fstream>
#include <vector>
#include <algorithm>
#include <ctime>
#include <random>
#include <onnxruntime_cxx_api.h>
// Number of points per model input; the input tensor is shaped {1, 9, point_num}.
const int point_num = 4096;
// Number of scores per point read from the model output, and of vote counters per point.
const int class_num = 13;
// One point of the room: position (m_x..m_z), colour (m_r..m_b) and three
// extra channels (m_normal_*), which main() fills with the position divided
// by the room maximum. Every field defaults to 0.
struct point
{
    float m_x = 0, m_y = 0, m_z = 0;
    float m_r = 0, m_g = 0, m_b = 0;
    float m_normal_x = 0, m_normal_y = 0, m_normal_z = 0;

    // All fields zero.
    point() = default;

    // Position and colour; the three extra channels stay at zero.
    point(float x, float y, float z, float r, float g, float b) :
        point(x, y, z, r, g, b, 0, 0, 0) {
    }

    // Every field given explicitly.
    point(float x, float y, float z, float r, float g, float b, float normal_x, float normal_y, float normal_z) :
        m_x(x), m_y(y), m_z(z), m_r(r), m_g(g), m_b(b), m_normal_x(normal_x), m_normal_y(normal_y), m_normal_z(normal_z) {
    }
};
int main()
{
float x, y, z, r, g, b, l;
std::vector<point> pts;
std::vector<float> points_x, points_y, points_z;
int points_num = 0;
std::ifstream infile("Area_1_conferenceRoom_1.txt");
while (infile >> x >> y >> z >> r >> g >> b >> l)
{
point pt(x, y, z, r, g, b);
pts.push_back(pt);
points_x.push_back(x);
points_y.push_back(y);
points_z.push_back(z);
points_num++;
}
float x_min = *std::min_element(points_x.begin(), points_x.end());
float y_min = *std::min_element(points_y.begin(), points_y.end());
float z_min = *std::min_element(points_z.begin(), points_z.end());
float x_max = *std::max_element(points_x.begin(), points_x.end());
float y_max = *std::max_element(points_y.begin(), points_y.end());
float z_max = *std::max_element(points_z.begin(), points_z.end());
float stride = 0.5;
float block_size = 1.0;
srand((int)time(0));
int grid_x = ceil((x_max - x_min - block_size) / stride) + 1;
int grid_y = ceil((y_max - y_min - block_size) / stride) + 1;
std::vector<point> data_room;
std::vector<int> index_room;
for (size_t index_y = 0; index_y < grid_y; index_y++)
{
for (size_t index_x = 0; index_x < grid_x; index_x++)
{
float s_x = x_min + index_x * stride;
float e_x = std::min(s_x + block_size, x_max);
s_x = e_x - block_size;
float s_y = y_min + index_y * stride;
float e_y = std::min(s_y + block_size, y_max);
s_y = e_y - block_size;
std::vector<int> point_idxs;
for (size_t i = 0; i < points_num; i++)
{
if (points_x[i] >= s_x && points_x[i] <= e_x && points_y[i] >= s_y && points_y[i] <= e_y)
point_idxs.push_back(i);
}
if (point_idxs.size() == 0)
continue;
int num_batch = ceil(point_idxs.size() * 1.0 / point_num);
int point_size = num_batch * point_num;
bool replace = (point_size - point_idxs.size() <= point_idxs.size() ? false : true);
std::vector<int> point_idxs_repeat;
if (replace)
{
for (size_t i = 0; i < point_size - point_idxs.size(); i++)
{
int id = rand() % point_idxs.size();
point_idxs_repeat.push_back(point_idxs[id]);
}
}
else
{
std::vector<bool> flags(pts.size(), false);
for (size_t i = 0; i < point_size - point_idxs.size(); i++)
{
int id = rand() % point_idxs.size();
while (true)
{
if (flags[id] == false)
{
flags[id] = true;
break;
}
id = rand() % point_idxs.size();
}
point_idxs_repeat.push_back(point_idxs[id]);
}
}
point_idxs.insert(point_idxs.end(), point_idxs_repeat.begin(), point_idxs_repeat.end());
std::random_device rd;
std::mt19937 g(rd()); // 随机数引擎:基于梅森缠绕器算法的随机数生成器
std::shuffle(point_idxs.begin(), point_idxs.end(), g); // 打乱顺序,重新排序(随机序列)
std::vector<point> data_batch;
for (size_t i = 0; i < point_idxs.size(); i++)
{
data_batch.push_back(pts[point_idxs[i]]);
}
//std::cout << index_y << " " << index_x << std::endl;
for (size_t i = 0; i < point_size; i++)
{
data_batch[i].m_normal_x = data_batch[i].m_x / x_max;
data_batch[i].m_normal_y = data_batch[i].m_y / y_max;
data_batch[i].m_normal_z = data_batch[i].m_z / z_max;
data_batch[i].m_x -= (s_x + block_size / 2.0);
data_batch[i].m_y -= (s_y + block_size / 2.0);
data_batch[i].m_r /= 255.0;
data_batch[i].m_g /= 255.0;
data_batch[i].m_b /= 255.0;
data_room.push_back(data_batch[i]);
index_room.push_back(point_idxs[i]);
}
}
}
int n = point_num, m = index_room.size() / n;
std::vector<std::vector<point>> data_rooms(m, std::vector<point>(n, point()));
std::vector<std::vector<int>> index_rooms(m, std::vector<int>(n, 0));
for (size_t i = 0; i < m; i++)
{
for (size_t j = 0; j < n; j++)
{
data_rooms[i][j] = data_room[i * n + j];
index_rooms[i][j] = index_room[i * n + j];
}
}
std::vector<std::vector<int>> vote_label_pool(points_num, std::vector<int>(class_num, 0));
int num_blocks = data_rooms.size();
clock_t start = clock();
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "sem_seg");
Ort::SessionOptions session_options;
session_options.SetIntraOpNumThreads(1);
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
OrtCUDAProviderOptions cuda_option;
cuda_option.device_id = 0;
cuda_option.arena_extend_strategy = 0;
cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
cuda_option.gpu_mem_limit = SIZE_MAX;
cuda_option.do_copy_in_default_stream = 1;
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
session_options.AppendExecutionProvider_CUDA(cuda_option);
const wchar_t* model_path = L"sem_seg.onnx";
Ort::Session session(env, model_path, session_options);
Ort::AllocatorWithDefaultOptions allocator;
size_t num_input_nodes = session.GetInputCount();
std::vector<const char*> input_node_names = {
"input.1" };
std::vector<const char*> output_node_names = {
"268" };
const size_t input_tensor_size = 1 * 9 * point_num;
std::vector<float> input_tensor_values(input_tensor_size);
for (int sbatch = 0; sbatch < num_blocks; sbatch++)
{
//std::cout << sbatch << std::endl;
int start_idx = sbatch;
int end_idx = std::min(sbatch + 1, num_blocks);
int real_batch_size = end_idx - start_idx;
std::vector<point> batch_data = data_rooms[start_idx];
std::vector<int> point_idx = index_rooms[start_idx];
std::vector<float> batch(point_num * 9);
for (size_t i = 0; i < point_num; i++)
{
batch[9 * i + 0] = batch_data[i].m_x;
batch[9 * i + 1] = batch_data[i].m_y;
batch[9 * i + 2] = batch_data[i].m_z;
batch[9 * i + 3] = batch_data[i].m_r;
batch[9 * i + 4] = batch_data[i].m_g;
batch[9 * i + 5] = batch_data[i].m_b;
batch[9 * i + 6] = batch_data[i].m_normal_x;
batch[9 * i + 7] = batch_data[i].m_normal_y;
batch[9 * i + 8] = batch_data[i].m_normal_z;
}
for (size_t i = 0; i < 9; i++)
{
for (size_t j = 0; j < point_num; j++)
{
input_tensor_values[i * point_num + j] = batch[9 * j + i];
}
}
std::vector<int64_t> input_node_dims = {
1, 9, point_num };
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), input_node_dims.size());
std::vector<Ort::Value> ort_inputs;
ort_inputs.push_back(std::move(input_tensor));
std::vector<Ort::Value> output_tensors = session.Run(Ort::RunOptions{
nullptr }, input_node_names.data(), ort_inputs.data(), input_node_names.size(), output_node_names.data(), output_node_names.size());
const float* rawOutput = output_tensors[0].GetTensorData<float>();
std::vector<int64_t> outputShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
size_t count = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();
std::vector<float> prob(rawOutput, rawOutput + count);
std::vector<std::vector<float>> outputs(point_num, std::vector<float>(class_num, 0));
for (size_t i = 0; i < point_num; i++)
{
for (size_t j = 0; j < class_num; j++)
{
outputs[i][j] = prob[i * class_num + j];
//std::cout << outputs[i][j] << " ";
}
//std::cout << std::endl;
}
std::vector<int> pred_label(point_num, 0);
for (size_t i = 0; i < point_num; i++)
{
pred_label[i] = std::max_element(outputs[i].begin(), outputs[i].end()) - outputs[i].begin();
vote_label_pool[point_idx[i]][pred_label[i]] += 1;
}
}
clock_t stop = clock();
std::cout << stop - start << std::endl;
std::ofstream outfile("pred.txt");
for (size_t i = 0; i < points_num; i++)
{
int max_index = std::max_element(vote_label_pool[i].begin(), vote_label_pool[i].end()) - vote_label_pool[i].begin();
outfile << pts[i].m_x << " " << pts[i].m_y << " " << pts[i].m_z << " " << max_index << std::endl;
}
outfile.close();
return 0;
}
Observe que, como o C++ não consegue ler arquivos no formato npy diretamente (embora seja possível recorrer a bibliotecas para isso), primeiro usamos um script Python para converter o arquivo npy em um arquivo txt:
import numpy as np
# Dump the room array as plain text, one row per line, so the C++ program can
# read it with stream extraction.
np.savetxt('Area_1_conferenceRoom_1.txt',
           np.load("Area_1_conferenceRoom_1.npy"),
           fmt='%0.06f')