c++ multi-thread call python script

f3892437425b013db8603f936ac03bde.png

Why?

1. Improve efficiency.

2. At the beginning, the code used multiple processes in Python, but it was later found that Python code called from C++ could not spawn multiple processes. Since the Python interpreter has a Global Interpreter Lock (GIL), only one thread can hold the interpreter at a time. Therefore, when C++ threads call a Python script concurrently, each thread must acquire the GIL before touching the interpreter, and release it afterwards.

How to enable multithreading in c++

First, the header file #include<thread> must be introduced, and the functions and classes that manage threads are declared in the header file, including the std::thread class.

The statement "std::thread th1(test);" creates a thread named th1.

join() blocks the calling thread: here the main function waits until the th1 child thread finishes running before continuing.

#include<iostream>
#include<thread>
using namespace std;


// Entry point for the worker thread: announce that it is running and
// print its own thread id (as seen from inside the thread).
void test()
{
  std::cout << "子线程运行" << std::endl
            << "子线程id为" << std::this_thread::get_id() << std::endl;
}
int main()
{
  cout << "主线程" << endl;
  thread th1(test);
  cout << "主线程中显示子线程id为" << th1.get_id() << endl;
  th1.join();
  return 0;
}

result:

1e3495427e222181318eeaacab348e7a.png

c++ multi-thread call python script

This involves the data exchange between C++ and NumPy arrays. (You can check what I wrote before: https://mp.weixin.qq.com/s/DJCCXXGH_2658p2ytMvZMw)

In a C++ multi-threaded environment, directly calling the api to operate the Python interpreter will definitely cause a core dump, because most functions of Python are not thread-safe. Access order is controlled by the GIL.

The Python interpreter is not fully thread-safe. To support multithreaded Python programs, there is a global lock, called the global interpreter lock or GIL, that must be held by the current thread before it can safely access Python objects. Without locks, even the simplest operations can cause problems in multithreaded programs.

Not much to say — here is the code. It hands a CT volume of 800×512×512 voxels to 4 different Python threads for calculation, and finally collects the results back into C++ buffers.

#include<iostream>
#include"include/Python.h"
#include"arrayobject.h"
#include<object.h>
#include<thread>
#include<vector>
using namespace std;
/*
// RAII guard for the Python GIL: the constructor acquires the lock for the
// current OS thread via PyGILState_Ensure(), the destructor releases it.
// Create one at the top of any C++ thread body that touches Python objects.
class PythonThreadLocker
{
  PyGILState_STATE state;
public:
  PythonThreadLocker() : state(PyGILState_Ensure())
  {}
  ~PythonThreadLocker() {
    PyGILState_Release(state);
  }
  // Non-copyable: a copy would call PyGILState_Release twice with the
  // same saved state, corrupting the interpreter's GIL bookkeeping.
  PythonThreadLocker(const PythonThreadLocker&) = delete;
  PythonThreadLocker& operator=(const PythonThreadLocker&) = delete;
};
// Copy a 3-D NPY_SHORT ndarray element-by-element into the flat output
// buffer `ptr` (C order: layer, row, column). The caller must own the GIL
// and `ptr` must hold at least dim0*dim1*dim2 shorts.
// Uses the official accessor macros (PyArray_DIM / PyArray_GETPTR3) instead
// of poking at struct fields — direct access to ->data/->dimensions/->strides
// is deprecated since NumPy 1.7. Indices are npy_intp, not int, so large
// volumes cannot truncate.
void NumpyToPtr(PyArrayObject* pRet, short* ptr)
{
  const npy_intp layers = PyArray_DIM(pRet, 0);
  const npy_intp rows = PyArray_DIM(pRet, 1);
  const npy_intp columns = PyArray_DIM(pRet, 2);
  npy_intp j = 0;
  for (npy_intp c = 0; c < layers; ++c)
  {
    for (npy_intp m = 0; m < rows; ++m)
    {
      for (npy_intp n = 0; n < columns; ++n)
      {
        // GETPTR3 applies the array's strides for us.
        ptr[j++] = *(short*)PyArray_GETPTR3(pRet, c, m, n);
      }
    }
  }
}
void testmul(short* ptr, npy_intp* dims,short* ptr2, PyObject* times)
{
  PythonThreadLocker locker;
  
  PyObject* pModule = PyImport_ImportModule("mul");


  PyObject* PyArray = PyArray_SimpleNewFromData(3, dims, NPY_SHORT, ptr);


  PyObject* pFunc = PyObject_GetAttrString(pModule, "ff");




  //用tuple装起来
  PyObject* args = PyTuple_New(2);
  PyTuple_SetItem(args, 0, PyArray);
  PyTuple_SetItem(args, 1, times);
  PyArrayObject* pRet = (PyArrayObject*)PyEval_CallObject(pFunc, args);
  NumpyToPtr(pRet, ptr2);


  Py_CLEAR(pModule);
  Py_CLEAR(PyArray);
  Py_CLEAR(pFunc);
  Py_CLEAR(args);
  Py_CLEAR(pRet);
}
// One-time interpreter setup: must run on the main thread before any other
// Python C-API call.
void pythoninit()
{
  Py_SetPythonHome(L"./");// points at the python.exe location — change this to your own Python environment
  Py_Initialize();
  PyEval_InitThreads();// enable thread support (acquires the GIL for this thread; no-op / deprecated since Python 3.9)


  // Make the current directory importable so "import mul" finds mul.py,
  // and list it as a sanity check.
  PyRun_SimpleString("import sys");
  PyRun_SimpleString("sys.path.append('./')");
  PyRun_SimpleString("import os");
  PyRun_SimpleString("print(os.listdir())");
}
int main()
{
  pythoninit();
  //初始化Numpy
  import_array();


  //造数据
  short* ptr = new short[800*512*512];
  for (int i = 0; i < 80 * 512 * 512; i++)
  {
    ptr[i] = i;
  }


  npy_intp dims[3] = { 800,512,512 };


  vector<short*> returnptr(4);
  for (int i = 0; i < 4; i++)
  {
    returnptr[i] = new short[800 * 512 * 512];
  }


  Py_BEGIN_ALLOW_THREADS;
  cout << PyGILState_Check() << endl;
  thread t1(testmul, ptr, dims, returnptr[0], Py_BuildValue("h", 10));
  thread t2(testmul, ptr, dims, returnptr[1], Py_BuildValue("h", 20));
  thread t3(testmul, ptr, dims, returnptr[2], Py_BuildValue("h", 30));
  thread t4(testmul, ptr, dims, returnptr[3], Py_BuildValue("h", 40));
  t1.join();
  t2.join();
  t3.join();
  t4.join();
  Py_END_ALLOW_THREADS;


  delete[] ptr;
  for (int i = 0; i < 4; i++)
  {
    delete[] returnptr[i];
  }
  Py_Finalize();
  return 0;
}
*/
import numpy as np
import time
def ff(data, times):
    """Clip the voxel values of `data` into [200, 3000], sleep `times`
    seconds (to simulate work), and return the result reshaped to
    (-1, 512, 512)."""
    print(data.shape)
    flat = np.clip(data.reshape(-1), 200, 3000)
    result = flat.reshape(-1, 512, 512)
    time.sleep(times)
    return result

おすすめ

転載: blog.csdn.net/weixin_41202834/article/details/121173770