3 其他

1 实验环境

Ubuntu 16.04 + Qt5 + CUDA9.0

2 实例

CUDA 配置全部放在 .pro 文件中；
main.cpp 为主函数，使用 g++ 编译；
kernel.cu 为核函数，使用 nvcc 编译。

2.1 kernel.cu

// kernel.cu

#include <cuda_runtime.h>
#include <stdio.h>

extern "C" void runCudaPart();

// Element-wise in-place add: ary1[i] += ary2[i], one element per thread.
//
// Launch layout: 1-D grid of 1-D blocks. Each thread handles the element at
// its flat global index, so gridDim.x * blockDim.x must not exceed the number
// of ints in either buffer (the kernel takes no length argument and therefore
// cannot bounds-check).
// Both pointers must be dereferenceable on the device; ary2 is only read.
__global__ void addAry(int *ary1, int *ary2)
{
    // Flat global index across all blocks. Indexing by threadIdx.x alone
    // makes every block hit the same first blockDim.x elements (a data race)
    // and leaves the remaining elements untouched on multi-block launches.
    int indx = blockIdx.x * blockDim.x + threadIdx.x;
    ary1[indx] += ary2[indx];
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaCallOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
    return false;
}

// Main cuda function
//
// Fills two 32-element host arrays with i and 2*i, adds them element-wise on
// the GPU with addAry, and prints every sum to stdout as "result[i] = value".
// If any CUDA call fails, the failure is reported on stderr, the results are
// not printed, and the device buffers are still released.
void runCudaPart()
{
    const int count = 32;
    const size_t bytes = count * sizeof(int);

    int ary1[count];
    int ary2[count];
    int res[count];

    for (int i = 0; i < count; i++)
    {
        ary1[i] = i;
        ary2[i] = 2*i;
        res[i]  = 0;
    }

    // Start from NULL so the cudaFree calls at the end are harmless no-ops
    // when an allocation failed or was never attempted.
    int *dev_ary1 = NULL, *dev_ary2 = NULL;

    /* Allocate GPU buffers, then copy the input vectors from host memory to
       them. The && chain stops at the first failing call. */
    bool ok =
        cudaCallOk(cudaMalloc((void**)&dev_ary1, bytes), "cudaMalloc dev_ary1") &&
        cudaCallOk(cudaMalloc((void**)&dev_ary2, bytes), "cudaMalloc dev_ary2") &&
        cudaCallOk(cudaMemcpy(dev_ary1, ary1, bytes, cudaMemcpyHostToDevice),
                   "cudaMemcpy ary1 -> device") &&
        cudaCallOk(cudaMemcpy(dev_ary2, ary2, bytes, cudaMemcpyHostToDevice),
                   "cudaMemcpy ary2 -> device");

    if (ok)
    {
        /* Launch a kernel on the GPU with one thread for each element.
           A single block of `count` threads covers every element exactly
           once whether the kernel indexes by threadIdx.x or by the flat
           global index. */
        addAry<<<1, count>>>(dev_ary1, dev_ary2);

        /* cudaGetLastError catches launch-configuration errors;
           cudaDeviceSynchronize (the replacement for the deprecated
           cudaThreadSynchronize) waits for the kernel to finish and reports
           execution errors. */
        ok = cudaCallOk(cudaGetLastError(), "addAry launch") &&
             cudaCallOk(cudaDeviceSynchronize(), "addAry execution") &&
             /* Copy output vector from GPU buffer to host memory */
             cudaCallOk(cudaMemcpy(res, dev_ary1, bytes, cudaMemcpyDeviceToHost),
                        "cudaMemcpy device -> res");
    }

    if (ok)
    {
        for (int i = 0; i < count; i++)
            printf( "result[%d] = %d\n", i, res[i]);
    }

    cudaFree(dev_ary1);
    cudaFree(dev_ary2);
}

2.2 kernel.h

// kernel.h

#ifndef KERNEL_H
#define KERNEL_H

/*
 * Host-side entry point of the CUDA demo. It is implemented in kernel.cu
 * (compiled by nvcc) and declared with C linkage so that the unmangled
 * symbol can be linked from objects built by a different compiler.
 * The __cplusplus guard lets this header be included from C sources as well
 * as C++ ones.
 */
#ifdef __cplusplus
extern "C" {
#endif

void runCudaPart(void);

#ifdef __cplusplus
}
#endif

#endif /* KERNEL_H */

2.3 main.cpp

#include <iostream>
#include "kernel.h"

// Program entry point: prints a banner, runs the CUDA demo declared in
// kernel.h, prints a closing line, and exits with status 0.
int main()
{
    static const char *const kBanner = "--- This is a demo:";
    static const char *const kFooter = "--- Ending ... ";

    std::cout << kBanner << std::endl;

    // All GPU work happens inside this call.
    runCudaPart();

    std::cout << kFooter << std::endl;
    return 0;
}

2.4 pro文件的配置

# qmake project for a console program whose C++ sources are built by the
# regular host compiler and whose CUDA source is built by nvcc through a
# custom extra compiler.
TEMPLATE = app
CONFIG  += console c++11
CONFIG  -= app_bundle
CONFIG  -= qt
# NOTE(review): with "CONFIG -= qt" above, the two QT lines below are
# presumably inert -- confirm before removing them.
QT      += core
QT      -= gui

TARGET  = cuda-demo
# Specify the directory where the generated application is placed
#DESTDIR += ./
SOURCES += main.cpp
OBJECTS_DIR = ./debug__
CUDA_OBJECTS_DIR = ./debug__

# This makes the .cu files appear in your project
#OTHER_FILES +=  ./kernel.cu

# CUDA settings <-- may change depending on your system
CUDA_SOURCES += ./kernel.cu
CUDA_SDK = "/usr/local/cuda-9.0/"   # Path to cuda SDK install
CUDA_DIR = "/usr/local/cuda-9.0/"   # Path to cuda toolkit install

# DO NOT EDIT BEYOND THIS UNLESS YOU KNOW WHAT YOU ARE DOING....
SYSTEM_NAME = ubuntu        # Informational only; not referenced elsewhere in this file
SYSTEM_TYPE = 64            # '32' or '64'; passed to nvcc as --machine
CUDA_ARCH = sm_50           # nvcc -arch value; adjust to the target GPU
NVCC_OPTIONS = --use_fast_math

# include paths
INCLUDEPATH += $$CUDA_DIR/include

# library directories
QMAKE_LIBDIR += $$CUDA_DIR/lib64/

# Add the necessary libraries
CUDA_LIBS = -lcuda -lcudart

# The following makes sure all path names (which often include spaces)
# are put between quotation marks
CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')
LIBS += $$CUDA_LIBS

# Extra preprocessor defines for nvcc: debug builds add -D_DEBUG,
# release builds add nothing.
NVCC_DEFINES =
CONFIG(debug, debug|release): NVCC_DEFINES += -D_DEBUG

# Configuration of the Cuda compiler.
# Each file listed in CUDA_SOURCES is compiled to <base>_cuda.o inside
# CUDA_OBJECTS_DIR. The rule is defined once for both build modes; the only
# per-mode difference is NVCC_DEFINES above.
cuda.input  = CUDA_SOURCES
cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
cuda.commands = $$CUDA_DIR/bin/nvcc $$NVCC_DEFINES $$NVCC_OPTIONS \
                $$CUDA_INC --machine $$SYSTEM_TYPE \
                -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
cuda.dependency_type = TYPE_C
QMAKE_EXTRA_COMPILERS += cuda

HEADERS += \
    kernel.h

2.5 运行结果

运行结果

3 其他

有关pro文件的配置情况，可以参看《Qt 之 pro 配置详解》

Ubuntu 16.04 中 QT creator 运行CUDA程序的配置

1 实验环境

2 实例

2.1 kernel.cu

2.2 kernel.h

2.3 main.cpp

2.4 pro文件的配置

2.5 运行结果

3 其他

猜你喜欢