在一个非常简单的Cuda和C++程序上需要帮助
我是CUDA编程的新手。我想编写一个简单的CUDA和C++图像处理程序,用来调整图像的亮度、饱和度和对比度等。我先从一个非常简单的函数开始,它只改变图像的亮度:把每个像素的R、G、B分量都乘以一个alpha值。
这是我的CPP项目:
#include <cstdlib>
#include <iostream>
#include <cutil_inline.h>
#include <cutil_gl_inline.h>
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>
using namespace std;
/* Host-side description of an image: a raw pixel buffer plus its dimensions. */
struct ImageData {
unsigned char *data; /* Points to large array of R,G,B-order data; initCuda() sizes its buffers as width * height * 3 bytes */
int height; /* image height; presumably in pixels - confirm against loadPPMImageData() */
int width; /* image width; presumably in pixels - confirm against loadPPMImageData() */
};
// Host-side image; passed to loadPPMImageData() in main() and read by initCuda().
ImageData imageData;
float *imageResult; // to store the image result from cuda after running the kernel (not referenced elsewhere in the visible code)
// Device buffer holding the input image; allocated with cudaMalloc in initCuda().
unsigned char *d_Input;
// Device buffer receiving the filtered image; allocated with cudaMalloc in initCuda().
unsigned char *d_Output;
// Host-side destination of the device-to-host copy of d_Output performed in main().
unsigned char *h_Output;
// These are CUDA functions to handle allocation and launching the kernels.
// They are defined in kernel.cu and declared extern "C" so that the names
// match the unmangled symbols exported by the nvcc-compiled object file.
extern "C" void initInput(unsigned char *data, unsigned char **device, unsigned int size);
extern "C" void filter(unsigned char *d_src, unsigned char *d_dest, int width, int height, int filterMode,
float alpha, float contrast, float saturation, bool use_array);
/*
 * Allocates the device input and output buffers for the loaded image and
 * hands the host pixel data to initInput() for upload.
 * Reads the global imageData; writes the globals d_Input and d_Output.
 */
void initCuda()
{
    // Total byte count of the image: three bytes (R, G, B) per pixel.
    unsigned int imageBytes = imageData.width * imageData.height * 3 * sizeof(unsigned char);

    // Device storage for the source image.
    cutilSafeCall(cudaMalloc((void**) &d_Input, imageBytes));

    // Device storage for the filtered result.
    cutilSafeCall(cudaMalloc((void**) &d_Output, imageBytes));

    // Upload the host pixels; initInput() receives the address of the device pointer.
    initInput(imageData.data, &d_Input, imageBytes);
}
int main() {
loadPPMImageData((char *)"boxes.ppm", &imageData); //this function is defined in another file
cudaGLSetGLDevice(0);
initCuda();
filter( d_Input, d_Output, imageData.width, imageData.height, 1, 0.8, 1.0, 1.0, 1);
cutilSafeCall(cudaMemcpy(h_Output, d_Output, size, cudaMemcpyDeviceToHost)); // copy output data from device to host
//print the output
for (int i = 0; i < imageData.size; i++) {
cout << d_Output
}
// do some memory cleanups
//done
return 0
}
这是我的kernel.cu文件:
#include <iostream>
#include <cstdlib>
#include <string>
#include <cmath>
#include <shrUtils.h>
#include <cutil_inline.h>
#include <cutil_math.h>
/*
 * Brightness kernel: multiplies the R, G and B bytes of every pixel by alpha.
 *
 * Launch layout: a 2D grid of 2D blocks with one thread per pixel
 * (x = column, y = row). The grid may be rounded up past the image size;
 * threads that fall outside the image exit without touching memory.
 *
 * input  - device pointer to width * height * 3 bytes, interleaved R,G,B
 * output - device pointer to a buffer of the same size
 * width  - image width in pixels
 * height - image height in pixels
 * alpha  - brightness factor applied to each channel
 *
 * Results are clamped to [0, 255] so that alpha > 1 saturates instead of
 * wrapping around when narrowed to unsigned char.
 */
__global__ void
applyAlpha(unsigned char* input, unsigned char* output, int width, int height, float alpha)
{
    // Pixel coordinates handled by this thread.
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is sized with a ceiling division, so edge blocks contain
    // threads beyond the image; they must not read or write.
    if (width <= 0 || height <= 0 ||
        x >= (unsigned int)width || y >= (unsigned int)height) {
        return;
    }

    // Offset of the pixel's first byte; size_t avoids overflow on large images.
    const size_t base = ((size_t)y * (size_t)width + x) * 3;

    for (int channel = 0; channel < 3; ++channel) { // 0 = r, 1 = g, 2 = b
        float scaled = (float)input[base + channel] * alpha;
        scaled = fminf(fmaxf(scaled, 0.0f), 255.0f);
        output[base + channel] = (unsigned char)scaled;
    }
}
/*
 * Integer division of a by b that adds one to the truncated quotient
 * whenever the division leaves a remainder.
 */
extern "C"
int iDivUp(int a, int b){
    int quotient = a / b;
    if (a % b != 0) {
        quotient += 1;
    }
    return quotient;
}
/*
 * Uploads the host image into an already-allocated device buffer
 * (array version).
 *
 * data        - host pointer to the image bytes
 * deviceArray - address of the device pointer that was allocated by the
 *               caller; the buffer it points to is the copy destination
 * size        - number of bytes to copy
 *
 * Prints a message and returns without copying if any pointer is NULL.
 */
extern "C"
void initInput(unsigned char *data, unsigned char **deviceArray, unsigned int size) // /* image data, device pointer, etc */)
{
    if (data == NULL || deviceArray == NULL || *deviceArray == NULL) {
        std::cerr << "initInput: NULL host data or device buffer, nothing copied" << std::endl;
        return;
    }

    // copy image data from host to device (array version).
    // The destination is the device buffer itself (*deviceArray). Passing
    // deviceArray would hand cudaMemcpy the host address of the pointer
    // variable instead of device memory.
    cutilSafeCall(cudaMemcpy(*deviceArray, data, size, cudaMemcpyHostToDevice));

    //TODO: Texture version
}
/*
 * Host-side dispatcher that launches the image filter kernels.
 *
 * Only the array version with filter_mode == 1 (brightness) does any work:
 * it launches applyAlpha with 16x16 thread blocks on a grid rounded up to
 * cover the whole image, checks for launch errors, and waits for the kernel
 * to finish. Every other combination returns without doing anything.
 * contrast and saturation are accepted but not used yet.
 */
extern "C"
void filter(unsigned char *d_src, unsigned char *d_dest, int width, int height, int filter_mode, float alpha, float contrast, float saturation, bool use_array)
{
    /* TODO
     * run different kernels for array and texture version
     */
    if (!use_array) {
        // Texture version: not yet implemented
        return;
    }

    if (filter_mode != 1) {
        // Only the brightness (alpha) filter exists so far
        return;
    }

    // One thread per pixel, in 16x16 tiles
    dim3 threadsPerBlock(16, 16, 1);
    dim3 blocksPerGrid(iDivUp(width, threadsPerBlock.x),
                       iDivUp(height, threadsPerBlock.y),
                       1);

    applyAlpha<<< blocksPerGrid, threadsPerBlock >>>(d_src, d_dest, width, height, alpha);

    // Report a failed launch, then block until the kernel has completed
    cutilCheckMsg("Kernel execution failed");
    cutilSafeCall(cutilDeviceSynchronize());
}
//编辑:我已经根据Andrew的回答修改了上面的文件,不过现在编译后出现了以下错误:
ld: warning: ignoring file kernel.o, file was built for i386 which is not the architecture being linked (x86_64)
Undefined symbols for architecture x86_64:
"_initInput", referenced from:
initCuda() in CS380_prog4.o
"_filter", referenced from:
display() in CS380_prog4.o
(maybe you meant: ___GLEW_SGIS_texture_filter4, ___GLEW_EXT_texture_filter_anisotropic , ___GLEW_NV_multisample_filter_hint)
ld: symbol(s) not found for architecture x86_64
collect2: ld returned 1 exit status
make: *** [testprog] Error 1
我对 initInput 和 filter 这两个函数都使用了 extern "C" 声明。函数声明(在test.cpp中)和定义(在kernel.cu中)的参数也完全一致,但链接器仍然报告找不到这些函数。我该如何解决这个链接问题?
您把.cu文件直接#include到了.cpp文件中,这相当于把它的内容原样复制进.cpp文件。这样一来,该文件会交给标准的C++编译器(unix平台上是g++)来编译,而它并不理解任何CUDA语法。
您必须把每个文件分别编译成目标文件,然后用C++编译器把它们链接起来,并像标准C那样通过头文件导出.cu文件中的函数。
嗨Andrew,我已经按照你的建议删除了那一行#include,现在出现了链接错误。即使我在cpp文件中使用了extern "C",C++程序仍然找不到kernel中定义的一些函数。你有什么建议吗?谢谢 – 2011-04-22 22:07:37
cuda-grayscale项目里有一个很好的例子。
它曾经可以在CUDA 3.1上编译通过。此外,项目里还有一个Makefile:
# Builds Debug/grayscale: main.cpp is compiled with the host C++ compiler,
# kernel_gpu.cu with nvcc, and the two object files are linked by $(CXX).
CXX=g++
CUDA_INSTALL_PATH=/usr/local/cuda
CFLAGS= -I. -I$(CUDA_INSTALL_PATH)/include `pkg-config --cflags opencv`
# NOTE(review): 64-bit CUDA installs usually keep libcudart in lib64 - confirm for your platform.
LDFLAGS= -L$(CUDA_INSTALL_PATH)/lib -lcudart `pkg-config --libs opencv`
# Extra nvcc flags, empty unless set by the caller or the environment.
# nvcc must target the same word size as $(CXX) (e.g. make CUDAFLAGS=-m64);
# otherwise the linker ignores kernel_gpu.o and reports undefined symbols.
CUDAFLAGS ?=

.PHONY: all clean

all:
	mkdir -p Debug
	$(CXX) $(CFLAGS) -c main.cpp -o Debug/main.o
	nvcc $(CUDAFLAGS) -c kernel_gpu.cu -o Debug/kernel_gpu.o
	# Libraries go after the objects that use them so the linker can resolve their symbols.
	$(CXX) Debug/main.o Debug/kernel_gpu.o $(LDFLAGS) -o Debug/grayscale

clean:
	rm -f Debug/*.o Debug/grayscale
你的操作系统是多少位的?安装的是哪个版本的CUDA SDK?看起来nvcc生成的是32位代码,而你的C++编译器生成的是64位代码。我最有把握的猜测是你安装的SDK与平台的位数不匹配。另外,在.cu文件的函数上加上__host__和__device__限定符也是个好主意。 – asm 2011-04-23 13:26:51
我现在正好在一台装有nvcc编译器的机器上,看起来可以用-m选项让它编译为64位或32位。运行nvcc --help并查看--machine选项,就能看到默认值是什么。我建议检查一下这个默认值是否与你操作系统的位数不一致。 – asm 2011-04-23 16:52:00