cuda C使用boost库
本次实验的前提是cuda环境和boost环境都搭建好了。
使用nvcc编译含有boost库的代码。
nvcc cuda_Array.cu -o Array -I. -arch=sm_35
就可以了。
以下是我的代码。前提是当前目录下一定要有 data.txt 文件,否则会出错。
#include <iostream>
#include <vector>
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <boost/timer.hpp>
#include <boost/progress.hpp>
#include <cuda_runtime.h>
#include <cmath>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <stdlib.h>
using namespace std;
// Element-wise kernel: for every index i in [0, n) writes
//     d_c[i] = 20 * log10(d_a[i] / d_b[i])
// (presumably a signal-to-noise ratio in decibels, going by the kernel name).
//
// Parameters:
//   d_a  numerators, read at indices [0, n)
//   d_b  denominators, read at indices [0, n)
//   d_c  output, written at indices [0, n)
//   n    number of elements to process
//
// Launch layout: 1D grid of 1D blocks. Uses a grid-stride loop, so any
// grid/block size yields full coverage of the n elements.
__global__ void SNR_gpu(double* d_a,double* d_b,double* d_c,int n)
{
    // Total number of threads in the grid; each thread hops by this amount.
    const int stride = gridDim.x * blockDim.x;

    for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < n; idx += stride)
    {
        const double ratio = d_a[idx] / d_b[idx];
        d_c[idx] = 20 * log10(ratio);
    }
}
// Terminates the program with a diagnostic if a CUDA runtime call failed.
//   status  return code of the CUDA call (or of cudaGetLastError())
//   what    short description of the operation, used in the error message
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        std::cerr << "CUDA error (" << what << "): "
                  << cudaGetErrorString(status) << std::endl;
        exit(EXIT_FAILURE);
    }
}

// Reads a row x line table of doubles from "data.txt", runs SNR_gpu on the
// first two rows (row 0 as numerator, row 1 as denominator), copies the result
// back to the host and reports the elapsed time measured by boost::timer.
// Returns 0 on success and EXIT_FAILURE if the input file cannot be opened or
// holds fewer values than expected; CUDA failures terminate via checkCuda().
int main()
{
    boost::timer t;

    const int row = 40;        // number of rows in data.txt
    const int line = 8064;     // number of columns in data.txt
    const int threads = 1024;  // threads per block
    const int blocks = 8;      // blocks in the grid
    const int n = line;        // elements processed on the GPU (one full row)
    const size_t Nbytes = static_cast<size_t>(n) * sizeof(double);

    // Host storage: the full table and the result buffer. Vectors release
    // their memory automatically on every exit path.
    std::vector<std::vector<double> > V(row, std::vector<double>(line));
    std::vector<double> h_c(n);

    std::cout << "提取数据中..." << std::endl;

    // Open the input for reading only. Opening the same path with an
    // std::ofstream would truncate it and wipe the data being read.
    std::ifstream data("data.txt");
    if (!data)
    {
        std::cerr << "无法打开 data.txt" << std::endl;
        return EXIT_FAILURE;
    }

    boost::progress_display pd(row);  // one tick per row read
    for (int row_count = 0; row_count < row; row_count++)
    {
        for (int line_count = 0; line_count < line; line_count++)
        {
            // Stop on a short or malformed file rather than computing on
            // values that were never read.
            if (!(data >> V[row_count][line_count]))
            {
                std::cerr << "data.txt 数据不足或格式错误: 第 " << row_count
                          << " 行, 第 " << line_count << " 列" << std::endl;
                return EXIT_FAILURE;
            }
        }
        ++pd;
    }
    data.close();

    std::cout << "拷贝数据中..." << std::endl;
    double* d_a = NULL;
    double* d_b = NULL;
    double* d_c = NULL;
    checkCuda(cudaMalloc((void**)&d_a, Nbytes), "cudaMalloc d_a");
    checkCuda(cudaMalloc((void**)&d_b, Nbytes), "cudaMalloc d_b");
    checkCuda(cudaMalloc((void**)&d_c, Nbytes), "cudaMalloc d_c");

    checkCuda(cudaMemcpy(d_a, &V[0][0], Nbytes, cudaMemcpyHostToDevice), "copy row 0 to device");
    checkCuda(cudaMemcpy(d_b, &V[1][0], Nbytes, cudaMemcpyHostToDevice), "copy row 1 to device");

    std::cout << "GPU运算中..." << std::endl;
    SNR_gpu<<<blocks,threads>>>(d_a, d_b, d_c, n);
    // A launch returns no status itself: query launch errors explicitly, then
    // wait for the kernel so "done" is only printed once it has really run.
    checkCuda(cudaGetLastError(), "SNR_gpu launch");
    checkCuda(cudaDeviceSynchronize(), "SNR_gpu execution");
    std::cout << "运算完成..." << std::endl;

    checkCuda(cudaMemcpy(&h_c[0], d_c, Nbytes, cudaMemcpyDeviceToHost), "copy result to host");

    checkCuda(cudaFree(d_a), "cudaFree d_a");
    checkCuda(cudaFree(d_b), "cudaFree d_b");
    checkCuda(cudaFree(d_c), "cudaFree d_c");

    std::cout << "数据输出..." << std::endl;
    // The results are in h_c[0 .. n-1]; print them here if needed.

    std::cout << "now time elapsed:" << t.elapsed() << "s" << std::endl;
    return 0;
}
这是效果图。