在《GPU编程CUDA实战》第45页有个Julia数据集,本文把这个CUDA的程序改成了java的,然后每个程序运行20次,统计时间,比较这两个程序那个快些。
java(ms) | c++(ms) |
202 | 1904 |
269 | 1850 |
228 | 1737 |
242 | 1712 |
181 | 1770 |
222 | 1774 |
272 | 1733 |
295 | 1681 |
238 | 1320 |
282 | 1725 |
248 | 1833 |
294 | 1478 |
264 | 1692 |
176 | 1431 |
229 | 1479 |
230 | 1457 |
256 | 1645 |
193 | 1802 |
239 | 1208 |
230 | 1537 |
239.5 | 1638.4 |
结果是惊人的,java完成同样的计算任务用的时间只有CUDA的15%,就这道题来说Java的速度是CUDA的5.8倍.
CUDA程序
#include "book.h" #include "cuda_runtime.h" #include "cpu_bitmap.h" #include<iostream> #include <time.h> using namespace std; #define DIM 1000 /****************************************/ struct cuComplex { float r; float i; __device__ cuComplex(float a,float b) : r(a),i(b) {} __device__ float magnitude2(void){ return r*r+i*i; } __device__ cuComplex operator*(const cuComplex& a) { return cuComplex(r*a.r-i*a.i, i*a.r+r*a.i); } __device__ cuComplex operator+(const cuComplex& a){ return cuComplex(r+a.r,i+a.i); } }; /***************************************/ __device__ int julia(int x ,int y){ const float scale =2; float jx=scale*(float)(DIM/2-x)/(DIM/2); float jy=scale*(float)(DIM/2-y)/(DIM/2); cuComplex c(-0.8,0.156); cuComplex a(jx,jy); int i=0; for(i=0 ;i<200; i++){ a=a*a+c; if(a.magnitude2()>1000) return 0; } return 1; } __global__ void kernel(unsigned char *ptr){ int x=blockIdx.x; int y=blockIdx.y; int offset=x+y*gridDim.x; int juliaValue=julia(x,y); ptr[offset*4+0]=255*juliaValue; ptr[offset*4+1]=0; ptr[offset*4+2]=0; ptr[offset*4+3]=255; } int main(void){ clock_t start,ends; start=clock(); CPUBitmap bitmap(DIM,DIM); unsigned char *dev_bitmap; HANDLE_ERROR (cudaMalloc( (void**)&dev_bitmap,bitmap.image_size())); dim3 grid(DIM,DIM); kernel<<< grid,1>>>(dev_bitmap); HANDLE_ERROR (cudaMemcpy (bitmap.get_ptr(),dev_bitmap,bitmap.image_size(),cudaMemcpyDeviceToHost )); ends=clock(); cout<<(ends-start)*1000/CLOCKS_PER_SEC <<endl; bitmap.display_and_exit(); HANDLE_ERROR (cudaFree(dev_bitmap)); }
Java程序
import java.awt.Color; import java.awt.Font; import java.awt.Graphics; import java.io.FileWriter; import java.io.IOException; import java.text.DecimalFormat; import javax.swing.JFrame; import javax.swing.JPanel; public class julia1 extends JFrame { MyPanedrawjulia1 mp = null ; //1 public static void main(String[] args) { long sysDate1 = System.currentTimeMillis(); julia1 qwe = new julia1(); //2 long sysDate2 = System.currentTimeMillis(); System.out.println(sysDate2-sysDate1 ); } public julia1() //3 { mp = new MyPanedrawjulia1(); //4 this.add(mp); this.setSize(3000,2000); this.setVisible(true); this.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); } } class MyPanedrawjulia1 extends JPanel //用于绘图和实现绘图区域 //5 { public void paint(Graphics g) { super.paint(g); try { julia( g ); } catch (IOException e1) { e1.printStackTrace(); } } private void julia(Graphics g) throws IOException { g.setFont(new Font("宋体",Font.BOLD,20)); int dim=1000; double c=-0.8; double d=0.156; double rate=1.6; DecimalFormat df = new DecimalFormat( "0.0000000000"); for(double a=1 ;a<1000;a++) { for(double b=1 ;b<1000;b++) { double p1=rate*(dim/2-a)/(dim/2); double p2=rate*(dim/2-b)/(dim/2); double sum=0; for( int n=0 ;n<200;n++ ) { double r1=0.0; double r2=0.0; r1=Math.pow(p1, 2)-Math.pow(p2, 2)+c; r2=2*p1*p2+d; p1=r1; p2=r2; sum=sum+(Math.pow(p1, 2)+Math.pow(p2, 2)); if (sum>1000) { break; } } if(sum<1000) { //System.out.println(a+" ***** "+b +" "+sum+" " ); g.setColor(Color.red); g.drawOval((int)(a), (int)(b),2, 2); } /*************************************************/ } } } }
CUDA生成图片
Java生成图片