1 ....
/*
 * HMPP codelet labelled "testlabel": the directive below declares it for the
 * CUDA target and marks vout as an in/out argument.
 * (Listing note, translated: the function enclosed between the two
 * highlighted directives is turned into loops to be computed on the GPU.)
 *
 * For every element that is at least two rows and two columns away from each
 * edge, vout[i][j] is multiplied in place by
 *
 *     vin[i][j] + 0.3   * (vin[i-1][j-1] + vin[i+1][j+1])
 *               - 0.506 * (vin[i-2][j-2] + vin[i+2][j+2])
 *
 * Elements of vout outside that interior region are left untouched.
 *
 * N, M  number of rows / columns of both arrays
 * vout  read and updated in place
 * vin   only read
 */
#pragma hmpp testlabel codelet, target=CUDA, args[vout].io=inout
static void the_kernel(unsigned int N, unsigned int M,
                       float vout[N][M], float vin[N][M]){
  unsigned int i, j;

  /*
   * The bounds are written as "i + 2 < N" rather than "i < N - 2": N and M
   * are unsigned, so N - 2 would wrap around to a huge value for N < 2 and
   * the loops would then index far outside the arrays.
   */
  for (i = 2; i + 2 < N; i++) {
    for (j = 2; j + 2 < M; j++) {
      float temp;
      temp = vin[i][j]
           + 0.3f * (vin[i-1][j-1] + vin[i+1][j+1])
           - 0.506f * (vin[i-2][j-2] + vin[i+2][j+2]);
      vout[i][j] = temp * (vout[i][j]);
    }
  }
}
// Host-side excerpt: declares two n x m float arrays, fills them, and calls
// the_kernel through an HMPP callsite directive, then prints resultat.
// Lines shown as "…" or "...." stand for code omitted from this listing.
16 int main(int argc, char **argv){
17 unsigned int n = 100;
18 unsigned int m = 20;
19 int i, j;
20 float resultat = 0.0f;
// out and in are sized by the run-time values n and m (variable-length arrays).
21 float out[n][m];
22 float in[n][m];
23 …
24 // init
25 for(i = 0 ; i < n ; i++){
26 for(j = 0 ; j < m ; j++){
// COEFF is not defined in the visible listing; presumably a macro from an
// omitted part of the file (to be confirmed).
27 in[i][j] = (COEFF) * (-1.0f);
28 out[i][j] = (COEFF) + (j * 0.01f) ;
29 }
30 }
// Callsite directive carrying the same label (testlabel) as the codelet
// directive placed on the_kernel.
31 #pragma hmpp testlabel callsite
32 the_kernel(n,m,out,in);
33 ....
// resultat is initialised to 0.0f above; whatever updates it before this
// print is part of the omitted code and is not shown here.
34 printf("result : %f\n",resultat);
35 }
|