Actual source code: ex1hip.hip.cpp
1: static char help[] = "Benchmarking HIP kernel launch time\n";
2: /*
3: Running example on Crusher at OLCF:
4: # run with 1 mpi rank (-n1), 32 CPUs (-c32), and map the process to CPU 0 and GPU 0
5: $ srun -n1 -c32 --cpu-bind=map_cpu:0 --gpus-per-node=8 --gpu-bind=map_gpu:0 ./ex1hip
6: Average asynchronous HIP kernel launch time = 3.74 microseconds
7: Average synchronous HIP kernel launch time = 6.66 microseconds
8: */
9: #include <petscsys.h>
10: #include <petscdevice.h>
12: __global__ void NullKernel(){}
14: int main(int argc,char **argv)
15: {
16: PetscInt i,n=100000;
17: hipError_t cerr;
18: PetscLogDouble tstart,tend,time;
20: PetscInitialize(&argc,&argv,(char*)0,help);
21: PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
23: /* Launch a sequence of kernels asynchronously. Previous launched kernels do not need to be completed before launching a new one */
24: PetscTime(&tstart);
25: for (i=0; i<n; i++) {hipLaunchKernelGGL(NullKernel,dim3(1),dim3(1),0,NULL);}
26: PetscTime(&tend);
27: hipStreamSynchronize(NULL); /* Sync after tend since we don't want to count kernel execution time */
28: time = (tend-tstart)*1e6/n;
29: PetscPrintf(PETSC_COMM_WORLD,"Average asynchronous HIP kernel launch time = %.2f microseconds\n",time);
31: /* Launch a sequence of kernels synchronously. Only launch a new kernel after the one before it has been completed */
32: PetscTime(&tstart);
33: for (i=0; i<n; i++) {
34: hipLaunchKernelGGL(NullKernel,dim3(1),dim3(1),0,NULL);
35: hipStreamSynchronize(NULL);
36: }
37: PetscTime(&tend);
38: time = (tend-tstart)*1e6/n;
39: PetscPrintf(PETSC_COMM_WORLD,"Average synchronous HIP kernel launch time = %.2f microseconds\n",time);
41: PetscFinalize();
42: return 0;
43: }
45: /*TEST
46: build:
47: requires: hip
49: test:
50: requires: hip
51: args: -n 2
52: output_file: output/empty.out
53: filter: grep "DOES_NOT_EXIST"
55: TEST*/