! ==============================================================
! Simple Fortran90-example that demonstrates a GPU-accelerated
! region (observe the "!$acc" -lines in the subroutine).
!
! Compile this code (abc.cuf) with:
!   pgf90 -ta=nvidia -o abc.exe abc.cuf -Minfo
!
! Run it with:
!   export LD_LIBRARY_PATH=/com/cuda/lib64:$LD_LIBRARY_PATH
!   ./abc.exe
!
! ==============================================================

!> Element-wise sum of two n-by-n single-precision matrices: c = a + b.
!>
!> The loop nest is wrapped in an accelerator region so the compiler can
!> offload it.
!>
!> NOTE: this external subroutine has the same name as the intrinsic
!> function `sum`; the name is kept so existing callers still link.
!>
!> a, b : input matrices, n-by-n
!> c    : output matrix, n-by-n, fully overwritten
!> n    : extent of both dimensions
subroutine sum(a, b, c, n)
  implicit none
  integer, intent(in) :: n
  real(kind=4), intent(in) :: a(n,n), b(n,n)
  real(kind=4), intent(out) :: c(n,n)
  integer :: i, j

  ! NOTE(review): "region" is the PGI Accelerator spelling of this
  ! directive; the OpenACC equivalent is presumably "kernels" -- confirm
  ! against the compiler version in use before switching.
!$acc region
  ! Fortran arrays are column-major: run the first index in the inner
  ! loop so consecutive iterations touch consecutive memory.
  do j = 1, n
    do i = 1, n
      c(i,j) = a(i,j) + b(i,j)
    enddo
  enddo
!$acc end region
end subroutine sum

!> Driver: prints the properties of CUDA device 0, fills two small test
!> matrices, adds them with `sum`, and prints every element of the result.
program main
  use cudafor
  implicit none
  integer, parameter :: isize = 10
  type(cudadeviceprop) :: prop
  integer :: i, j, n, istat
  real(kind=4) :: input_a(isize,isize), input_b(isize,isize), output_c(isize,isize)

  ! Query device 0. The return code is checked so that an undefined
  ! `prop` is never printed when the query fails.
  istat = cudaGetDeviceProperties(prop, 0)
  if (istat /= cudaSuccess) then
    write(*,*) "WARNING: cudaGetDeviceProperties failed: ", &
               trim(cudaGetErrorString(istat))
  else
    write(*,*)" name=",trim(prop%name)
    write(*,*)" totalGlobalMem=",prop%totalGlobalMem
    write(*,*)" sharedMemPerBlock=",prop%sharedMemPerBlock
    write(*,*)" regsPerBlock=",prop%regsPerBlock
    write(*,*)" warpSize=",prop%warpSize
    write(*,*)" memPitch=",prop%memPitch
    write(*,*)" maxThreadsPerBlock=",prop%maxThreadsPerBlock
    write(*,*)" maxThreadsDim=",prop%maxThreadsDim
    write(*,*)" maxGridSize=",prop%maxGridSize
    write(*,*)" totalConstMem=",prop%totalConstMem
    write(*,*)" major=",prop%major
    write(*,*)" minor=",prop%minor
    write(*,*)" clockRate=",prop%clockRate
    write(*,*)" textureAlignment=",prop%textureAlignment
    write(*,*)" deviceOverlap=",prop%deviceOverlap
    write(*,*)" multiProcessorCount=",prop%multiProcessorCount
    write(*,*)"kernelExecTimeoutEnabled=",prop%kernelExecTimeoutEnabled
    write(*,*)" integrated=",prop%integrated
    write(*,*)" canMapHostMemory=",prop%canMapHostMemory
    write(*,*)" computeMode=",prop%computeMode
  endif

  n = isize

  ! input_a holds the row index, input_b the column index, so that
  ! output_c(i,j) is expected to be i + j. Column-major traversal.
  do j = 1, n
    do i = 1, n
      input_a(i,j) = real(i, kind=4)
      input_b(i,j) = real(j, kind=4)
    enddo
  enddo

  call sum(input_a, input_b, output_c, n)

  ! Printed row by row (i outer, j inner); this order determines the
  ! sequence of the program's output and is kept as in the original.
  do i = 1, n
    do j = 1, n
      print*, output_c(i,j)
    enddo
  enddo
end program main