!--------------------------------------------------------------------------- ! program: parallel_lu_solve, Fortran 90 version ! filename: par_lu.f ! ! description: ! Example use of ScaLAPACK LU factor and solve routines pdgetrf and ! pdgetrs to solve the system of linear equations Ax = b. The ! factorization step is timed and the result reported upon completion. ! ! associated files: Makefile, lu_solve.in ! !--------------------------------------------------------------------------- ! !--------------------------------------------------------------------------- !$$$$$$$$$$$$$$$$$$$ Main Program $$$$$$$$$$$$$$$$$$$$$$$$$$$$$ !--------------------------------------------------------------------------- program parallel_lu_solve implicit none ! the working precision of real variables, define to give double precision integer, parameter :: wp = kind(1.d0) ! system matrix A and inhomgenous term b real(kind=wp), allocatable, dimension(:,:) :: A real(kind=wp), allocatable, dimension(:) :: b ! description arrays for A and b integer, dimension(9) :: desc_A, desc_b integer :: sys_order, block_size, nproc integer :: nprow, npcol, icontxt, myrow, mycol integer :: mloc_A, nloc_A, mloc_b real(kind=wp) :: wall_time !--------------------------------------------------------------------------- ! read input and set number of processors call get_parameters(sys_order,block_size,nproc) ! initialize BLACS grid call init_grid(sys_order,block_size,nprow,npcol,myrow,mycol,icontxt, & desc_A,desc_b,mloc_A,nloc_A,mloc_b) ! allocate space for matrices allocate(A(mloc_A,nloc_A),b(mloc_b)) ! initialize and solve system, return timing results call init_system(A,b,sys_order,block_size,mloc_A,nloc_A,mloc_b, & myrow,mycol,nprow,npcol) call solver(A,b,sys_order,desc_A,desc_b,wall_time) ! root process outputs timing results if ((myrow == 0) .and. (mycol == 0)) then call put_results(sys_order,block_size,nprow,npcol,wall_time) endif ! deallocate space deallocate(A,b) ! exit BLACS call blacs_exit(0) !-------------------- subroutines ----------------------------- contains !=========================================================================== subroutine get_parameters(sys_order,block_size,nproc) ! argument declarations integer, intent(out) :: sys_order, block_size, nproc ! local variables integer :: mypnum !--------------------------------------------------------------------------- ! get input parameters from input file lu_solve.in open(10,file='lu_solve.in',status='old') read(10,*) sys_order read(10,*) block_size close(10) ! get number of proceses being used call blacs_pinfo(mypnum,nproc) end subroutine get_parameters !=========================================================================== subroutine init_grid(sys_order,block_size,nprow,npcol,myrow,mycol,icontxt, & desc_A,desc_b,mloc_A,nloc_A,mloc_b) ! argument declarations integer, intent(in) :: sys_order, block_size integer, intent(out) :: nprow, npcol, myrow, mycol, icontxt integer, intent(out) :: mloc_A, nloc_A, mloc_b integer, dimension(9), intent(out) :: desc_A, desc_b ! local variables integer :: numroc ! ScaLAPACK TOOLS utility routine integer :: info !--------------------------------------------------------------------------- ! define processor grid, a 1xNPROCS grid is used nprow = 1 npcol = nproc ! initialize BLACS processor grid call blacs_get(0,0,icontxt) call blacs_gridinit(icontxt,'R',nprow,npcol) call blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol) ! determine size of local matrices mloc_A = numroc(sys_order,block_size,myrow,0,nprow) nloc_A = numroc(sys_order,block_size,mycol,0,npcol) mloc_b = numroc(sys_order,block_size,myrow,0,nprow) ! set up descriptor vectors for matrix a and vector b call descinit(desc_A, sys_order, sys_order, block_size, block_size, 0, 0, & icontxt, mloc_A, info) call descinit(desc_b, sys_order, 1, block_size, block_size, 0, 0, & icontxt, mloc_b, info) end subroutine init_grid !=========================================================================== subroutine init_system(A,b,sys_order,block_size,mloc_A,nloc_A,mloc_b, & myrow,mycol,nprow,npcol) implicit none ! argument declarations real(kind=wp), dimension(:,:), intent(out) :: A real(kind=wp), dimension(:), intent(out) :: b integer, intent(in) :: sys_order, mloc_A, nloc_A, mloc_b, block_size integer, intent(in) :: myrow, mycol, nprow, npcol ! local variables integer :: indxl2g, i, i_loc, j, j_loc !--------------------------------------------------------------------------- ! set up portion of system matrix A owned by this process do j_loc = 1, nloc_A j = indxl2g(j_loc,block_size,mycol,0,npcol) do i_loc = 1, mloc_A i = indxl2g(i_loc,block_size,myrow,0,nprow) A(i_loc,j_loc) = 1.0_wp + sqrt(dble(abs(i-j))/dble(sys_order)) end do end do do j_loc = 1,mloc_b j = indxl2g(j_loc,block_size,myrow,0,nprow) b(j_loc) = j end do end subroutine init_system !=========================================================================== subroutine solver(A,b,sys_order,desc_A,desc_b,wall_time) ! argument declarations real(kind=wp), dimension(:,:), intent(inout) :: A real(kind=wp), dimension(:), intent(inout) :: b integer, intent(in) :: sys_order integer, dimension(9), intent(in) :: desc_A, desc_b real(kind=wp), intent(out) :: wall_time ! local variables integer :: iA, jA, ib, jb, nrhs integer :: info integer, dimension(sys_order+200) :: ipvt real(kind=wp) :: rtc real(kind=wp), dimension(2) :: wtime !--------------------------------------------------------------------------- iA=1; jA=1; ib=1; jb=1; nrhs=1 call blacs_barrier(icontxt,'ALL') ! Call LU factorization routine wtime(1) = rtc() call pdgetrf(sys_order,sys_order,A,iA,jA,desc_A,ipvt,info) wtime(2) = rtc() if (info .ne. 0) print*,'pdgetrf info: ',info ! Call LU solver routine call pdgetrs('N',sys_order,nrhs,A,iA,jA,desc_A,ipvt,b,ib,jb,desc_b,info) if (info .ne. 0) print*,'pdgetrs info: ',info wall_time = wtime(2) - wtime(1) call dgamx2d(icontxt,'All',' ',1,1,wall_time,1,1,1,-1,0,0) end subroutine solver !=========================================================================== subroutine put_header !--------------------------------------------------------------------------- write(*,*) write(*,fmt='(a)') 'pdgetrf timing' write(*,fmt='(a)') '==============' write(*,*) write(*,fmt='(a)') ' system block wall' write(*,fmt='(a)') ' size size procs time' write(*,fmt='(a)') ' ------ ----- ----- ----' end subroutine put_header !=========================================================================== subroutine put_results(sys_order,block_size,nprow,npcol,wall_time) ! argument declarations integer, intent(in) :: sys_order, block_size, nprow, npcol real(kind=wp), intent(in) :: wall_time !--------------------------------------------------------------------------- call put_header write(*,fmt='(2x,i5,5x,i3,6x,i2,6x,f6.2)') & sys_order,block_size,(nprow*npcol),wall_time end subroutine put_results !--------------------------------------------------------------------------- end program parallel_lu_solve