! Copyright (C) 2014, Kirana Kumara P ! Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: ! The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. ! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ! ------------------------------------------------------------------------------------------------------------------------- ! This code is authored by Kirana Kumara P, Centre for Product Design and Manufacturing, Indian Institute of Science, Bangalore - 560012, India. ! Author hereby declares that the present code is the original work of the author. Further, author hereby declares that the present code, in full or in part, is not a translation or a copy of any of the existing codes written by someone else. ! Author's institution (Indian Institute of Science) has informed the author in writing that the institution is not interested in claiming any copyright on the present code. ! Contact email ID of the author is: kiranakumarap@gmail.com ! It may be helpful to go through the following two sources to understand the present code: ! [1] Kirana Kumara P, A MATLAB Code for Three Dimensional Linear Elastostatics using Constant Boundary Elements, International Journal of Advances in Engineering Sciences, Vol. 2, No. 3, pp. 9-20, 2012. ! [2] Kirana Kumara P, A study of the speed and the accuracy of the Boundary Element Method as applied to the computational simulation of biological organs, Available from: http://arxiv.org/abs/1311.4533v1 ! ------------------------------------------------------------------------------------------------------------------------- ! This is a Fortran 90 program which solves any three dimensional linear elastostatic (without body forces) problem using constant boundary elements. ! This is a parallelized code (uses BLACS and ScaLAPACK) ! Strongly singular integrals are handled using rigid body modes ! Weakly singular integrals are not handled by any special methods. The program uses 16 by 16 (256 in total) integration points over each of the boundary elements; high number of numerical integration points take care of weak singularity. Program can be easily modified to use 4 by 4, 8 by 8, or 32 by 32 integration points (locations and weights of the integration points for these cases are listed in this code too). Test runs showed that using 4 by 4 integration does not give very accure results whereas 8 by 8 integration is fine. But use 16 by 16 integration to ensure that one does not get erroneous results because of inaccurate integration. There is not much improvement in accuracy by using more than 16 by 16 integration points. ! This code has been tested using Mvapich2-1.8 and Intel Fortran compiler (Version 2011.5.220). Intel Math Kernel Library was also essential. ! This program uses a geometry that is described by 96 triangular boundary elements. However, the program can be very easily modified to solve any problem that uses a geometry that is described by any number of surface triangles. ! Input is through a file named 'inbem96.txt'. The file lists the x, y, and z coordinates of the three vertices of boundary elements that together describe the geometry. ! Output is through files named 'outbem96p.txt' and 'timebem96p.txt'. The file 'outbem96p.txt' contains the solution, i.e., displacements (along x, y, and z direction) or tractions at each of the boundary elements. The file 'timebem96p.txt' shows the time taken to solve the problem, i.e., the Wall-clock time from the first call to MPI_WTIME() to the second call to MPI_WTIME(). Outputs are also written to the screen. ! In this program, element numbers from 65 to 80 are fixed. Element numbers from 1 to 64 are subjected to zero traction, and element numbers from 81 to 96 are subjected to a traction '4' in the y direction. Hence, in total, 16 elements are subjected to displacement boundary condition, and 80 elements are subjected to traction boundary condition. When using this program to solve another problem, suitable modifications have to be made to incorporate appropriate boundary conditions. ! It may help to refer to "Intel Math Kernel Library Reference Manual" (from Intel) and "Parallel ESSL Guide and Reference" (from IBM) to learn the usage of the library routines, and also to learn some theory (like Block-Cyclic Data Distribution) in some cases. program bemconstp implicit none integer :: mypnum,nprocs,icontxt,NUMROC,ia,ja,ib,jb,INDXL2G double precision :: start,finish,time double precision :: E,mu,pi,MPI_WTIME double precision :: dispbcelements(16,4), fbcelements(80,4), xyzofelements(96*3,4),xyzofelements0(4,96*3) double precision,allocatable :: nx(:),ny(:),nz(:),xbar(:),ybar(:),zbar(:),jacobian(:) double precision :: tonedim(16),vonedim(16),weightonedim(16),t(256),v(256),u(256),weight(256),xx(256),y(256),z(256) double precision,allocatable :: K(:,:),F(:,:),Kp(:,:),Fp(:,:) integer :: ndex,vindex,tindex,h,i,j,nelement,sizedispbcelements,sizefbcelements,ndispbcelements,nfbcelements double precision :: x1,x2,x3,y1,y2,y3,z1,z2,z3,d,alpha,beta,gama,sigma,rr,drbydx,drbydy,drbydz,costheta,G,C1,C2,C3,CC double precision :: ITxxE,ITxyE,ITxzE,ITyxE,ITyyE,ITyzE,ITzxE,ITzyE,ITzzE,IUxxE,IUxyE,IUxzE,IUyxE,IUyyE,IUyzE,IUzxE,IUzyE double precision :: IUzzE,Txx,Txy,Txz,Tyx,Tyy,Tyz,Tzx,Tzy,Tzz,Uxx,Uxy,Uxz,Uyx,Uyy,Uyz,Uzx,Uzy,Uzz double precision,allocatable :: CPVITxxE(:),CPVITxyE(:),CPVITxzE(:),CPVITyxE(:),CPVITyyE(:),CPVITyzE(:) double precision,allocatable :: CPVITzxE(:),CPVITzyE(:),CPVITzzE(:) integer :: info,nrhs=1,n=96*3,istat,totalsize,descK(9),descF(9),block double precision :: X(96*3,1),ra(96*3,96*3),ca(96*3,96*3) ! 'nprocsr' is the total number of rows in the process grid. 'nprocsc' is the total number of columns in the process grid. (nprocsr * nprocsc) is equal to the total number of processes ('nprocs') in the parallel program. 'mbK' is the row block size and 'nbK' is the column block size for the 'characteristic' matrix. 'mbF' is the row block size and 'nbF' is the column block size for the right hand size. 'mbK' should be equal to 'mbF', and 'nbF' should be equal to '1'. integer :: mK=96*3,nK=96*3,mbK=16,nbK=16,mF=96*3,nF=1,mbF=16,nbF=1,lldK,lldF,lldKc,lldFc,nprocsr=2,nprocsc=2,myrow,mycol integer,allocatable :: ipiv(:) call blacs_pinfo(mypnum,nprocs) call blacs_get(0,0,icontxt) call blacs_gridinit(icontxt,'R',nprocsr,nprocsc) call blacs_pcoord(icontxt, mypnum, myrow, mycol) ! Enter values of Young's modulus 'E' and Poisson's ratio 'mu' E=200000.0 mu=0.33 do i=1,16 dispbcelements(i,1)=i+64 dispbcelements(i,2)=0 dispbcelements(i,3)=0 dispbcelements(i,4)=0 enddo do i=1,64 fbcelements(i,1)=i fbcelements(i,2)=0 fbcelements(i,3)=0 fbcelements(i,4)=0 enddo do i=65,80 fbcelements(i,1)=i+16 fbcelements(i,2)=0 fbcelements(i,3)=4 fbcelements(i,4)=0 enddo open(101, file="inbem96.txt", status="unknown", action="read", form="formatted", IOSTAT=istat) read (101, *) xyzofelements0 close(101) do i=1,96*3 do j=1,4 xyzofelements(i,j)=xyzofelements0(j,i) enddo enddo pi=3.141592653589793 G=E/(2*(1+mu)) CC=1/(16*pi*G*(1-mu)) C1=3-4*mu C2=1/(8*pi*(1-mu)) C3=1-2*mu totalsize=size(xyzofelements,1) nelement=totalsize/3 sizedispbcelements=size(dispbcelements,1) ndispbcelements=sizedispbcelements sizefbcelements=size(fbcelements,1) nfbcelements=sizefbcelements allocate (nx(nelement)) allocate (ny(nelement)) allocate (nz(nelement)) allocate (xbar(nelement)) allocate (ybar(nelement)) allocate (zbar(nelement)) allocate (jacobian(nelement)) ! For using 4 by 4 integration ! tonedim=(/-0.861136311594 -0.339981043585 0.339981043585 0.861136311594/) ! vonedim=(/-0.861136311594 -0.339981043585 0.339981043585 0.861136311594/) ! weightonedim=(/0.347854845137 0.652145154863 0.652145154863 0.347854845137/) ! For using 8 by 8 integration ! tonedim=(/-0.960289856498 -0.796666477414 -0.525532409916 -0.183434642496 0.183434642496 0.525532409916 0.796666477414 0.960289856498/) ! vonedim=(/-0.960289856498 -0.796666477414 -0.525532409916 -0.183434642496 0.183434642496 0.525532409916 0.796666477414 0.960289856498/) ! weightonedim=(/0.10122853629 0.222381034453 0.313706645878 0.362683783378 0.362683783378 0.313706645878 0.222381034453 0.10122853629/) ! 16 by 16 integration tonedim=(/-0.989400934992, -0.944575023073, -0.865631202388, -0.755404408355, -0.617876244403, -0.458016777657, -0.281603550779, & -0.0950125098376, 0.0950125098376, 0.281603550779, 0.458016777657, 0.617876244403, 0.755404408355, 0.865631202388, & 0.944575023073, 0.989400934992/) vonedim=(/-0.989400934992, -0.944575023073, -0.865631202388, -0.755404408355, -0.617876244403, -0.458016777657, -0.281603550779, & -0.0950125098376, 0.0950125098376, 0.281603550779, 0.458016777657, 0.617876244403, 0.755404408355, 0.865631202388, & 0.944575023073, 0.989400934992/) weightonedim=(/0.027152459411, 0.0622535239372, 0.0951585116838, 0.124628971256, 0.149595988817, 0.169156519395, 0.182603415045, & 0.189450610455, 0.189450610455, 0.182603415045, 0.169156519395, 0.149595988817, 0.124628971256, 0.0951585116838, & 0.0622535239372, 0.027152459411/) ! For using 32 by 32 integration ! tonedim=(/-0.997263861849 -0.985611511545 -0.964762255588 -0.934906075938 -0.896321155766 -0.849367613733 -0.794483795968 -0.73218211874 -0.66304426693 -0.587715757241 -0.506899908932 -0.421351276131 -0.331868602282 -0.239287362252 -0.144471961583 -0.0483076656877 0.0483076656877 0.144471961583 0.239287362252 0.331868602282 0.421351276131 0.506899908932 0.587715757241 0.66304426693 0.73218211874 0.794483795968 0.849367613733 0.896321155766 0.934906075938 0.964762255588 0.985611511545 0.997263861849/) ! vonedim=(/-0.997263861849 -0.985611511545 -0.964762255588 -0.934906075938 -0.896321155766 -0.849367613733 -0.794483795968 -0.73218211874 -0.66304426693 -0.587715757241 -0.506899908932 -0.421351276131 -0.331868602282 -0.239287362252 -0.144471961583 -0.0483076656877 0.0483076656877 0.144471961583 0.239287362252 0.331868602282 0.421351276131 0.506899908932 0.587715757241 0.66304426693 0.73218211874 0.794483795968 0.849367613733 0.896321155766 0.934906075938 0.964762255588 0.985611511545 0.997263861849/) ! weightonedim=(/0.00701814576495 0.0162774265831 0.0253910098329 0.0342745478477 0.0428359896785 0.0509978738117 0.0586839394615 0.0658220603578 0.0723456094297 0.078193695762 0.083311711103 0.0876518688047 0.0911736454878 0.0938441590423 0.0956384754512 0.0965398415811 0.0965398415811 0.0956384754512 0.0938441590423 0.0911736454878 0.0876518688047 0.083311711103 0.078193695762 0.0723456094297 0.0658220603578 0.0586839394615 0.0509978738117 0.0428359896785 0.0342745478477 0.0253910098329 0.0162774265831 0.00701814576495/) ndex=1 do tindex=1,16 do vindex=1,16 t(ndex)=tonedim(tindex) v(ndex)=vonedim(vindex) weight(ndex)=weightonedim(tindex)*weightonedim(vindex) t(ndex)=0.5*t(ndex)+0.5 v(ndex)=0.5*v(ndex)+0.5 ndex=ndex+1 enddo enddo u=t*(1-v) allocate (CPVITxxE(nelement)) allocate (CPVITxyE(nelement)) allocate (CPVITxzE(nelement)) allocate (CPVITyxE(nelement)) allocate (CPVITyyE(nelement)) allocate (CPVITyzE(nelement)) allocate (CPVITzxE(nelement)) allocate (CPVITzyE(nelement)) allocate (CPVITzzE(nelement)) allocate (K(3*nelement,3*nelement)) allocate (F(3*nelement,1)) CPVITxxE=0 CPVITxyE=0 CPVITxzE=0 CPVITyxE=0 CPVITyyE=0 CPVITyzE=0 CPVITzxE=0 CPVITzyE=0 CPVITzzE=0 block=ceiling(real(nelement)/real(nprocs)) call blacs_barrier(icontxt,'A') do i=1,nelement x1=xyzofelements(3*i-2,2) y1=xyzofelements(3*i-2,3) z1=xyzofelements(3*i-2,4) x2=xyzofelements(3*i-1,2) y2=xyzofelements(3*i-1,3) z2=xyzofelements(3*i-1,4) x3=xyzofelements(3*i,2) y3=xyzofelements(3*i,3) z3=xyzofelements(3*i,4) d=(((y2-y1)*(z3-z1)-(z2-z1)*(y3-y1))**2+((z2-z1)*(x3-x1)-(x2-x1)*(z3-z1))**2+((x2-x1)*(y3-y1)-(y2-y1)*(x3-x1))**2)**0.5 nx(i)=((y2-y1)*(z3-z1)-(z2-z1)*(y3-y1))/d ny(i)=((z2-z1)*(x3-x1)-(x2-x1)*(z3-z1))/d nz(i)=((x2-x1)*(y3-y1)-(y2-y1)*(x3-x1))/d alpha=sqrt((x1-x2)**2+(y1-y2)**2+(z1-z2)**2) beta=sqrt((x2-x3)**2+(y2-y3)**2+(z2-z3)**2) gama=sqrt((x3-x1)**2+(y3-y1)**2+(z3-z1)**2) sigma=(alpha+beta+gama)/2 jacobian(i)=2*sqrt(sigma*(sigma-alpha)*(sigma-beta)*(sigma-gama)) xbar(i)=(x2-x1)*0.25+(x3-x1)*0.5+x1 ybar(i)=(y2-y1)*0.25+(y3-y1)*0.5+y1 zbar(i)=(z2-z1)*0.25+(z3-z1)*0.5+z1 enddo ! Upto this line, this program runs on every process (processor) ! The following 'if' statement runs on process '0' only call blacs_barrier(icontxt,'A') if (mypnum==0) then start=MPI_WTIME() endif call blacs_barrier(icontxt,'A') ! The following 'do' loop is executed on all processes but for different values of i's (embarrasingly parallel) do i=block*mypnum+1,min(block*mypnum+block,nelement) do j=1,nfbcelements x1=xyzofelements(3*int(fbcelements(j,1))-2,2) y1=xyzofelements(3*int(fbcelements(j,1))-2,3) z1=xyzofelements(3*int(fbcelements(j,1))-2,4) x2=xyzofelements(3*int(fbcelements(j,1))-1,2) y2=xyzofelements(3*int(fbcelements(j,1))-1,3) z2=xyzofelements(3*int(fbcelements(j,1))-1,4) x3=xyzofelements(3*int(fbcelements(j,1)),2) y3=xyzofelements(3*int(fbcelements(j,1)),3) z3=xyzofelements(3*int(fbcelements(j,1)),4) xx=(x2-x1)*u+(x3-x1)*v+x1 y=(y2-y1)*u+(y3-y1)*v+y1 z=(z2-z1)*u+(z3-z1)*v+z1 ITxxE=0 ITxyE=0 ITxzE=0 ITyxE=0 ITyyE=0 ITyzE=0 ITzxE=0 ITzyE=0 ITzzE=0 IUxxE=0 IUxyE=0 IUxzE=0 IUyxE=0 IUyyE=0 IUyzE=0 IUzxE=0 IUzyE=0 IUzzE=0 do h=1,256 rr=sqrt((xx(h)-xbar(i))**2+(y(h)-ybar(i))**2+(z(h)-zbar(i))**2) drbydx=(xx(h)-xbar(i))/rr drbydy=(y(h)-ybar(i))/rr drbydz=(z(h)-zbar(i))/rr costheta=(1/rr)*((xx(h)-xbar(i))*nx(int(fbcelements(j,1)))+(y(h)-ybar(i))*ny(int(fbcelements(j,1)))+ & (z(h)-zbar(i))*nz(int(fbcelements(j,1)))) Txx=(-C2/rr**2)*((C3+3*drbydx**2)*costheta) Tyy=(-C2/rr**2)*((C3+3*drbydy**2)*costheta) Tzz=(-C2/rr**2)*((C3+3*drbydz**2)*costheta) Txy=(-C2/rr**2)*(3*drbydx*drbydy*costheta-C3*(ny(int(fbcelements(j,1)))*drbydx-nx(int(fbcelements(j,1)))*drbydy)) Tyx=(-C2/rr**2)*(3*drbydy*drbydx*costheta-C3*(nx(int(fbcelements(j,1)))*drbydy-ny(int(fbcelements(j,1)))*drbydx)) Tyz=(-C2/rr**2)*(3*drbydy*drbydz*costheta-C3*(nz(int(fbcelements(j,1)))*drbydy-ny(int(fbcelements(j,1)))*drbydz)) Tzy=(-C2/rr**2)*(3*drbydz*drbydy*costheta-C3*(ny(int(fbcelements(j,1)))*drbydz-nz(int(fbcelements(j,1)))*drbydy)) Txz=(-C2/rr**2)*(3*drbydx*drbydz*costheta-C3*(nz(int(fbcelements(j,1)))*drbydx-nx(int(fbcelements(j,1)))*drbydz)) Tzx=(-C2/rr**2)*(3*drbydz*drbydx*costheta-C3*(nx(int(fbcelements(j,1)))*drbydz-nz(int(fbcelements(j,1)))*drbydx)) ITxxE=ITxxE+Txx*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) ITxyE=ITxyE+Txy*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) ITxzE=ITxzE+Txz*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) ITyxE=ITyxE+Tyx*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) ITyyE=ITyyE+Tyy*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) ITyzE=ITyzE+Tyz*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) ITzxE=ITzxE+Tzx*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) ITzyE=ITzyE+Tzy*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) ITzzE=ITzzE+Tzz*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) Uxx=(CC/rr)*(C1+drbydx**2) Uyy=(CC/rr)*(C1+drbydy**2) Uzz=(CC/rr)*(C1+drbydz**2) Uxy=(CC/rr)*drbydx*drbydy Uyx=Uxy Uyz=(CC/rr)*drbydy*drbydz Uzy=Uyz Uxz=(CC/rr)*drbydx*drbydz Uzx=Uxz IUxxE=IUxxE+Uxx*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) IUxyE=IUxyE+Uxy*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) IUxzE=IUxzE+Uxz*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) IUyxE=IUyxE+Uyx*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) IUyyE=IUyyE+Uyy*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) IUyzE=IUyzE+Uyz*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) IUzxE=IUzxE+Uzx*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) IUzyE=IUzyE+Uzy*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) IUzzE=IUzzE+Uzz*(1-v(h))*jacobian(int(fbcelements(j,1)))*weight(h) enddo ITxxE=0.5*ITxxE ITxyE=0.5*ITxyE ITxzE=0.5*ITxzE ITyxE=0.5*ITyxE ITyyE=0.5*ITyyE ITyzE=0.5*ITyzE ITzxE=0.5*ITzxE ITzyE=0.5*ITzyE ITzzE=0.5*ITzzE IUxxE=0.5*IUxxE IUxyE=0.5*IUxyE IUxzE=0.5*IUxzE IUyxE=0.5*IUyxE IUyyE=0.5*IUyyE IUyzE=0.5*IUyzE IUzxE=0.5*IUzxE IUzyE=0.5*IUzyE IUzzE=0.5*IUzzE CPVITxxE(i)=CPVITxxE(i)-ITxxE CPVITxyE(i)=CPVITxyE(i)-ITxyE CPVITxzE(i)=CPVITxzE(i)-ITxzE CPVITyxE(i)=CPVITyxE(i)-ITyxE CPVITyyE(i)=CPVITyyE(i)-ITyyE CPVITyzE(i)=CPVITyzE(i)-ITyzE CPVITzxE(i)=CPVITzxE(i)-ITzxE CPVITzyE(i)=CPVITzyE(i)-ITzyE CPVITzzE(i)=CPVITzzE(i)-ITzzE if (i==int(fbcelements(j,1))) then CPVITxxE(i)=CPVITxxE(i)+ITxxE CPVITxyE(i)=CPVITxyE(i)+ITxyE CPVITxzE(i)=CPVITxzE(i)+ITxzE CPVITyxE(i)=CPVITyxE(i)+ITyxE CPVITyyE(i)=CPVITyyE(i)+ITyyE CPVITyzE(i)=CPVITyzE(i)+ITyzE CPVITzxE(i)=CPVITzxE(i)+ITzxE CPVITzyE(i)=CPVITzyE(i)+ITzyE CPVITzzE(i)=CPVITzzE(i)+ITzzE endif K(3*i-2,3*int(fbcelements(j,1))-2)=ITxxE K(3*i-2,3*int(fbcelements(j,1))-1)=ITxyE K(3*i-2,3*int(fbcelements(j,1)))=ITxzE K(3*i-1,3*int(fbcelements(j,1))-2)=ITyxE K(3*i-1,3*int(fbcelements(j,1))-1)=ITyyE K(3*i-1,3*int(fbcelements(j,1)))=ITyzE K(3*i,3*int(fbcelements(j,1))-2)=ITzxE K(3*i,3*int(fbcelements(j,1))-1)=ITzyE K(3*i,3*int(fbcelements(j,1)))=ITzzE F(3*i-2,1)=F(3*i-2,1)+fbcelements(j,2)*IUxxE+fbcelements(j,3)*IUxyE+fbcelements(j,4)*IUxzE F(3*i-1,1)=F(3*i-1,1)+fbcelements(j,2)*IUyxE+fbcelements(j,3)*IUyyE+fbcelements(j,4)*IUyzE F(3*i,1)=F(3*i,1)+fbcelements(j,2)*IUzxE+fbcelements(j,3)*IUzyE+fbcelements(j,4)*IUzzE enddo do j=1,ndispbcelements x1=xyzofelements(3*int(dispbcelements(j,1))-2,2) y1=xyzofelements(3*int(dispbcelements(j,1))-2,3) z1=xyzofelements(3*int(dispbcelements(j,1))-2,4) x2=xyzofelements(3*int(dispbcelements(j,1))-1,2) y2=xyzofelements(3*int(dispbcelements(j,1))-1,3) z2=xyzofelements(3*int(dispbcelements(j,1))-1,4) x3=xyzofelements(3*int(dispbcelements(j,1)),2) y3=xyzofelements(3*int(dispbcelements(j,1)),3) z3=xyzofelements(3*int(dispbcelements(j,1)),4) xx=(x2-x1)*u+(x3-x1)*v+x1 y=(y2-y1)*u+(y3-y1)*v+y1 z=(z2-z1)*u+(z3-z1)*v+z1 ITxxE=0 ITxyE=0 ITxzE=0 ITyxE=0 ITyyE=0 ITyzE=0 ITzxE=0 ITzyE=0 ITzzE=0 IUxxE=0 IUxyE=0 IUxzE=0 IUyxE=0 IUyyE=0 IUyzE=0 IUzxE=0 IUzyE=0 IUzzE=0 do h=1,256 rr=sqrt((xx(h)-xbar(i))**2+(y(h)-ybar(i))**2+(z(h)-zbar(i))**2) drbydx=(xx(h)-xbar(i))/rr drbydy=(y(h)-ybar(i))/rr drbydz=(z(h)-zbar(i))/rr costheta=(1/rr)*((xx(h)-xbar(i))*nx(int(dispbcelements(j,1)))+(y(h)-ybar(i))*ny(int(dispbcelements(j,1)))+ & (z(h)-zbar(i))*nz(int(dispbcelements(j,1)))) Txx=(-C2/rr**2)*((C3+3*drbydx**2)*costheta) Tyy=(-C2/rr**2)*((C3+3*drbydy**2)*costheta) Tzz=(-C2/rr**2)*((C3+3*drbydz**2)*costheta) Txy=(-C2/rr**2)*(3*drbydx*drbydy*costheta-C3*(ny(int(dispbcelements(j,1)))*drbydx-nx(int(dispbcelements(j,1)))*drbydy)) Tyx=(-C2/rr**2)*(3*drbydy*drbydx*costheta-C3*(nx(int(dispbcelements(j,1)))*drbydy-ny(int(dispbcelements(j,1)))*drbydx)) Tyz=(-C2/rr**2)*(3*drbydy*drbydz*costheta-C3*(nz(int(dispbcelements(j,1)))*drbydy-ny(int(dispbcelements(j,1)))*drbydz)) Tzy=(-C2/rr**2)*(3*drbydz*drbydy*costheta-C3*(ny(int(dispbcelements(j,1)))*drbydz-nz(int(dispbcelements(j,1)))*drbydy)) Txz=(-C2/rr**2)*(3*drbydx*drbydz*costheta-C3*(nz(int(dispbcelements(j,1)))*drbydx-nx(int(dispbcelements(j,1)))*drbydz)) Tzx=(-C2/rr**2)*(3*drbydz*drbydx*costheta-C3*(nx(int(dispbcelements(j,1)))*drbydz & -nz(int(dispbcelements(j,1)))*drbydx)) ITxxE=ITxxE+Txx*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) ITxyE=ITxyE+Txy*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) ITxzE=ITxzE+Txz*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) ITyxE=ITyxE+Tyx*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) ITyyE=ITyyE+Tyy*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) ITyzE=ITyzE+Tyz*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) ITzxE=ITzxE+Tzx*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) ITzyE=ITzyE+Tzy*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) ITzzE=ITzzE+Tzz*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) Uxx=(CC/rr)*(C1+drbydx**2) Uyy=(CC/rr)*(C1+drbydy**2) Uzz=(CC/rr)*(C1+drbydz**2) Uxy=(CC/rr)*drbydx*drbydy Uyx=Uxy Uyz=(CC/rr)*drbydy*drbydz Uzy=Uyz Uxz=(CC/rr)*drbydx*drbydz Uzx=Uxz IUxxE=IUxxE+Uxx*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) IUxyE=IUxyE+Uxy*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) IUxzE=IUxzE+Uxz*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) IUyxE=IUyxE+Uyx*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) IUyyE=IUyyE+Uyy*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) IUyzE=IUyzE+Uyz*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) IUzxE=IUzxE+Uzx*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) IUzyE=IUzyE+Uzy*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) IUzzE=IUzzE+Uzz*(1-v(h))*jacobian(int(dispbcelements(j,1)))*weight(h) enddo ITxxE=0.5*ITxxE ITxyE=0.5*ITxyE ITxzE=0.5*ITxzE ITyxE=0.5*ITyxE ITyyE=0.5*ITyyE ITyzE=0.5*ITyzE ITzxE=0.5*ITzxE ITzyE=0.5*ITzyE ITzzE=0.5*ITzzE IUxxE=0.5*IUxxE IUxyE=0.5*IUxyE IUxzE=0.5*IUxzE IUyxE=0.5*IUyxE IUyyE=0.5*IUyyE IUyzE=0.5*IUyzE IUzxE=0.5*IUzxE IUzyE=0.5*IUzyE IUzzE=0.5*IUzzE CPVITxxE(i)=CPVITxxE(i)-ITxxE CPVITxyE(i)=CPVITxyE(i)-ITxyE CPVITxzE(i)=CPVITxzE(i)-ITxzE CPVITyxE(i)=CPVITyxE(i)-ITyxE CPVITyyE(i)=CPVITyyE(i)-ITyyE CPVITyzE(i)=CPVITyzE(i)-ITyzE CPVITzxE(i)=CPVITzxE(i)-ITzxE CPVITzyE(i)=CPVITzyE(i)-ITzyE CPVITzzE(i)=CPVITzzE(i)-ITzzE if (i==int(fbcelements(j,1))) then CPVITxxE(i)=CPVITxxE(i)+ITxxE CPVITxyE(i)=CPVITxyE(i)+ITxyE CPVITxzE(i)=CPVITxzE(i)+ITxzE CPVITyxE(i)=CPVITyxE(i)+ITyxE CPVITyyE(i)=CPVITyyE(i)+ITyyE CPVITyzE(i)=CPVITyzE(i)+ITyzE CPVITzxE(i)=CPVITzxE(i)+ITzxE CPVITzyE(i)=CPVITzyE(i)+ITzyE CPVITzzE(i)=CPVITzzE(i)+ITzzE endif K(3*i-2,3*int(dispbcelements(j,1))-2)=IUxxE K(3*i-2,3*int(dispbcelements(j,1))-1)=IUxyE K(3*i-2,3*int(dispbcelements(j,1)))=IUxzE K(3*i-1,3*int(dispbcelements(j,1))-2)=IUyxE K(3*i-1,3*int(dispbcelements(j,1))-1)=IUyyE K(3*i-1,3*int(dispbcelements(j,1)))=IUyzE K(3*i,3*int(dispbcelements(j,1))-2)=IUzxE K(3*i,3*int(dispbcelements(j,1))-1)=IUzyE K(3*i,3*int(dispbcelements(j,1)))=IUzzE F(3*i-2,1)=F(3*i-2,1)+dispbcelements(j,2)*ITxxE+dispbcelements(j,3)*ITxyE+dispbcelements(j,4)*ITxzE F(3*i-1,1)=F(3*i-1,1)+dispbcelements(j,2)*ITyxE+dispbcelements(j,3)*ITyyE+dispbcelements(j,4)*ITyzE F(3*i,1)=F(3*i,1)+dispbcelements(j,2)*ITzxE+dispbcelements(j,3)*ITzyE+dispbcelements(j,4)*ITzzE if (i==int(dispbcelements(j,1))) then F(3*i-2,1)=F(3*i-2,1)-dispbcelements(j,2)*ITxxE-dispbcelements(j,3)*ITxyE-dispbcelements(j,4)*ITxzE F(3*i-1,1)=F(3*i-1,1)-dispbcelements(j,2)*ITyxE-dispbcelements(j,3)*ITyyE-dispbcelements(j,4)*ITyzE F(3*i,1)=F(3*i,1)-dispbcelements(j,2)*ITzxE-dispbcelements(j,3)*ITzyE-dispbcelements(j,4)*ITzzE endif enddo enddo call blacs_barrier(icontxt,'A') ! The following 'do' loop is executed on all processes but for different values of i's (embarrasingly parallel) do i=block*mypnum+1,min(block*mypnum+block,nelement) do j=1,nfbcelements if (i==int(fbcelements(j,1))) then K(3*i-2,3*int(fbcelements(j,1))-2)=CPVITxxE(i) K(3*i-2,3*int(fbcelements(j,1))-1)=CPVITxyE(i) K(3*i-2,3*int(fbcelements(j,1)))=CPVITxzE(i) K(3*i-1,3*int(fbcelements(j,1))-2)=CPVITyxE(i) K(3*i-1,3*int(fbcelements(j,1))-1)=CPVITyyE(i) K(3*i-1,3*int(fbcelements(j,1)))=CPVITyzE(i) K(3*i,3*int(fbcelements(j,1))-2)=CPVITzxE(i) K(3*i,3*int(fbcelements(j,1))-1)=CPVITzyE(i) K(3*i,3*int(fbcelements(j,1)))=CPVITzzE(i) endif enddo do j=1,ndispbcelements if (i==int(dispbcelements(j,1))) then F(3*i-2,1)=F(3*i-2,1)+dispbcelements(j,2)*CPVITxxE(i)+dispbcelements(j,3)*CPVITxyE(i)+dispbcelements(j,4)*CPVITxzE(i) F(3*i-1,1)=F(3*i-1,1)+dispbcelements(j,2)*CPVITyxE(i)+dispbcelements(j,3)*CPVITyyE(i)+dispbcelements(j,4)*CPVITyzE(i) F(3*i,1)=F(3*i,1)+dispbcelements(j,2)*CPVITzxE(i)+dispbcelements(j,3)*CPVITzyE(i)+dispbcelements(j,4)*CPVITzzE(i) endif enddo enddo call blacs_barrier(icontxt,'A') ! 'Assemble' 'K' and 'F' from pieces from different processes. 'Assembled' 'K' and 'F' are stored on every process. call dgamx2d(icontxt,'A',' ',3*nelement,3*nelement,K,3*nelement,ra,ca,-1,-1,-1) call dgamx2d(icontxt,'A',' ',3*nelement,1,F,3*nelement,ra,ca,-1,-1,-1) call blacs_barrier(icontxt,'A') ! Determine the sizes of local arrays on each of the processes. Following four lines execute on all of the processes (separately). lldK=NUMROC(3*nelement, mbK, myrow, 0, nprocsr) lldF=NUMROC(3*nelement, mbF, myrow, 0, nprocsr) lldKc=NUMROC(3*nelement, nbK, mycol, 0, nprocsc) lldFc=1 call blacs_barrier(icontxt,'A') ! Allocate local arrays on each of the processes. Following four lines execute on all of the processes (separately). allocate (Kp(lldK,lldKc)) allocate (Fp(lldF,1)) allocate (ipiv(lldK*lldKc+mbK)) call blacs_barrier(icontxt,'A') ! The following two 'do' loops construct 'Kp' and 'Fp' on each of the processes, separately. Effectively, they Block-Cyclically 'distribute' the matrices 'K' and 'F' over all the processes in the process grid. do i=lldK,1,-1 ia=INDXL2G(i,mbK,myrow,0,nprocsr) do j=lldKc,1,-1 ja=INDXL2G(j,nbK,mycol,0,nprocsc) Kp(i,j)=K(ia,ja) enddo enddo do i=lldF,1,-1 ib=INDXL2G(i,mbF,myrow,0,nprocsr) Fp(i,1)=F(ib,1) enddo call blacs_barrier(icontxt,'A') CALL DESCINIT(descK,mK,nK,mbK,nbK,0,0,icontxt,lldK,info) CALL DESCINIT(descF,mF,nF,mbF,nbF,0,0,icontxt,lldF,info) call blacs_barrier(icontxt,'A') ! Solve the system of equations on multiple processors. The matrix 'K' itself is the 'submatrix' in 'pdgesv' call pdgesv(3*nelement,1,Kp,1,1,descK,ipiv,Fp,1,1,descF,info) call blacs_barrier(icontxt,'A') ! 'Reverse' the Block-Cyclic Distribution of the solution ('do' loop executes on all the processes, separately) do i=1,lldF X(INDXL2G(i,mbF,myrow,0,nprocsr),1)=Fp(i,1) enddo call blacs_barrier(icontxt,'A') ! 'Assemble' 'X' from pieces from different processes. 'Assembled' 'X' is stored in process '0' only. 'X' is the solution. call dgamx2d(icontxt,'A',' ',3*nelement,1,X,3*nelement,ra,ca,-1,0,0) call blacs_barrier(icontxt,'A') ! The following 'if' statement executes on process '0' only if (mypnum==0) then finish=MPI_WTIME() time=finish-start open(102, file="outbem96p.txt", status="unknown", action="write", form="formatted", IOSTAT=istat) write (102, *) X close(102) open(103, file="timebem96p.txt", status="unknown", action="write", form="formatted", IOSTAT=istat) write (103, *) time close(103) write (*, *) X write (*, *) time endif call blacs_barrier(icontxt,'A') ! The following two lines execute on each of the processes, separately deallocate (nx,ny,nz,xbar,ybar,zbar,jacobian) deallocate (CPVITxxE,CPVITxyE,CPVITxzE,CPVITyxE,CPVITyyE,CPVITyzE,CPVITzxE,CPVITzyE,CPVITzzE,K,F,Kp,Fp) call blacs_barrier(icontxt,'A') call blacs_exit(0) end program bemconstp