#ifdef USE_GPU
module gpu_ElHl_Chebyshev_m
use MPI
use type_m , g_time => f_time
use blas95
use lapack95
use constants_m
use ifport
use Matrix_Math
use MPI_definitions_m , only : master , EnvCrew , myCheby , &
ChebyComm , ChebyCrew , &
ForceComm , ForceCrew , &
KernelComm , ChebyKernelComm
use parameters_m , only : t_i , frame_step , Coulomb_ , &
EnvField_ , n_part, driver , &
QMMM, CT_dump_step , HFP_Forces
use Structure_Builder , only : Unit_Cell
use Overlap_Builder , only : Overlap_Matrix
use FMO_m , only : FMO_analysis , eh_tag
use Data_Output , only : Populations
use Hamiltonians , only : X_ij , even_more_extended_Huckel
use Taylor_m , only : Propagation, dump_Qdyn
use ElHl_Chebyshev_m , only : Build_Huckel , &
QuasiParticleEnergies , &
preprocess_from_restart
public :: gpu_ElHl_Chebyshev, gpu_preprocess_ElHl_Chebyshev
private
! module parameters ...
integer , parameter :: order = 25
real*8 , parameter :: error = 1.0d-12
real*8 , parameter :: norm_error = 1.0d-12
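! remark: order caps the Chebyshev polynomial expansion of the propagator;
! the series is presumably truncated earlier once the expansion coefficients
! fall below error, while norm_error bounds the tolerated drift in the norm
! of the propagated state ...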
! module variables ...
logical , save :: first_call_ = .true.
real*8 , save :: save_tau
real*8 , allocatable :: h0(:,:)
real*8 , allocatable :: H_prime(:,:)
real*8 , allocatable :: S_matrix(:,:)
complex*16 , allocatable :: Psi_t_bra(:,:) , Psi_t_ket(:,:)
interface gpu_preprocess_ElHl_Chebyshev
module procedure gpu_preprocess_ElHl_Chebyshev
module procedure preprocess_from_restart
end interface
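! remark: the generic name dispatches on the argument list; a normal start
! resolves to the subroutine below, a restart to preprocess_from_restart ...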
contains
!
!
!
!==============================================================================================================
subroutine gpu_preprocess_ElHl_Chebyshev( system , basis , AO_bra , AO_ket , Dual_bra , Dual_ket , QDyn , it )
! used for normal start (see interface) ...
!==============================================================================================================
implicit none
type(structure) , intent(inout) :: system
type(STO_basis) , intent(inout) :: basis(:)
complex*16 , intent(out) :: AO_bra(:,:)
complex*16 , intent(out) :: AO_ket(:,:)
complex*16 , intent(out) :: DUAL_bra(:,:)
complex*16 , intent(out) :: DUAL_ket(:,:)
type(g_time) , intent(inout) :: QDyn
integer , intent(in) :: it
!local variables ...
integer :: li , M , N , err
integer :: mpi_D_R = mpi_double_precision
real*8 , allocatable :: wv_FMO(:)
complex*16 , allocatable :: ElHl_Psi(:,:)
type(R_eigen) :: FMO
! MUST compute S_matrix before FMO analysis ...
CALL Overlap_Matrix( system , basis , S_matrix )
N = size(basis)
allocate( h0(N,N), H_prime(N,N) )
!------------------------------------------------------------------------
If( master .OR. EnvCrew ) then
If( EnvField_ ) then
! EnvCrew stays behind in even_more_extended_Huckel ...
h0(:,:) = even_more_extended_Huckel( system , basis , S_matrix )
Else
h0(:,:) = Build_Huckel( basis , S_matrix )
end If
CALL MPI_BCAST( h0 , N*N , mpi_D_R , 0 , ForceComm , err )
CALL MPI_BCAST( h0 , N*N , mpi_D_R , 0 , ChebyComm , err )
End If
If( ForceCrew ) then
! after instantiating S_matrix and h0, the ForceCrew leaves for EhrenfestForce ...
CALL MPI_BCAST( h0 , N*N , mpi_D_R , 0 , ForceComm , err )
return
End If
! the remaining ChebyCrew member (myCheby == 1) also receives h0 ...
If( myCheby == 1 ) CALL MPI_BCAST( h0 , N*N , mpi_D_R , 0 , ChebyComm , err )
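! remark: rank 0 (the master) is presumably the broadcast root of both
! ForceComm and ChebyComm, so every crew holds the same h0 before parting ways ...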
!------------------------------------------------------------------------
! pin (for faster CPU <-> GPU transfers)
call GPU_Pin( S_matrix, n*n*8 )
call GPU_Pin( h0, n*n*8 )
call GPU_Pin( H_prime, n*n*8 )
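! remark: GPU_Pin presumably registers these arrays as page-locked host
! memory, enabling faster (asynchronous) DMA transfers; the byte count is
! n*n elements of real*8 (8 bytes each) ...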
allocate( ElHl_Psi( N , n_part ) , source=C_zero )
!========================================================================
! prepare electron state ...
CALL FMO_analysis( system , basis, FMO=FMO , MO=wv_FMO , instance="E" )
! place the electron state in Structure's Hilbert space ...
li = minloc( basis%indx , DIM = 1 , MASK = basis%El )
M = size(wv_FMO)
ElHl_Psi(li:li+M-1,1) = merge( dcmplx(wv_FMO(:)) , C_zero , eh_tag(1) == "el")
deallocate( wv_FMO )
!========================================================================
! prepare hole state ...
CALL FMO_analysis( system , basis, FMO=FMO , MO=wv_FMO , instance="H" )
! place the hole state in Structure's Hilbert space ...
li = minloc( basis%indx , DIM = 1 , MASK = basis%Hl )
M = size(wv_FMO)
ElHl_Psi(li:li+M-1,2) = merge( dcmplx(wv_FMO(:)) , C_zero , eh_tag(2) == "hl")
deallocate( wv_FMO )
!========================================================================
!==============================================
! prepare DUAL basis for local properties ...
! DUAL_bra = (C*)^T ; DUAL_ket = S*C ...
DUAL_bra = dconjg( ElHl_Psi )
call op_x_ket( DUAL_ket, S_matrix , ElHl_Psi )
!==============================================
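! remark: with this pair, sum( DUAL_bra * DUAL_ket ) = C^dagger * S * C, the
! proper norm in a non-orthogonal AO basis, so populations built from it are
! presumably correctly weighted by the overlap ...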
!==============================================
! vector states to be propagated ...
! Psi_bra = C^T*S ; Psi_ket = C ...
allocate( Psi_t_bra(N,n_part) )
allocate( Psi_t_ket(N,n_part) )
call bra_x_op( Psi_t_bra, ElHl_Psi , S_matrix )
Psi_t_ket = ElHl_Psi
!==============================================
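! remark: carrying S in the bra (Psi_bra = C^T*S) presumably lets inner
! products be evaluated as plain dot products during propagation, with
! Psi_bra * Psi_ket = C^T * S * C ...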
!==============================================
! preprocess stuff for EhrenfestForce ...
AO_bra = ElHl_Psi
AO_ket = ElHl_Psi
CALL QuasiParticleEnergies(AO_bra, AO_ket, H0)
!==============================================
! save populations(time=t_i) ...
QDyn%dyn(it,:,:) = Populations( QDyn%fragments , basis , DUAL_bra , DUAL_ket , t_i )
if (master) CALL dump_Qdyn( Qdyn , it )
! leaving S_matrix allocated
end subroutine gpu_preprocess_ElHl_Chebyshev
!
!
!
!=================================================================================================================
subroutine gpu_ElHl_Chebyshev( system , basis , AO_bra , AO_ket , Dual_bra , Dual_ket , QDyn , t , delta_t , it )
!=================================================================================================================
implicit none
type(structure) , intent(in) :: system
type(STO_basis) , intent(in) :: basis(:)
complex*16 , intent(inout) :: AO_bra(:,:)
complex*16 , intent(inout) :: AO_ket(:,:)
complex*16 , intent(inout) :: Dual_bra(:,:)
complex*16 , intent(inout) :: Dual_ket(:,:)
type(g_time) , intent(inout) :: QDyn
real*8 , intent(inout) :: t
real*8 , intent(in) :: delta_t
integer , intent(in) :: it
! local variables...
logical, save :: first_call = .true.
logical :: do_electron, do_hole
integer :: N, my_part, err, mpi_status(mpi_status_size), request_H_prime, t_stuff_sent, coords_sent, it_sync
integer :: mpi_D_R = mpi_double_precision
integer :: mpi_D_C = mpi_double_complex
real*8 :: t_init , t_max , tau_max , tau , t_stuff(2)
N = size(basis)
t_init = t
! max time inside slice ...
t_max = delta_t*frame_step*(it-1)
! constants of evolution ...
tau_max = delta_t / h_bar
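! remark: dividing by h_bar presumably recasts the slice length as the phase
! variable of the Chebyshev expansion of exp(-i*H*t/h_bar) ...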
!----------------------------------
! Organize which process does what:
! (master) <-> (myCheby==0) process: electron
do_electron = master
! (myCheby==1) process: hole
do_hole = ( myCheby == 1 )
! processes with no role here return immediately
if (.not. (do_electron .or. do_hole)) return
! define index to address electron (1) or hole (2) in Psi_t_bra/ket
if (do_electron) my_part = 1
if (do_hole) my_part = 2
!----------------------------------
! begin
10 continue
! compute overlap and Hamiltonian matrices
if(.not. first_call) then
if (do_electron) then
call MPI_Isend( system%coord, system%atoms*3, mpi_D_R, 1, 0, ChebyComm, coords_sent, err )
call MPI_Request_Free( coords_sent, err )
else
call MPI_Recv( system%coord, system%atoms*3, mpi_D_R, 0, 0, ChebyComm, mpi_status, err )
end if
call Overlap_Matrix( system, basis, S_matrix )
If( EnvField_ ) then
it_sync = it-1 ! <== for synchronizing EnvSetUp call therein ...
h0(:,:) = even_more_extended_Huckel( system , basis , S_matrix , it_sync)
else
h0(:,:) = Build_Huckel( basis , S_matrix )
end If
end if
! master bcasts QMMM
call MPI_Bcast( QMMM, 1, mpi_logical, 0, ChebyComm, err )
! master/slave sends/receives time info
if (do_electron) then
t_stuff = [t_init, t_max]
call MPI_Isend( t_stuff, 2, mpi_D_R, 1, 0, ChebyComm, t_stuff_sent, err ) ! ──┐
call MPI_Request_Free( t_stuff_sent, err ) ! │
else if (do_hole) then ! │
call MPI_Recv( t_stuff, 2, mpi_D_R, 0, 0, ChebyComm, mpi_status, err ) ! <──┘
t_init = t_stuff(1)
t_max = t_stuff(2)
end if
! adapt the time step for efficient propagation ...
tau = merge( tau_max, save_tau * 1.15d0, first_call )
! but tau must never exceed tau_max ...
tau = merge( tau_max, tau, tau > tau_max )
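! remark: save_tau holds the last sub-step the propagator accepted; growing
! it by 15% each slice presumably probes for a larger stable step, while the
! cap above keeps tau from overshooting the slice boundary ...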
first_call = .false.
! send AO_bra/ket to Diabatic_Ehrenfest
if (do_electron .and. QMMM) then
call MPI_Bcast( AO_bra, 2*N, mpi_D_C, 0, KernelComm, err )
call MPI_Bcast( AO_ket, 2*N, mpi_D_C, 0, KernelComm, err )
end if
! propagate electron or hole
call PropagationElHl_gpucaller( N, S_matrix, h0, H_prime, &
AO_bra (:,my_part), AO_ket (:,my_part), &
Psi_t_bra(:,my_part), Psi_t_ket(:,my_part), &
t_init, t_max, tau, save_tau )
! remark: on return, AO_bra = S_inv * Psi_t_bra; AO_bra still lacks conjugation (deferred to the master, below)
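! remark: PropagationElHl_gpucaller presumably advances the Psi_t pair from
! t_init to t_max in sub-steps of size tau on the GPU and returns the last
! accepted sub-step in save_tau, seeding the next slice ...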
! send/receive hole
if (do_hole) then
call MPI_Send( Psi_t_bra(:,2), N, mpi_D_C, 0, 0, ChebyComm, err )
call MPI_Send( Psi_t_ket(:,2), N, mpi_D_C, 0, 0, ChebyComm, err )
call MPI_Send( AO_bra(:,2), N, mpi_D_C, 0, 0, ChebyComm, err )
else if (do_electron) then
call MPI_Recv( Psi_t_bra(:,2), N, mpi_D_C, 1, 0, ChebyComm, mpi_status, err )
call MPI_Recv( Psi_t_ket(:,2), N, mpi_D_C, 1, 0, ChebyComm, mpi_status, err )
call MPI_Recv( AO_bra(:,2), N, mpi_D_C, 1, 0, ChebyComm, mpi_status, err )
end if
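! remark: after this exchange the master holds both columns (electron and
! hole), so the bookkeeping below sees the complete two-particle state ...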
! the (myCheby==1) process loops back indefinitely
if (do_hole) goto 10 !begin
! only master (do_electron) reaches this part of the code
! send H_prime to process (myChebyKernel==2)
if (QMMM) call MPI_Isend( H_prime, N*N, mpi_D_R, 2, 0, ChebyKernelComm, request_H_prime, err )
t = t_init + (delta_t*frame_step)
! prepare DUAL basis for local properties ...
DUAL_bra = dconjg(Psi_t_ket)
DUAL_ket = Psi_t_bra
! prepare Slater basis for FORCE properties ...
AO_bra = dconjg(AO_bra) ! conjugation of AO_bra is not done on the GPU
AO_ket = Psi_t_ket
call QuasiParticleEnergies( AO_bra, AO_ket, h0 )
! save populations(time) ...
QDyn%dyn(it,:,:) = Populations( QDyn%fragments, basis, DUAL_bra, DUAL_ket, t )
if( mod(it,CT_dump_step) == 0 ) CALL dump_Qdyn( Qdyn , it )
call MPI_Wait( request_H_prime, mpi_status, err )
include 'formats.h'
end subroutine gpu_ElHl_Chebyshev
!
!
!
end module gpu_ElHl_Chebyshev_m
#endif