Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Analytical PCM 2nd derivative #302

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
410 changes: 410 additions & 0 deletions gpu4pyscf/gto/int3c1e_ipip.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion gpu4pyscf/gto/tests/test_int1e_grids_ip.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,5 +364,5 @@ def test_int1e_grids_ip1_density_contracted(self):
cp.testing.assert_allclose(ref_int1e_dA, test_int1e_dA, atol = integral_threshold)

if __name__ == "__main__":
print("Full Tests for One Electron Coulomb Integrals")
print("Full Tests for One Electron Coulomb Integrals 1st Derivative")
unittest.main()
480 changes: 480 additions & 0 deletions gpu4pyscf/gto/tests/test_int1e_grids_ipip.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions gpu4pyscf/lib/gint/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ add_library(gint SHARED
nr_fill_ao_ints.cu
nr_fill_ao_int3c1e.cu
nr_fill_ao_int3c1e_ip.cu
nr_fill_ao_int3c1e_ipip.cu
nr_fill_ao_int3c2e.cu
nr_fill_ao_int3c2e_ip1.cu
nr_fill_ao_int3c2e_ip2.cu
Expand Down
635 changes: 635 additions & 0 deletions gpu4pyscf/lib/gint/g3c1e_ipip.cu

Large diffs are not rendered by default.

361 changes: 361 additions & 0 deletions gpu4pyscf/lib/gint/nr_fill_ao_int3c1e_ipip.cu

Large diffs are not rendered by default.

130 changes: 123 additions & 7 deletions gpu4pyscf/lib/solvent/pcm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ static void _pcm_d_s(double *matrix_d, double *matrix_s,

__global__
static void _pcm_dD_dS(double *matrix_dd, double *matrix_ds,
const double *coords, const double *norm_vec, const double *r_vdw,
const double *charge_exp, const double *switch_fun,
int n)
const double *coords, const double *norm_vec,
const double *charge_exp,
int n)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
int j = blockIdx.y * blockDim.y + threadIdx.y;
Expand Down Expand Up @@ -130,6 +130,105 @@ static void _pcm_dD_dS(double *matrix_dd, double *matrix_ds,
}
}


__global__
static void _pcm_d2D_d2S(double *matrix_d2D, double *matrix_d2S,
const double *coords, const double *norm_vec,
const double *charge_exp,
int n)
{
const int i = blockIdx.x * blockDim.x + threadIdx.x;
const int j = blockIdx.y * blockDim.y + threadIdx.y;
if (i >= n || j >= n) {
return;
}

// calculate xi
const double ei = charge_exp[i];
const double ej = charge_exp[j];
const double eij = ei * ej / sqrt(ei*ei + ej*ej);

// calculate r
const double dx = coords[3*i] - coords[3*j];
const double dy = coords[3*i+1] - coords[3*j+1];
const double dz = coords[3*i+2] - coords[3*j+2];
const double rij = norm3d(dx, dy, dz);
const double rij_1 = 1.0 / rij;
const double rij_2 = rij_1 * rij_1;
const double rij_3 = rij_2 * rij_1;
const double rij_4 = rij_2 * rij_2;
const double rij_5 = rij_2 * rij_3;
const double eij2 = eij * eij;

const double eij_rij = eij * rij;
const double erf_eij_rij = erf(eij_rij);
const double exp_minus_eij2_rij2 = exp(-eij_rij * eij_rij);
const double two_eij_over_sqrt_pi = 2.0 * eij / SQRT_PI;
const double two_eij_over_sqrt_pi_exp_minus_eij2_rij2 = exp_minus_eij2_rij2 * two_eij_over_sqrt_pi;

const double S_direct_product_prefactor = -two_eij_over_sqrt_pi_exp_minus_eij2_rij2 * (3 * rij_4 + 2 * eij2 * rij_2)
+ 3 * rij_5 * erf_eij_rij;
const double S_xyz_diagonal_prefactor = two_eij_over_sqrt_pi_exp_minus_eij2_rij2 * rij_2 - rij_3 * erf_eij_rij;

const int n2 = n * n;
if (i == j) {
matrix_d2S[i*n + j ] = 0.0;
matrix_d2S[i*n + j + n2 ] = 0.0;
matrix_d2S[i*n + j + n2 * 2] = 0.0;
matrix_d2S[i*n + j + n2 * 3] = 0.0;
matrix_d2S[i*n + j + n2 * 4] = 0.0;
matrix_d2S[i*n + j + n2 * 5] = 0.0;
matrix_d2S[i*n + j + n2 * 6] = 0.0;
matrix_d2S[i*n + j + n2 * 7] = 0.0;
matrix_d2S[i*n + j + n2 * 8] = 0.0;
} else {
matrix_d2S[i*n + j ] = dx * dx * S_direct_product_prefactor + S_xyz_diagonal_prefactor;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Set S_direct_product_prefactor = 0 if i == j? The if-else logic may slow down GPU memory access.

matrix_d2S[i*n + j + n2 ] = dx * dy * S_direct_product_prefactor;
matrix_d2S[i*n + j + n2 * 2] = dx * dz * S_direct_product_prefactor;
matrix_d2S[i*n + j + n2 * 3] = dy * dx * S_direct_product_prefactor;
matrix_d2S[i*n + j + n2 * 4] = dy * dy * S_direct_product_prefactor + S_xyz_diagonal_prefactor;
matrix_d2S[i*n + j + n2 * 5] = dy * dz * S_direct_product_prefactor;
matrix_d2S[i*n + j + n2 * 6] = dz * dx * S_direct_product_prefactor;
matrix_d2S[i*n + j + n2 * 7] = dz * dy * S_direct_product_prefactor;
matrix_d2S[i*n + j + n2 * 8] = dz * dz * S_direct_product_prefactor + S_xyz_diagonal_prefactor;
}

if (matrix_d2D != NULL) {
const double nxj = norm_vec[3*j];
const double nyj = norm_vec[3*j+1];
const double nzj = norm_vec[3*j+2];
const double nj_rij = dx * nxj + dy * nyj + dz * nzj;

const double eij4 = eij2 * eij2;
const double rij_6 = rij_4 * rij_2;
const double rij_7 = rij_4 * rij_3;

const double D_direct_product_prefactor = (-two_eij_over_sqrt_pi_exp_minus_eij2_rij2 * (15 * rij_6 + 10 * eij2 * rij_4 + 4 * eij4 * rij_2)
+ 15 * rij_7 * erf_eij_rij) * nj_rij;
if (i == j) {
matrix_d2D[i*n + j ] = 0.0;
matrix_d2D[i*n + j + n2 ] = 0.0;
matrix_d2D[i*n + j + n2 * 2] = 0.0;
matrix_d2D[i*n + j + n2 * 3] = 0.0;
matrix_d2D[i*n + j + n2 * 4] = 0.0;
matrix_d2D[i*n + j + n2 * 5] = 0.0;
matrix_d2D[i*n + j + n2 * 6] = 0.0;
matrix_d2D[i*n + j + n2 * 7] = 0.0;
matrix_d2D[i*n + j + n2 * 8] = 0.0;
} else {
matrix_d2D[i*n + j ] = D_direct_product_prefactor * dx * dx - S_direct_product_prefactor * (dx * nxj + dx * nxj + nj_rij);
matrix_d2D[i*n + j + n2 ] = D_direct_product_prefactor * dx * dy - S_direct_product_prefactor * (dy * nxj + dx * nyj);
matrix_d2D[i*n + j + n2 * 2] = D_direct_product_prefactor * dx * dz - S_direct_product_prefactor * (dz * nxj + dx * nzj);
matrix_d2D[i*n + j + n2 * 3] = D_direct_product_prefactor * dy * dx - S_direct_product_prefactor * (dx * nyj + dy * nxj);
matrix_d2D[i*n + j + n2 * 4] = D_direct_product_prefactor * dy * dy - S_direct_product_prefactor * (dy * nyj + dy * nyj + nj_rij);
matrix_d2D[i*n + j + n2 * 5] = D_direct_product_prefactor * dy * dz - S_direct_product_prefactor * (dz * nyj + dy * nzj);
matrix_d2D[i*n + j + n2 * 6] = D_direct_product_prefactor * dz * dx - S_direct_product_prefactor * (dx * nzj + dz * nxj);
matrix_d2D[i*n + j + n2 * 7] = D_direct_product_prefactor * dz * dy - S_direct_product_prefactor * (dy * nzj + dz * nyj);
matrix_d2D[i*n + j + n2 * 8] = D_direct_product_prefactor * dz * dz - S_direct_product_prefactor * (dz * nzj + dz * nzj + nj_rij);
}
}
}

extern "C" {
int pcm_d_s(cudaStream_t stream, double *matrix_d, double *matrix_s,
const double *coords, const double *norm_vec, const double *r_vdw,
Expand All @@ -149,15 +248,32 @@ int pcm_d_s(cudaStream_t stream, double *matrix_d, double *matrix_s,
}

int pcm_dd_ds(cudaStream_t stream, double *matrix_dD, double *matrix_dS,
const double *coords, const double *norm_vec, const double *r_vdw,
const double *charge_exp, const double *switch_fun,
int n)
const double *coords, const double *norm_vec,
const double *charge_exp,
int n)
{
int ntilex = (n + THREADS - 1) / THREADS;
int ntiley = (n + THREADS - 1) / THREADS;
dim3 threads(THREADS, THREADS);
dim3 blocks(ntilex, ntiley);
_pcm_dD_dS<<<blocks, threads, 0, stream>>>(matrix_dD, matrix_dS, coords, norm_vec, r_vdw, charge_exp, switch_fun, n);
_pcm_dD_dS<<<blocks, threads, 0, stream>>>(matrix_dD, matrix_dS, coords, norm_vec, charge_exp, n);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
return 1;
}
return 0;
}

int pcm_d2d_d2s(cudaStream_t stream, double *matrix_d2D, double *matrix_d2S,
const double *coords, const double *norm_vec,
const double *charge_exp,
int n)
{
const int ntilex = (n + THREADS - 1) / THREADS;
const int ntiley = (n + THREADS - 1) / THREADS;
const dim3 threads(THREADS, THREADS);
const dim3 blocks(ntilex, ntiley);
_pcm_d2D_d2S<<<blocks, threads, 0, stream>>>(matrix_d2D, matrix_d2S, coords, norm_vec, charge_exp, n);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
return 1;
Expand Down
111 changes: 81 additions & 30 deletions gpu4pyscf/solvent/grad/pcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,6 @@ def grad_switch_h(x):
dy[x>1] = 0.0
return dy

def gradgrad_switch_h(x):
''' 2nd derivative of h(x) '''
ddy = 60.0*x - 180.0*x**2 + 120*x**3
ddy[x<0] = 0.0
ddy[x>1] = 0.0
return ddy

def get_dF_dA(surface):
'''
J. Chem. Phys. 133, 244111 (2010), Appendix C
Expand All @@ -63,10 +56,9 @@ def get_dF_dA(surface):
dF = cupy.zeros([ngrids, natom, 3])
dA = cupy.zeros([ngrids, natom, 3])

for ia in range(atom_coords.shape[0]):
for ia in range(natom):
p0,p1 = surface['gslice_by_atom'][ia]
coords = grid_coords[p0:p1]
p1 = p0 + coords.shape[0]
ri_rJ = cupy.expand_dims(coords, axis=1) - atom_coords
riJ = cupy.linalg.norm(ri_rJ, axis=-1)
diJ = (riJ - R_in_J) / R_sw_J
Expand Down Expand Up @@ -145,9 +137,7 @@ def get_dD_dS(surface, with_S=True, with_D=False, stream=None):
'''
charge_exp = surface['charge_exp']
grid_coords = surface['grid_coords']
switch_fun = surface['switch_fun']
norm_vec = surface['norm_vec']
R_vdw = surface['R_vdw']
n = charge_exp.shape[0]
dS = cupy.empty([3,n,n])
dD = None
Expand All @@ -163,9 +153,7 @@ def get_dD_dS(surface, with_S=True, with_D=False, stream=None):
dD_ptr, dS_ptr,
ctypes.cast(grid_coords.data.ptr, ctypes.c_void_p),
ctypes.cast(norm_vec.data.ptr, ctypes.c_void_p),
ctypes.cast(R_vdw.data.ptr, ctypes.c_void_p),
ctypes.cast(charge_exp.data.ptr, ctypes.c_void_p),
ctypes.cast(switch_fun.data.ptr, ctypes.c_void_p),
ctypes.c_int(n)
)
if err != 0:
Expand All @@ -181,7 +169,7 @@ def get_dSii(surface, dF):
dSii = dSii_dF[:,None] * dF
return dSii

def grad_nuc(pcmobj, dm):
def grad_nuc(pcmobj, dm, q_sym = None):
mol = pcmobj.mol
log = logger.new_logger(mol, mol.verbose)
t1 = log.init_timer()
Expand All @@ -194,7 +182,8 @@ def grad_nuc(pcmobj, dm):
pcmobj._get_vind(dm)

mol = pcmobj.mol
q_sym = pcmobj._intermediates['q_sym'].get()
if q_sym is None:
q_sym = pcmobj._intermediates['q_sym'].get()
gridslice = pcmobj.surface['gslice_by_atom']
grid_coords = pcmobj.surface['grid_coords'].get()
exponents = pcmobj.surface['charge_exp'].get()
Expand All @@ -220,7 +209,7 @@ def grad_nuc(pcmobj, dm):
t1 = log.timer_debug1('grad nuc', *t1)
return de

def grad_qv(pcmobj, dm):
def grad_qv(pcmobj, dm, q_sym = None):
'''
contributions due to integrals
'''
Expand All @@ -237,7 +226,8 @@ def grad_qv(pcmobj, dm):
gridslice = pcmobj.surface['gslice_by_atom']
charge_exp = pcmobj.surface['charge_exp']
grid_coords = pcmobj.surface['grid_coords']
q_sym = pcmobj._intermediates['q_sym']
if q_sym is None:
q_sym = pcmobj._intermediates['q_sym']

intopt = int3c1e.VHFOpt(mol)
intopt.build(1e-14, aosym=False)
Expand Down Expand Up @@ -282,37 +272,37 @@ def grad_solver(pcmobj, dm):
vK_1 = cupy.linalg.solve(K.T, v_grids)
epsilon = pcmobj.eps

def contract_bra(a, B, c):
''' i,xij,j->jx '''
tmp = a.dot(B)
return (tmp*c).T

def contract_ket(a, B, c):
''' i,xij,j->ix '''
tmp = B.dot(c)
return (a*tmp).T

de = cupy.zeros([pcmobj.mol.natm,3])
if pcmobj.method.upper() in ['C-PCM', 'CPCM', 'COSMO']:
dD, dS = get_dD_dS(pcmobj.surface, with_D=False, with_S=True)

# dR = 0, dK = dS
de_dS = (vK_1 * dS.dot(q)).T # cupy.einsum('i,xij,j->ix', vK_1, dS, q)
de_dS = 0.5 * contract_ket(vK_1, dS, q)
de_dS -= 0.5 * contract_bra(vK_1, dS, q)
de -= cupy.asarray([cupy.sum(de_dS[p0:p1], axis=0) for p0,p1 in gridslice])
dD = dS = None

dF, dA = get_dF_dA(pcmobj.surface)
dSii = get_dSii(pcmobj.surface, dF)
de -= 0.5*contract('i,xij->jx', vK_1*q, dSii) # 0.5*cupy.einsum('i,xij,i->jx', vK_1, dSii, q)

elif pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE', 'SMD']:
elif pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SMD']:
dF, dA = get_dF_dA(pcmobj.surface)
dSii = get_dSii(pcmobj.surface, dF)
dF = None

dD, dS = get_dD_dS(pcmobj.surface, with_D=True, with_S=True)

def contract_bra(a, B, c):
''' i,xij,j->jx '''
tmp = a.dot(B)
return (tmp*c).T

def contract_ket(a, B, c):
''' i,xij,j->ix '''
tmp = B.dot(c)
return (a*tmp).T

# IEF-PCM and SS(V)PE formally are the same in gradient calculation
# dR = f_eps/(2*pi) * (dD*A + D*dA),
# dK = dS - f_eps/(2*pi) * (dD*A*S + D*dA*S + D*A*dS)
f_epsilon = (epsilon - 1.0)/(epsilon + 1.0)
Expand Down Expand Up @@ -352,6 +342,67 @@ def contract_ket(a, B, c):

de_dK = de_dS0 - fac * (de_dD + de_dA + de_dS1)
de += de_dR - de_dK
elif pcmobj.method.upper() in [ 'SS(V)PE' ]:
dF, dA = get_dF_dA(pcmobj.surface)
dSii = get_dSii(pcmobj.surface, dF)
dF = None

dD, dS = get_dD_dS(pcmobj.surface, with_D=True, with_S=True)

# dR = f_eps/(2*pi) * (dD*A + D*dA),
# dK = dS - f_eps/(2*pi) * (dD*A*S + D*dA*S + D*A*dS)
f_epsilon = (epsilon - 1.0)/(epsilon + 1.0)
fac = f_epsilon/(2.0*PI)

Av = A*v_grids
de_dR = 0.5*fac * contract_ket(vK_1, dD, Av)
de_dR -= 0.5*fac * contract_bra(vK_1, dD, Av)
de_dR = cupy.asarray([cupy.sum(de_dR[p0:p1], axis=0) for p0,p1 in gridslice])

vK_1_D = vK_1.dot(D)
vK_1_Dv = vK_1_D * v_grids
de_dR += 0.5*fac * contract('j,xjn->nx', vK_1_Dv, dA)

de_dS0 = 0.5*contract_ket(vK_1, dS, q)
de_dS0 -= 0.5*contract_bra(vK_1, dS, q)
de_dS0 = cupy.asarray([cupy.sum(de_dS0[p0:p1], axis=0) for p0,p1 in gridslice])

vK_1_q = vK_1 * q
de_dS0 += 0.5*contract('i,xin->nx', vK_1_q, dSii)

vK_1_DA = vK_1_D*A
de_dS1 = 0.5*contract_ket(vK_1_DA, dS, q)
de_dS1 -= 0.5*contract_bra(vK_1_DA, dS, q)
de_dS1 = cupy.asarray([cupy.sum(de_dS1[p0:p1], axis=0) for p0,p1 in gridslice])
vK_1_DAq = vK_1_DA*q
de_dS1 += 0.5*contract('j,xjn->nx', vK_1_DAq, dSii)

DT_q = cupy.dot(D.T, q)
ADT_q = A * DT_q
de_dS1_T = 0.5*contract_ket(vK_1, dS, ADT_q)
de_dS1_T -= 0.5*contract_bra(vK_1, dS, ADT_q)
de_dS1_T = cupy.asarray([cupy.sum(de_dS1_T[p0:p1], axis=0) for p0,p1 in gridslice])
vK_1_ADT_q = vK_1 * ADT_q
de_dS1_T += 0.5*contract('j,xjn->nx', vK_1_ADT_q, dSii)

Sq = cupy.dot(S,q)
ASq = A*Sq
de_dD = 0.5*contract_ket(vK_1, dD, ASq)
de_dD -= 0.5*contract_bra(vK_1, dD, ASq)
de_dD = cupy.asarray([cupy.sum(de_dD[p0:p1], axis=0) for p0,p1 in gridslice])

vK_1_S = cupy.dot(vK_1, S)
vK_1_SA = vK_1_S * A
de_dD_T = 0.5*contract_ket(vK_1_SA, -dD.transpose(0,2,1), q)
de_dD_T -= 0.5*contract_bra(vK_1_SA, -dD.transpose(0,2,1), q)
de_dD_T = cupy.asarray([cupy.sum(de_dD_T[p0:p1], axis=0) for p0,p1 in gridslice])

de_dA = 0.5*contract('j,xjn->nx', vK_1_D*Sq, dA) # 0.5*cupy.einsum('j,xjn,j->nx', vK_1_D, dA, Sq)

de_dA_T = 0.5*contract('j,xjn->nx', vK_1_S*DT_q, dA)

de_dK = de_dS0 - 0.5 * fac * (de_dD + de_dA + de_dS1 + de_dD_T + de_dA_T + de_dS1_T)
de += de_dR - de_dK
else:
raise RuntimeError(f"Unknown implicit solvent model: {pcmobj.method}")
t1 = log.timer_debug1('grad solver', *t1)
Expand Down
Loading
Loading