Vlasov_Drifit_terms
The program primarily leverages the difference to compute the Drift term.
Advantages of the program:
- Fast parallel processing of Drifit terms with CUDA
- Utilizing UPFD methods and flux limiters to ensure “constant positive” computational processes
The main function of the program:
Drift_Vlasov_terms()
This function mainly calculates the Drifit term:
- The initialization process is executed initially, while the Drift term is concurrently calculated using the kernel function.
def Drift_Vlasov_terms(f_x_p_t, Fx, Fy, Fz, \ masses, total_grid, num_of_particle_types, \ npx, npy, npz, nx, ny, nz, \ half_px, half_py, half_pz, \ dx, dy, dz, dpx, dpy, dpz, number_momentum_levels,\ x_bound_config, y_bound_config, z_bound_config, \ blockspergrid_total_phase, threadsperblock,\ collision_term, dt, c, current_time_step, \ whether_drift, whether_Vlasov, drift_order): f_updated = cupy.zeros_like(f_x_p_t) drift_force_term_kernel[blockspergrid_total_phase, threadsperblock]\ (f_x_p_t, Fx, Fy, Fz, \ masses, total_grid, num_of_particle_types, \ npx, npy, npz, nx, ny, nz,\ half_px, half_py, half_pz, \ dx, dy, dz, dpx, dpy, dpz, number_momentum_levels,\ x_bound_config, y_bound_config, z_bound_config, \ f_updated, collision_term, \ dt, c, current_time_step, whether_drift, whether_Vlasov,\ drift_order) return f_updated
- Furthermore, there is a correspondence established between GPU threads and momentum, spatial grids:
- This primarily involves the mutual conversion between three-dimensional position space and three-dimensional momentum space with one-dimensional thread indices.
- Special processing is applied to indices located at boundary conditions.
i_grid = cuda.grid(1) if i_grid < total_grid: # convert one-d index into six-d ipz = i_grid%npz ipz_rest = i_grid//npz ipy = ipz_rest%npy ipy_rest = ipz_rest//npy ipx = ipy_rest%npx ipx_rest = ipy_rest//npx iz = ipx_rest%nz iz_rest = ipx_rest//nz iy = iz_rest%ny iy_rest = iz_rest//ny ix = iy_rest%nx # distribution functions out of the computation domain of each card are always set to be 0 ixPlus = (ix+1) iyPlus = (iy+1) izPlus = (iz+1) ixMinus = (ix-1) iyMinus = (iy-1) izMinus = (iz-1) ixPlusPlus = (ix+2) iyPlusPlus = (iy+2) izPlusPlus = (iz+2) ixMinusMinus = (ix-2) iyMinusMinus = (iy-2) izMinusMinus = (iz-2) # convert six-d to one-d i_phasexmin = threeD_to_oneD(ixMinus, iy, iz, ipx, ipy, ipz, nx, ny, nz, npx, npy, npz) i_phasexmax = threeD_to_oneD(ixPlus, iy, iz, ipx, ipy, ipz, nx, ny, nz, npx, npy, npz) i_phaseymin = threeD_to_oneD(ix, iyMinus, iz, ipx, ipy, ipz, nx, ny, nz, npx, npy, npz) i_phaseymax = threeD_to_oneD(ix, iyPlus, iz, ipx, ipy, ipz, nx, ny, nz, npx, npy, npz) i_phasezmin = threeD_to_oneD(ix, iy, izMinus, ipx, ipy, ipz, nx, ny, nz, npx, npy, npz) i_phasezmax = threeD_to_oneD(ix, iy, izPlus, ipx, ipy, ipz, nx, ny, nz, npx, npy, npz) i_phasexminmin = threeD_to_oneD(ixMinusMinus, iy, iz, ipx, ipy, ipz, nx, ny, nz, npx, npy, npz) i_phasexmaxmax = threeD_to_oneD(ixPlusPlus, iy, iz, ipx, ipy, ipz, nx, ny, nz, npx, npy, npz) i_phaseyminmin = threeD_to_oneD(ix, iyMinusMinus, iz, ipx, ipy, ipz, nx, ny, nz, npx, npy, npz) i_phaseymaxmax = threeD_to_oneD(ix, iyPlusPlus, iz, ipx, ipy, ipz, nx, ny, nz, npx, npy, npz) i_phasezminmin = threeD_to_oneD(ix, iy, izMinusMinus, ipx, ipy, ipz, nx, ny, nz, npx, npy, npz) i_phasezmaxmax = threeD_to_oneD(ix, iy, izPlusPlus, ipx, ipy, ipz, nx, ny, nz, npx, npy, npz) # enforce periodical boundary conditions, ipxPlus --> ipx+1 ipxPlus = (ipx+1)%npx ipyPlus = (ipy+1)%npy ipzPlus = (ipz+1)%npz # -1%3 should be 2, but cuda yields 0, so we use ipxMinus = (ipx-1+npx)%npx ipyMinus = (ipy-1+npy)%npy ipzMinus = (ipz-1+npz)%npz # convert six-d to one-d i_phasepxmin = threeD_to_oneD(ix, iy, iz, ipxMinus, ipy, ipz, nx, ny, nz, npx, npy, npz) i_phasepxmax = threeD_to_oneD(ix, iy, iz, ipxPlus, ipy, ipz, nx, ny, nz, npx, npy, npz) i_phasepymin = threeD_to_oneD(ix, iy, iz, ipx, ipyMinus, ipz, nx, ny, nz, npx, npy, npz) i_phasepymax = threeD_to_oneD(ix, iy, iz, ipx, ipyPlus, ipz, nx, ny, nz, npx, npy, npz) i_phasepzmin = threeD_to_oneD(ix, iy, iz, ipx, ipy, ipzMinus, nx, ny, nz, npx, npy, npz) i_phasepzmax = threeD_to_oneD(ix, iy, iz, ipx, ipy, ipzPlus, nx, ny, nz, npx, npy, npz)
- The equation is decomposed into three parts :
The calculation is performed on the first part and its momentum:
for p_type in range(num_of_particle_types):
# masses
mp_squared = masses[p_type]**2
# loop through all momentum levels
for i_level in range(number_momentum_levels):
fcurrent = f_x_p_t[i_level, p_type, i_grid]
# parts of the numerator in UPFD
numerator0 = fcurrent/dt/3.
# vx, vy, vz
vx = 0.
vy = 0.
vz = 0.
# Fx, Fy, Fz
Fcurrentpx = 0.
Fcurrentpy = 0.
Fcurrentpz = 0.
# acquire p from the central value
# Note that for different particles, they have different dpx and px_left_bound
# the momentum level corresponds to the level of straitification
px = ((ipx+0.5)*dpx[p_type] - half_px[p_type])/(npx**i_level)
py = ((ipy+0.5)*dpy[p_type] - half_py[p_type])/(npy**i_level)
pz = ((ipz+0.5)*dpz[p_type] - half_pz[p_type])/(npz**i_level)
# p0 for current grid
p0 = math.sqrt(mp_squared*c**2+px**2+py**2+pz**2)
# p0 = math.sqrt((mp_squared*c)**2+px**2+py**2+pz**2)
if p0 > 10**(-13):
vx = c*px/p0
vy = c*py/p0
vz = c*pz/p0
The values of the distribution function at the left and right boundaries are constrained:
fleftx = left_bound_detect(ixMinus, f_x_p_t, i_level, p_type, i_phasexmin)
flefty = left_bound_detect(iyMinus, f_x_p_t, i_level, p_type, i_phaseymin)
fleftz = left_bound_detect(izMinus, f_x_p_t, i_level, p_type, i_phasezmin)
frightx = right_bound_detect(ixPlus, nx, f_x_p_t, i_level, p_type, i_phasexmax)
frighty = right_bound_detect(iyPlus, ny, f_x_p_t, i_level, p_type, i_phaseymax)
frightz = right_bound_detect(izPlus, nz, f_x_p_t, i_level, p_type, i_phasezmax)
fleftleftx = left_bound_detect(ixMinusMinus, f_x_p_t, i_level, p_type, i_phasexminmin)
fleftlefty = left_bound_detect(iyMinusMinus, f_x_p_t, i_level, p_type, i_phaseyminmin)
fleftleftz = left_bound_detect(izMinusMinus, f_x_p_t, i_level, p_type, i_phasezminmin)
frightrightx = right_bound_detect(ixPlusPlus, nx, f_x_p_t, i_level, p_type, i_phasexmaxmax)
frightrighty = right_bound_detect(iyPlusPlus, ny, f_x_p_t, i_level, p_type, i_phaseymaxmax)
frightrightz = right_bound_detect(izPlusPlus, nz, f_x_p_t, i_level, p_type, i_phasezmaxmax)
The invoked functions are shown below:
@cuda.jit(device=True)
def left_bound_detect(ixMinus, f_x_p_t, i_level, p_type, i_phasexmin):
# if ixMinus is smaller than 0, set f -> 0
if ixMinus < -0.5:
return 0.
else:
return f_x_p_t[i_level, p_type, i_phasexmin]
@cuda.jit(device=True)
def right_bound_detect(ixPlus, nx, f_x_p_t, i_level, p_type, i_phasexmax):
# if ixPlus is larger than nx-1, set f -> 0
if ixPlus > (nx-0.5):
return 0.
else:
return f_x_p_t[i_level, p_type, i_phasexmax]
- The computation of the “Drift term” and “Vlasov term” is provided in two different ways:
First-order and Second-order.:
- whether_drift—-Parameter for the computation of the Drift term (>0.5 indicates computation);
- whether_Vlasov—-Parameter for the computation of the Vlasov term (>0.5 indicates computation);
- drift_order—-Parameter for selecting the order (>1.5 indicates second-order format; <1.5 indicates first-order format) Default value in the program is 2, which can be modified by the user.
- The Drifit term is initially computed using the first-order approach.
The expression in the program is:
if whether_drift > 0.5:
if drift_order < 1.5:
# first order upwind
numerator1 = UPFD(dx, vx, fleftx, frightx) + UPFD(dy, vy, flefty, frighty) + UPFD(dz, vz, fleftz, frightz)
# denominator
# 1/dt+vx/dx+vy/dy+vz/dz+Fx/dpx+Fy/dpy+Fz/dpz
denominator1 = 1/dt+3*abs(vx/dx)+3*abs(vy/dy)+3*abs(vz/dz)
f_drift = (numerator0+numerator1)/denominator1
The functions used are shown below:
@cuda.jit(device=True)
def UPFD(dx, vx, fleft, fright):
# method from doi:10.1016/j.mcm.2011.05.005
if vx > 0.:
if abs(vx)<10**(-19):
vx = 0.
return vx/dx*fleft
else:
if abs(vx)<10**(-19):
vx = 0.
return -vx/dx*fright
- Calculation of the drifit term in a second-order manner
elif drift_order > 1.5:
theta = 10**(-10)
###############################################
# flux_x
F_plus_i_x = F_plus_minus(1, vx, fcurrent)
F_plus_i_plus_1_x = F_plus_minus(1, vx, frightx)
F_plus_i_minus_1_x = F_plus_minus(1, vx, fleftx)
F_plus_i_minus_2_x = F_plus_minus(1, vx, fleftleftx)
F_minus_i_x = F_plus_minus(-1, vx, fcurrent)
F_minus_i_minus_1_x = F_plus_minus(-1, vx, fleftx)
F_minus_i_plus_1_x = F_plus_minus(-1, vx, frightx)
F_minus_i_plus_2_x = F_plus_minus(-1, vx, frightrightx)
#################################################
# flux_y
F_plus_i_y = F_plus_minus(1, vy, fcurrent)
F_plus_i_plus_1_y = F_plus_minus(1, vy, frighty)
F_plus_i_minus_1_y = F_plus_minus(1, vy, flefty)
F_plus_i_minus_2_y = F_plus_minus(1, vy, fleftlefty)
F_minus_i_y = F_plus_minus(-1, vy, fcurrent)
F_minus_i_minus_1_y = F_plus_minus(-1, vy, flefty)
F_minus_i_plus_1_y = F_plus_minus(-1, vy, frighty)
F_minus_i_plus_2_y = F_plus_minus(-1, vy, frightrighty)
####################################################
# flux_z
F_plus_i_z = F_plus_minus(1, vz, fcurrent)
F_plus_i_plus_1_z = F_plus_minus(1, vz, frightz)
F_plus_i_minus_1_z = F_plus_minus(1, vz, fleftz)
F_plus_i_minus_2_z = F_plus_minus(1, vz, fleftleftz)
F_minus_i_z = F_plus_minus(-1, vz, fcurrent)
F_minus_i_minus_1_z = F_plus_minus(-1, vz, fleftz)
F_minus_i_plus_1_z = F_plus_minus(-1, vz, frightz)
F_minus_i_plus_2_z = F_plus_minus(-1, vz, frightrightz)
The functions used are shown below:
@cuda.jit(device=True)
def F_plus_minus(sign, vx, fcurrent):
if vx*sign > 0.:
return vx*fcurrent
else:
return 0.
- Flux Limiter:
# flux limiter_x
phi_plus_i_plus_half_x = phi_plus(F_plus_i_minus_1_x, F_plus_i_x, F_plus_i_plus_1_x, theta)
phi_minus_i_plus_half_x = phi_minus(F_minus_i_x, F_minus_i_plus_1_x, F_minus_i_plus_2_x, theta)
phi_plus_i_minus_half_x = phi_plus(F_plus_i_minus_2_x, F_plus_i_minus_1_x, F_plus_i_x, theta)
phi_minus_i_minus_half_x = phi_minus(F_minus_i_minus_1_x, F_minus_i_x, F_minus_i_plus_1_x, theta)
# flux limiter_y
phi_plus_i_plus_half_y = phi_plus(F_plus_i_minus_1_y, F_plus_i_y, F_plus_i_plus_1_y, theta)
phi_minus_i_plus_half_y = phi_minus(F_minus_i_y, F_minus_i_plus_1_y, F_minus_i_plus_2_y, theta)
phi_plus_i_minus_half_y = phi_plus(F_plus_i_minus_2_y, F_plus_i_minus_1_y, F_plus_i_y, theta)
phi_minus_i_minus_half_y = phi_minus(F_minus_i_minus_1_y, F_minus_i_y, F_minus_i_plus_1_y, theta)
# flux limiter_z
phi_plus_i_plus_half_z = phi_plus(F_plus_i_minus_1_z, F_plus_i_z, F_plus_i_plus_1_z, theta)
phi_minus_i_plus_half_z = phi_minus(F_minus_i_z, F_minus_i_plus_1_z, F_minus_i_plus_2_z, theta)
phi_plus_i_minus_half_z = phi_plus(F_plus_i_minus_2_z, F_plus_i_minus_1_z, F_plus_i_z, theta)
phi_minus_i_minus_half_z = phi_minus(F_minus_i_minus_1_z, F_minus_i_z, F_minus_i_plus_1_z, theta)
The functions used are shown below:
# define limiter function phi_minus
@cuda.jit(device=True)
def phi_minus(F_minus, F_minus_right, F_minus_rightright, theta):
if abs(F_minus_right-F_minus_rightright) < 10**(-18):
monotonicity_preservation = 0.
else:
monotonicity_preservation = (F_minus-F_minus_right)/(F_minus_right-F_minus_rightright)
if abs(F_minus_right) < 10**(-18):
posivity_preservation = 2/max(theta, 0.)
else:
posivity_preservation = 2/max(theta, (F_minus_rightright-F_minus_right)/F_minus_right)
return max(0., min(1., monotonicity_preservation, posivity_preservation))
# define limiter function phi_plus
@cuda.jit(device=True)
def phi_plus(F_plus_left, F_plus, F_plus_right, theta):
if abs(F_plus-F_plus_left) < 10**(-18):
monotonicity_preservation = 0.
else:
monotonicity_preservation = (F_plus_right-F_plus)/(F_plus-F_plus_left)
if abs(F_plus) < 10**(-18):
posivity_preservation = 2/max(theta, 0.)
else:
posivity_preservation = 2/max(theta, (F_plus_left-F_plus)/F_plus)
return max(0., min(1., monotonicity_preservation, posivity_preservation))
- The final step in calculating the Drifit term in the second-order format integrates the results of the calculation
# flux at the middle grid_x
F_i_plus_half_x = F_i_plus_half(vx, F_plus_i_x, phi_plus_i_plus_half_x, F_plus_i_minus_1_x, F_minus_i_plus_1_x, phi_minus_i_plus_half_x, F_minus_i_plus_2_x)
F_i_minus_half_x = F_i_plus_half(vx, F_plus_i_minus_1_x, phi_plus_i_minus_half_x, F_plus_i_minus_2_x, F_minus_i_x, phi_minus_i_minus_half_x, F_minus_i_plus_1_x)
# flux at the middle grid_y
F_i_plus_half_y = F_i_plus_half(vy, F_plus_i_y, phi_plus_i_plus_half_y, F_plus_i_minus_1_y, F_minus_i_plus_1_y, phi_minus_i_plus_half_y, F_minus_i_plus_2_y)
F_i_minus_half_y = F_i_plus_half(vy, F_plus_i_minus_1_y, phi_plus_i_minus_half_y, F_plus_i_minus_2_y, F_minus_i_y, phi_minus_i_minus_half_y, F_minus_i_plus_1_y)
# flux at the middle grid_z
F_i_plus_half_z = F_i_plus_half(vz, F_plus_i_z, phi_plus_i_plus_half_z, F_plus_i_minus_1_z, F_minus_i_plus_1_z, phi_minus_i_plus_half_z, F_minus_i_plus_2_z)
F_i_minus_half_z = F_i_plus_half(vz, F_plus_i_minus_1_z, phi_plus_i_minus_half_z, F_plus_i_minus_2_z, F_minus_i_z, phi_minus_i_minus_half_z, F_minus_i_plus_1_z)
f_drift = fcurrent/3 - (dt/dx*(F_i_plus_half_x-F_i_minus_half_x) + dt/dy*(F_i_plus_half_y-F_i_minus_half_y) + dt/dz*(F_i_plus_half_z-F_i_minus_half_z))
The functions used are shown below:
# define flux F_i_plus_half
@cuda.jit(device=True)
def F_i_plus_half(vx, F_plus_i, phi_plus_i_plus_half, F_plus_i_minus_1, F_minus_i_plus_1, phi_minus_i_plus_half, F_minus_i_plus_2):
return F_plus_i + 0.5*phi_plus_i_plus_half*(F_plus_i-F_plus_i_minus_1) + F_minus_i_plus_1 + 0.5*phi_minus_i_plus_half*(F_minus_i_plus_1-F_minus_i_plus_2)
- The Vlasov term is calculated using the same approach.
if whether_Vlasov > 0.5:
if drift_order < 1.5:
# first order upwind
# Vlasov term
fleftpx = f_x_p_t[i_level, p_type, i_phasepxmin]
fleftpy = f_x_p_t[i_level, p_type, i_phasepymin]
fleftpz = f_x_p_t[i_level, p_type, i_phasepzmin]
frightpx = f_x_p_t[i_level, p_type, i_phasepxmax]
frightpy = f_x_p_t[i_level, p_type, i_phasepymax]
frightpz = f_x_p_t[i_level, p_type, i_phasepzmax]
# External forces at p, p-dpx, px+dpx
Fcurrentpx = Fx[i_level, p_type, i_grid]
Fcurrentpy = Fy[i_level, p_type, i_grid]
Fcurrentpz = Fz[i_level, p_type, i_grid]
numerator2 = UPFD(dpx[p_type], Fcurrentpx, fleftpx, frightpx) + UPFD(dpy[p_type], Fcurrentpy, fleftpy, frightpy) + UPFD(dpz[p_type], Fcurrentpz, fleftpz, frightpz)
# denominator
# 1/dt+vx/dx+vy/dy+vz/dz+Fx/dpx+Fy/dpy+Fz/dpz
denominator2 = 1/dt+3*abs(Fcurrentpx/dpx[p_type])+3*abs(Fcurrentpy/dpy[p_type])+3*abs(Fcurrentpz/dpz[p_type])
f_driftp = (numerator0+numerator2)/denominator2
- The collision term is incorporated into the distribution function.
cuda.atomic.add(f_updated, (i_level, p_type, i_grid), fcurrent/3 + dt*collision_term[i_level, p_type, i_grid])
- Adjustments are made to the values at the problematic boundaries.
- The same method is applied to handle the x, y, and z directions separately.
denominator = 1/dt+3*abs(vx/dx)+3*abs(vy/dy)+3*abs(vz/dz)
# corrections due to the physical boundary, we consider reflection and absorption or in between
if vx > 0:
# if vx > 0, the right most boundary needs to be considered
if ix > (nx - 1.5):
# mirror index for ipx
ipx_mirror = mirror(npx, ipx)
i_grid_mirror = threeD_to_oneD(ix, iy, iz, ipx_mirror, ipy, ipz, nx, ny, nz, npx, npy, npz)
cuda.atomic.add(f_updated, (i_level, p_type, i_grid_mirror), UPFD(dx, vx, fcurrent, 0)/(denominator)*(1-x_bound_config[iy, iz]))
else:
# if vx < 0., the left most boundary needs to be considered
if ix < 0.5:
# mirror in dex for ipx
ipx_mirror = mirror(npx, ipx)
i_grid_mirror = threeD_to_oneD(ix, iy, iz, ipx_mirror, ipy, ipz, nx, ny, nz, npx, npy, npz)
cuda.atomic.add(f_updated, (i_level, p_type, i_grid_mirror), UPFD(dx, vx, 0, fcurrent)/(denominator)*(1-x_bound_config[iy, iz]))
The functions used are shown below:
@cuda.jit(device=True)
def mirror(ngrid, i):
return ngrid-i-1
@cuda.jit(device=True)
def threeD_to_oneD(ix, iy, iz, ipx, ipy, ipz, nx, ny, nz, npx, npy, npz):
return ipz+ipy*npz+ipx*npz*npy+iz*npz*npy*npx+iy*npz*npy*npx*nz+ix*npz*npy*npx*nz*ny