Commit ae7ef0d9 by Karsa Zoltán István

Merge branch 'cupy' into 'master'

Cupy

See merge request !1
parents e9a4645a c274dad0
...@@ -49,3 +49,4 @@ coverage.xml ...@@ -49,3 +49,4 @@ coverage.xml
# Sphinx documentation # Sphinx documentation
docs/_build/ docs/_build/
.vscode/
\ No newline at end of file
This diff is collapsed. Click to expand it.
import numpy, matplotlib.pyplot as plt import cupy as cp, matplotlib.pyplot as plt
from numba import cuda from numba import cuda
from utils import expSpace from utils import expSpace
...@@ -6,46 +6,87 @@ def gen_angels_to_pick(n, plot = False): ...@@ -6,46 +6,87 @@ def gen_angels_to_pick(n, plot = False):
if n % 2 == 0: if n % 2 == 0:
raise "n%2==0" raise "n%2==0"
N = int((n + 3) / 2) N = int((n + 3) / 2)
X1 = expSpace(0.0, numpy.pi/2.0, N) X1 = expSpace(0.0, cp.pi/2.0, N)
X3 = expSpace(numpy.pi/2.0, numpy.pi, N) X3 = expSpace(cp.pi/2.0, cp.pi, N)
X3 = -1*X3 + 3.0*numpy.pi/2.0 X3 = -1*X3 + 3.0*cp.pi/2.0
anglestopick = numpy.concatenate((X1, X3), axis=None) anglestopick = cp.concatenate((X1, X3), axis=None)
anglestopick = numpy.unique(anglestopick) # Vigyázni vele! anglestopick = cp.unique(anglestopick) # Vigyázni vele!
anglestopick = anglestopick[1:-1] anglestopick = anglestopick[1:-1]
if plot: if plot:
Y = numpy.zeros(n) Y = cp.zeros(n)
plt.plot(anglestopick, Y, '|') plt.plot(anglestopick, Y, '|')
plt.show() plt.show()
return anglestopick return anglestopick
parosit = cp.RawKernel(r'''
extern "C"
__global__
void parosit(const double* x1, const double* x2, double* a, double* b, const int m, const double PI) {
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (m <= tid || m*m <= tid*m+m-1)
return;
float alpha = x1[tid];
if (m*m <= tid*m+m-1)
return;
for (int i = 0; i < m; i++) {
float betha = x2[i];
if ((alpha + betha) < PI && betha >= alpha && alpha > 0.0) {
a[tid*m+i] = alpha;
b[tid*m+i] = betha;
} else {
a[tid*m+i] = -1.0;
b[tid*m+i] = -1.0;
}
}
}
''', 'parosit')
parosit2 = cp.RawKernel(r'''
extern "C"
__global__
void parosit2(const double* x1, const double* x2, double* a, double* b, const int m, const double PI) {
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (m <= tid || m*m <= tid*m+m-1)
return;
float alpha = x1[tid];
for (int i = 0; i < m; i++) {
float betha = x2[i];
if ((alpha + betha) < PI && alpha > 0.0) {
a[tid*m+i] = alpha;
b[tid*m+i] = betha;
} else {
a[tid*m+i] = -1.0;
b[tid*m+i] = -1.0;
}
}
}
''', 'parosit2')
def angles_alap(anglestopick, plot = False): def angles_alap(anglestopick, plot = False):
alpha_arr = numpy.array([]) m = anglestopick.size
beta_arr = numpy.array([]) alpha_arr = cp.zeros((m, m), dtype=cp.float64)
beta_arr = cp.zeros((m, m), dtype=cp.float64)
for alpha in anglestopick: blocksize = int((m + 64 - 1) / 64)
for beta in anglestopick: parosit((blocksize,), (64,), (anglestopick, anglestopick, alpha_arr, beta_arr, m, cp.pi))
if alpha + beta < numpy.pi and beta >= alpha and alpha != 0.0: # vigyázni
alpha_arr = numpy.insert(alpha_arr, 0, alpha) tompa_beta_arr = beta_arr[beta_arr > cp.pi]
beta_arr = numpy.insert(beta_arr, 0, beta) tompa_beta_mpi_arr = tompa_beta_arr - cp.pi/2
hegyes_beta_arr = beta_arr[(beta_arr <= cp.pi) & (beta_arr > 0.0)]
tompa_beta_arr = beta_arr[beta_arr > numpy.pi] tompa_alpha_arr = alpha_arr[beta_arr > cp.pi]
tompa_beta_mpi_arr = tompa_beta_arr - numpy.pi/2 hegyes_alpha_arr = alpha_arr[(beta_arr <= cp.pi) & (beta_arr > 0.0)]
hegyes_beta_arr = beta_arr[beta_arr <= numpy.pi]
tompa_alpha_arr = alpha_arr[beta_arr > numpy.pi] tghB = cp.tan(hegyes_beta_arr)
hegyes_alpha_arr = alpha_arr[beta_arr <= numpy.pi] tghA = cp.tan(hegyes_alpha_arr)
tgtB_mpi = cp.tan(tompa_beta_mpi_arr)
tghB = numpy.tan(hegyes_beta_arr) tgtA = cp.tan(tompa_alpha_arr)
tghA = numpy.tan(hegyes_alpha_arr)
tgtB_mpi = numpy.tan(tompa_beta_mpi_arr)
tgtA = numpy.tan(tompa_alpha_arr)
# hegyes # hegyes
sztgh = tghB*tghA sztgh = tghB * tghA
otgh = tghA + tghB otgh = tghA + tghB
hCy = sztgh/otgh hCy = sztgh / otgh
hCx = hCy / tghA hCx = hCy / tghA
# tompa # tompa
...@@ -54,32 +95,29 @@ def angles_alap(anglestopick, plot = False): ...@@ -54,32 +95,29 @@ def angles_alap(anglestopick, plot = False):
tCy = tgtA / mtgt tCy = tgtA / mtgt
tCx = tCy / tgtA tCx = tCy / tgtA
Cy = numpy.concatenate((hCy, tCy), axis=None) Cy = cp.concatenate((hCy, tCy), axis=None)
Cx = numpy.concatenate((hCx, tCx), axis=None) Cx = cp.concatenate((hCx, tCx), axis=None)
return Cx, Cy return Cx, Cy
def angles_ratet(anglestopick, plot = False): def angles_ratet(anglestopick, plot = False):
alpha_arr = numpy.array([]) m = anglestopick.size
beta_arr = numpy.array([]) alpha_arr = cp.zeros((m, m), dtype=cp.float64)
beta_arr = cp.zeros((m, m), dtype=cp.float64)
for alpha in anglestopick: blocksize = int((m + 64 - 1) / 64)
for beta in anglestopick: parosit2((blocksize,), (m,), (anglestopick, anglestopick, alpha_arr, beta_arr, m, cp.pi))
if alpha + beta < numpy.pi and alpha != 0.0: # vigyázni
alpha_arr = numpy.insert(alpha_arr, 0, alpha) tompa_beta_arr = beta_arr[beta_arr > cp.pi]
beta_arr = numpy.insert(beta_arr, 0, beta) tompa_beta_mpi_arr = tompa_beta_arr - cp.pi/2
hegyes_beta_arr = beta_arr[(beta_arr <= cp.pi) & (beta_arr > 0.0)]
tompa_beta_arr = beta_arr[beta_arr > numpy.pi] tompa_alpha_arr = alpha_arr[beta_arr > cp.pi]
tompa_beta_mpi_arr = tompa_beta_arr - numpy.pi/2 hegyes_alpha_arr = alpha_arr[(beta_arr <= cp.pi) & (beta_arr > 0.0)]
hegyes_beta_arr = beta_arr[beta_arr <= numpy.pi]
tompa_alpha_arr = alpha_arr[beta_arr > numpy.pi] tghB = cp.tan(hegyes_beta_arr)
hegyes_alpha_arr = alpha_arr[beta_arr <= numpy.pi] tghA = cp.tan(hegyes_alpha_arr)
tgtB_mpi = cp.tan(tompa_beta_mpi_arr)
tghB = numpy.tan(hegyes_beta_arr) tgtA = cp.tan(tompa_alpha_arr)
tghA = numpy.tan(hegyes_alpha_arr)
tgtB_mpi = numpy.tan(tompa_beta_mpi_arr)
tgtA = numpy.tan(tompa_alpha_arr)
# hegyes # hegyes
sztgh = tghB*tghA sztgh = tghB*tghA
...@@ -93,14 +131,14 @@ def angles_ratet(anglestopick, plot = False): ...@@ -93,14 +131,14 @@ def angles_ratet(anglestopick, plot = False):
tCy = tgtA / mtgt tCy = tgtA / mtgt
tCx = tCy / tgtA tCx = tCy / tgtA
Ey = numpy.concatenate((hCy, tCy), axis=None) Ey = cp.concatenate((hCy, tCy), axis=None)
Ex = numpy.concatenate((hCx, tCx), axis=None) Ex = cp.concatenate((hCx, tCx), axis=None)
cos = numpy.cos(anglestopick) cos = cp.cos(anglestopick)
sin = numpy.sin(anglestopick) sin = cp.sin(anglestopick)
Dx = numpy.outer(numpy.full(anglestopick.size, 1.0), Ex).flatten() Dx = cp.outer(cp.full(anglestopick.size, 1.0, dtype=cp.float64), Ex).flatten()
Dy = numpy.outer(cos, Ey).flatten() Dy = cp.outer(cos, Ey).flatten()
Dz = numpy.outer(sin, Ey).flatten() Dz = cp.outer(sin, Ey).flatten()
return Dx, Dy, Dz return Dx, Dy, Dz
\ No newline at end of file
from numba import cuda from numba import cuda
import numpy import cupy as cp
def start_kernel(Cx, Cy, Dx, Dy, Dz, v, w): with open('epgpu.cu') as f:
Cx_global = cuda.to_device(Cx) code = f.read()
Cy_global = cuda.to_device(Cy)
Dx_global = cuda.to_device(Dx)
Dy_global = cuda.to_device(Dy)
Dz_global = cuda.to_device(Dz)
egyensulyi_mtx = cuda.device_array((Dz.size, 4, 4))
gpu_egyensuly[256, 256](v, w, Cx_global, Cy_global, Dx_global, Dy_global, Dz_global, egyensulyi_mtx)
return egyensulyi_mtx.copy_to_host()
@cuda.jit
def gpu_egyensuly(v, w, Cx, Cy, Dx, Dy, Dz, egyensulyi_mtx):
ABC_oldal(v, w, Cx, Cy, Dx, Dy, Dz, egyensulyi_mtx)
@cuda.jit(device=True)
def ABC_oldal(v, w, Cx_arr, Cy_arr, Dx_arr, Dy_arr, Dz_arr, egyensulyi_mtx):
pos = cuda.grid(1)
if pos >= Dx_arr.size:
return
Cx = Cx_arr[pos]
Cy = Cy_arr[pos]
Dx = Dx_arr[pos]
Dy = Dy_arr[pos]
Dz = Dz_arr[pos]
ABx = 1.0/v
ABy = 0.0
ABz = 0.0
ACx = (Cx - 0.0)/v kers = ('gpu_egyensulyi', )
ACy = (Cy - 0.0)/v ep_pontok_module = cp.RawModule(code=code, options=('--std=c++11',), name_expressions=kers)
ACz = 0.0 # (0.0 - 0.0)/v fun = ep_pontok_module.get_function(kers[0])
for i in range(v+1): def start_kernel(Cx, Cy, Dx, Dy, Dz, v, w):
for j in range(v+1): print("Res size (byte): ", Cx.size*Dx.size*4*4)
oKx = i * ABx + j * ACx print(Cx.size, ",", Cy.size, ",", Dx.size, ",", Dy.size, ",", Dz.size)
oKy = i * ABy + j * ACy egyensulyi_mtx = cp.zeros((Cx.size*Dx.size, 4, 4), dtype=cp.int8)
oKz = i * ABz + j * ACz numBlock = int((Cx.size*Dx.size + 256 - 1) / 256)
Lx = (Dx - oKx)/w fun((numBlock,), (256,), (v, w, Cx, Cy, Dx, Dy, Dz, Cx.size, Dx.size, egyensulyi_mtx))
Ly = (Dy - oKy)/w
Lz = (Dz - oKz)/w
for k in range(w+1):
Sx = oKx + Lx*k
Sy = oKy + Ly*k
Sz = oKz + Lz*k
S = stabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz)
U = instabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz)
H = nyereg_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz)
if S + U - H == 2:
egyensulyi_mtx[pos, S, U] = 1
# Sxyz - a skp pontja, (0,0,0), (0,0,1), C(x,y,0), D(x,y,z) a tetraéderhez tartozó csúcspontok
# return stabil sp száma S súlypontnál
@cuda.jit(device=True)
def stabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz):
return 0
# Sxyz - a skp pontja, (0,0,0), (0,0,1), C(x,y,0), D(x,y,z) a tetraéderhez tartozó csúcspontok
# return instabil sp száma S a testre nézve
@cuda.jit(device=True)
def instabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz):
return 0
# Sxyz - a skp pontja, (0,0,0), (0,0,1), C(x,y,0), D(x,y,z) a tetraéderhez tartozó csúcspontok return egyensulyi_mtx
# return nyereg sp száma S az IJKL testre nézve
@cuda.jit(device=True)
def nyereg_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz):
return 0
\ No newline at end of file
#!/bin/bash
#SBATCH --job-name=gpgpu # a job neve
#SBATCH -N 1 # hány node-ot szeretnénk használni
#SBATCH -p gpu # melyik partícióból
#SBATCH --gres gpu # melyik partícióból
#SBATCH --time=20:00:00 # maximális idő
#SBATCH -o politopok.out # kimeneti fájl
module load anaconda
for n in {40..55}
do
echo "python tetrarun.py -n $((n*2 + 1)) -v 10 -w 10"
time python tetrarun.py -n $((n*2 + 1)) -v 10 -w 10
done
#!/bin/bash
#SBATCH --job-name=gpgpu # a job neve
#SBATCH -N 1 # hány node-ot szeretnénk használni
#SBATCH -p gpu # melyik partícióból
#SBATCH --gres gpu # melyik partícióból
#SBATCH --time=20:00:00 # maximális idő
#SBATCH -o politopok.out # kimeneti fájl
module load anaconda
for n in {20..30}
do
echo "python tetrarun.py -n $((n*2 + 1)) -v 10 -w 10"
time python tetrarun.py -n $((n*2 + 1)) -v 100 -w 100
done
#bin/bash
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
\ No newline at end of file
...@@ -32,16 +32,15 @@ def main(argv): ...@@ -32,16 +32,15 @@ def main(argv):
outputfile = arg outputfile = arg
elif opt in ("-p", "--plot"): elif opt in ("-p", "--plot"):
PLOT = True PLOT = True
print('Output file is: ', outputfile)
print('Conf: ', [n, v, w])
space = gen_angels_to_pick(n, PLOT) space = gen_angels_to_pick(n, PLOT)
Cx, Cy = angles_alap(space, PLOT) Cx, Cy = angles_alap(space, PLOT)
Dx, Dy, Dz = angles_ratet(space) Dx, Dy, Dz = angles_ratet(space)
res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w) res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w)
print(res) #print(res)
if __name__ == "__main__": if __name__ == "__main__":
main(sys.argv[1:]) main(sys.argv[1:])
\ No newline at end of file
import numpy import cupy as cp
def expSpace(min, max, N, exponentialliness = 20.0): def expSpace(min, max, N, exponentialliness = 20.0):
LinVec = numpy.linspace(0, numpy.log10(exponentialliness+1),N) LinVec = cp.linspace(0, cp.log10(exponentialliness+1, dtype=cp.float64),N, dtype=cp.float64)
return (max-min)/exponentialliness * (10.0**LinVec - 1) + min return (max-min)/exponentialliness * (10.0**LinVec - 1) + min
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment