Commit ae7ef0d9 by Karsa Zoltán István

Merge branch 'cupy' into 'master'

Cupy

See merge request !1
parents e9a4645a c274dad0
......@@ -49,3 +49,4 @@ coverage.xml
# Sphinx documentation
docs/_build/
.vscode/
\ No newline at end of file
This diff is collapsed. Click to expand it.
import numpy, matplotlib.pyplot as plt
import cupy as cp, matplotlib.pyplot as plt
from numba import cuda
from utils import expSpace
......@@ -6,46 +6,87 @@ def gen_angels_to_pick(n, plot = False):
if n % 2 == 0:
raise "n%2==0"
N = int((n + 3) / 2)
X1 = expSpace(0.0, numpy.pi/2.0, N)
X3 = expSpace(numpy.pi/2.0, numpy.pi, N)
X3 = -1*X3 + 3.0*numpy.pi/2.0
X1 = expSpace(0.0, cp.pi/2.0, N)
X3 = expSpace(cp.pi/2.0, cp.pi, N)
X3 = -1*X3 + 3.0*cp.pi/2.0
anglestopick = numpy.concatenate((X1, X3), axis=None)
anglestopick = numpy.unique(anglestopick) # Vigyázni vele!
anglestopick = cp.concatenate((X1, X3), axis=None)
anglestopick = cp.unique(anglestopick) # Vigyázni vele!
anglestopick = anglestopick[1:-1]
if plot:
Y = numpy.zeros(n)
Y = cp.zeros(n)
plt.plot(anglestopick, Y, '|')
plt.show()
return anglestopick
parosit = cp.RawKernel(r'''
extern "C"
__global__
void parosit(const double* x1, const double* x2, double* a, double* b, const int m, const double PI) {
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (m <= tid || m*m <= tid*m+m-1)
return;
float alpha = x1[tid];
if (m*m <= tid*m+m-1)
return;
for (int i = 0; i < m; i++) {
float betha = x2[i];
if ((alpha + betha) < PI && betha >= alpha && alpha > 0.0) {
a[tid*m+i] = alpha;
b[tid*m+i] = betha;
} else {
a[tid*m+i] = -1.0;
b[tid*m+i] = -1.0;
}
}
}
''', 'parosit')
parosit2 = cp.RawKernel(r'''
extern "C"
__global__
void parosit2(const double* x1, const double* x2, double* a, double* b, const int m, const double PI) {
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (m <= tid || m*m <= tid*m+m-1)
return;
float alpha = x1[tid];
for (int i = 0; i < m; i++) {
float betha = x2[i];
if ((alpha + betha) < PI && alpha > 0.0) {
a[tid*m+i] = alpha;
b[tid*m+i] = betha;
} else {
a[tid*m+i] = -1.0;
b[tid*m+i] = -1.0;
}
}
}
''', 'parosit2')
def angles_alap(anglestopick, plot = False):
alpha_arr = numpy.array([])
beta_arr = numpy.array([])
for alpha in anglestopick:
for beta in anglestopick:
if alpha + beta < numpy.pi and beta >= alpha and alpha != 0.0: # vigyázni
alpha_arr = numpy.insert(alpha_arr, 0, alpha)
beta_arr = numpy.insert(beta_arr, 0, beta)
tompa_beta_arr = beta_arr[beta_arr > numpy.pi]
tompa_beta_mpi_arr = tompa_beta_arr - numpy.pi/2
hegyes_beta_arr = beta_arr[beta_arr <= numpy.pi]
tompa_alpha_arr = alpha_arr[beta_arr > numpy.pi]
hegyes_alpha_arr = alpha_arr[beta_arr <= numpy.pi]
tghB = numpy.tan(hegyes_beta_arr)
tghA = numpy.tan(hegyes_alpha_arr)
tgtB_mpi = numpy.tan(tompa_beta_mpi_arr)
tgtA = numpy.tan(tompa_alpha_arr)
m = anglestopick.size
alpha_arr = cp.zeros((m, m), dtype=cp.float64)
beta_arr = cp.zeros((m, m), dtype=cp.float64)
blocksize = int((m + 64 - 1) / 64)
parosit((blocksize,), (64,), (anglestopick, anglestopick, alpha_arr, beta_arr, m, cp.pi))
tompa_beta_arr = beta_arr[beta_arr > cp.pi]
tompa_beta_mpi_arr = tompa_beta_arr - cp.pi/2
hegyes_beta_arr = beta_arr[(beta_arr <= cp.pi) & (beta_arr > 0.0)]
tompa_alpha_arr = alpha_arr[beta_arr > cp.pi]
hegyes_alpha_arr = alpha_arr[(beta_arr <= cp.pi) & (beta_arr > 0.0)]
tghB = cp.tan(hegyes_beta_arr)
tghA = cp.tan(hegyes_alpha_arr)
tgtB_mpi = cp.tan(tompa_beta_mpi_arr)
tgtA = cp.tan(tompa_alpha_arr)
# hegyes
sztgh = tghB*tghA
sztgh = tghB * tghA
otgh = tghA + tghB
hCy = sztgh/otgh
hCy = sztgh / otgh
hCx = hCy / tghA
# tompa
......@@ -54,32 +95,29 @@ def angles_alap(anglestopick, plot = False):
tCy = tgtA / mtgt
tCx = tCy / tgtA
Cy = numpy.concatenate((hCy, tCy), axis=None)
Cx = numpy.concatenate((hCx, tCx), axis=None)
Cy = cp.concatenate((hCy, tCy), axis=None)
Cx = cp.concatenate((hCx, tCx), axis=None)
return Cx, Cy
def angles_ratet(anglestopick, plot = False):
alpha_arr = numpy.array([])
beta_arr = numpy.array([])
for alpha in anglestopick:
for beta in anglestopick:
if alpha + beta < numpy.pi and alpha != 0.0: # vigyázni
alpha_arr = numpy.insert(alpha_arr, 0, alpha)
beta_arr = numpy.insert(beta_arr, 0, beta)
tompa_beta_arr = beta_arr[beta_arr > numpy.pi]
tompa_beta_mpi_arr = tompa_beta_arr - numpy.pi/2
hegyes_beta_arr = beta_arr[beta_arr <= numpy.pi]
tompa_alpha_arr = alpha_arr[beta_arr > numpy.pi]
hegyes_alpha_arr = alpha_arr[beta_arr <= numpy.pi]
tghB = numpy.tan(hegyes_beta_arr)
tghA = numpy.tan(hegyes_alpha_arr)
tgtB_mpi = numpy.tan(tompa_beta_mpi_arr)
tgtA = numpy.tan(tompa_alpha_arr)
m = anglestopick.size
alpha_arr = cp.zeros((m, m), dtype=cp.float64)
beta_arr = cp.zeros((m, m), dtype=cp.float64)
blocksize = int((m + 64 - 1) / 64)
parosit2((blocksize,), (m,), (anglestopick, anglestopick, alpha_arr, beta_arr, m, cp.pi))
tompa_beta_arr = beta_arr[beta_arr > cp.pi]
tompa_beta_mpi_arr = tompa_beta_arr - cp.pi/2
hegyes_beta_arr = beta_arr[(beta_arr <= cp.pi) & (beta_arr > 0.0)]
tompa_alpha_arr = alpha_arr[beta_arr > cp.pi]
hegyes_alpha_arr = alpha_arr[(beta_arr <= cp.pi) & (beta_arr > 0.0)]
tghB = cp.tan(hegyes_beta_arr)
tghA = cp.tan(hegyes_alpha_arr)
tgtB_mpi = cp.tan(tompa_beta_mpi_arr)
tgtA = cp.tan(tompa_alpha_arr)
# hegyes
sztgh = tghB*tghA
......@@ -93,14 +131,14 @@ def angles_ratet(anglestopick, plot = False):
tCy = tgtA / mtgt
tCx = tCy / tgtA
Ey = numpy.concatenate((hCy, tCy), axis=None)
Ex = numpy.concatenate((hCx, tCx), axis=None)
Ey = cp.concatenate((hCy, tCy), axis=None)
Ex = cp.concatenate((hCx, tCx), axis=None)
cos = numpy.cos(anglestopick)
sin = numpy.sin(anglestopick)
cos = cp.cos(anglestopick)
sin = cp.sin(anglestopick)
Dx = numpy.outer(numpy.full(anglestopick.size, 1.0), Ex).flatten()
Dy = numpy.outer(cos, Ey).flatten()
Dz = numpy.outer(sin, Ey).flatten()
Dx = cp.outer(cp.full(anglestopick.size, 1.0, dtype=cp.float64), Ex).flatten()
Dy = cp.outer(cos, Ey).flatten()
Dz = cp.outer(sin, Ey).flatten()
return Dx, Dy, Dz
\ No newline at end of file
from numba import cuda
import numpy
import cupy as cp
def start_kernel(Cx, Cy, Dx, Dy, Dz, v, w):
Cx_global = cuda.to_device(Cx)
Cy_global = cuda.to_device(Cy)
Dx_global = cuda.to_device(Dx)
Dy_global = cuda.to_device(Dy)
Dz_global = cuda.to_device(Dz)
egyensulyi_mtx = cuda.device_array((Dz.size, 4, 4))
gpu_egyensuly[256, 256](v, w, Cx_global, Cy_global, Dx_global, Dy_global, Dz_global, egyensulyi_mtx)
return egyensulyi_mtx.copy_to_host()
@cuda.jit
def gpu_egyensuly(v, w, Cx, Cy, Dx, Dy, Dz, egyensulyi_mtx):
ABC_oldal(v, w, Cx, Cy, Dx, Dy, Dz, egyensulyi_mtx)
@cuda.jit(device=True)
def ABC_oldal(v, w, Cx_arr, Cy_arr, Dx_arr, Dy_arr, Dz_arr, egyensulyi_mtx):
pos = cuda.grid(1)
if pos >= Dx_arr.size:
return
Cx = Cx_arr[pos]
Cy = Cy_arr[pos]
Dx = Dx_arr[pos]
Dy = Dy_arr[pos]
Dz = Dz_arr[pos]
ABx = 1.0/v
ABy = 0.0
ABz = 0.0
with open('epgpu.cu') as f:
code = f.read()
ACx = (Cx - 0.0)/v
ACy = (Cy - 0.0)/v
ACz = 0.0 # (0.0 - 0.0)/v
kers = ('gpu_egyensulyi', )
ep_pontok_module = cp.RawModule(code=code, options=('--std=c++11',), name_expressions=kers)
fun = ep_pontok_module.get_function(kers[0])
for i in range(v+1):
for j in range(v+1):
oKx = i * ABx + j * ACx
oKy = i * ABy + j * ACy
oKz = i * ABz + j * ACz
Lx = (Dx - oKx)/w
Ly = (Dy - oKy)/w
Lz = (Dz - oKz)/w
for k in range(w+1):
Sx = oKx + Lx*k
Sy = oKy + Ly*k
Sz = oKz + Lz*k
S = stabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz)
U = instabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz)
H = nyereg_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz)
if S + U - H == 2:
egyensulyi_mtx[pos, S, U] = 1
# Sxyz - a skp pontja, (0,0,0), (0,0,1), C(x,y,0), D(x,y,z) a tetraéderhez tartozó csúcspontok
# return stabil sp száma S súlypontnál
@cuda.jit(device=True)
def stabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz):
return 0
# Sxyz - a skp pontja, (0,0,0), (0,0,1), C(x,y,0), D(x,y,z) a tetraéderhez tartozó csúcspontok
# return instabil sp száma S a testre nézve
@cuda.jit(device=True)
def instabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz):
return 0
def start_kernel(Cx, Cy, Dx, Dy, Dz, v, w):
print("Res size (byte): ", Cx.size*Dx.size*4*4)
print(Cx.size, ",", Cy.size, ",", Dx.size, ",", Dy.size, ",", Dz.size)
egyensulyi_mtx = cp.zeros((Cx.size*Dx.size, 4, 4), dtype=cp.int8)
numBlock = int((Cx.size*Dx.size + 256 - 1) / 256)
fun((numBlock,), (256,), (v, w, Cx, Cy, Dx, Dy, Dz, Cx.size, Dx.size, egyensulyi_mtx))
# Sxyz - a skp pontja, (0,0,0), (0,0,1), C(x,y,0), D(x,y,z) a tetraéderhez tartozó csúcspontok
# return nyereg sp száma S az IJKL testre nézve
@cuda.jit(device=True)
def nyereg_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz):
return 0
\ No newline at end of file
return egyensulyi_mtx
#!/bin/bash
#SBATCH --job-name=gpgpu # a job neve
#SBATCH -N 1 # hány node-ot szeretnénk használni
#SBATCH -p gpu # melyik partícióból
#SBATCH --gres gpu # melyik partícióból
#SBATCH --time=20:00:00 # maximális idő
#SBATCH -o politopok.out # kimeneti fájl
module load anaconda
for n in {40..55}
do
echo "python tetrarun.py -n $((n*2 + 1)) -v 10 -w 10"
time python tetrarun.py -n $((n*2 + 1)) -v 10 -w 10
done
#!/bin/bash
#SBATCH --job-name=gpgpu # a job neve
#SBATCH -N 1 # hány node-ot szeretnénk használni
#SBATCH -p gpu # melyik partícióból
#SBATCH --gres gpu # melyik partícióból
#SBATCH --time=20:00:00 # maximális idő
#SBATCH -o politopok.out # kimeneti fájl
module load anaconda
for n in {20..30}
do
echo "python tetrarun.py -n $((n*2 + 1)) -v 10 -w 10"
time python tetrarun.py -n $((n*2 + 1)) -v 100 -w 100
done
#bin/bash
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
time python tetrarun.py -n 521 -v 100 -w 100
\ No newline at end of file
......@@ -32,16 +32,15 @@ def main(argv):
outputfile = arg
elif opt in ("-p", "--plot"):
PLOT = True
print('Output file is: ', outputfile)
print('Conf: ', [n, v, w])
space = gen_angels_to_pick(n, PLOT)
Cx, Cy = angles_alap(space, PLOT)
Dx, Dy, Dz = angles_ratet(space)
res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w)
print(res)
#print(res)
if __name__ == "__main__":
main(sys.argv[1:])
\ No newline at end of file
import numpy
import cupy as cp
def expSpace(min, max, N, exponentialliness = 20.0):
LinVec = numpy.linspace(0, numpy.log10(exponentialliness+1),N)
LinVec = cp.linspace(0, cp.log10(exponentialliness+1, dtype=cp.float64),N, dtype=cp.float64)
return (max-min)/exponentialliness * (10.0**LinVec - 1) + min
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment