Commit b9a0229d by Zoltan Karsa

init

parents
env/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# C extensions
*.so
# Distribution / packaging
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# Rope
.ropeproject
# Django stuff:
*.log
*.pot
# Sphinx documentation
docs/_build/
import numpy, matplotlib.pyplot as plt
from numba import cuda
from utils import expSpace
def gen_angels_to_pick(n, plot = False):
if n % 2 == 0:
raise "n%2==0"
N = int((n + 3) / 2)
X1 = expSpace(0.0, numpy.pi/2.0, N)
X3 = expSpace(numpy.pi/2.0, numpy.pi, N)
X3 = -1*X3 + 3.0*numpy.pi/2.0
anglestopick = numpy.concatenate((X1, X3), axis=None)
anglestopick = numpy.unique(anglestopick) # Vigyázni vele!
anglestopick = anglestopick[1:-1]
if plot:
Y = numpy.zeros(n)
plt.plot(anglestopick, Y, '|')
plt.show()
return anglestopick
def angles_alap(anglestopick, plot = False):
alpha_arr = numpy.array([])
beta_arr = numpy.array([])
for alpha in anglestopick:
for beta in anglestopick:
if alpha + beta < numpy.pi and beta >= alpha and alpha != 0.0: # vigyázni
alpha_arr = numpy.insert(alpha_arr, 0, alpha)
beta_arr = numpy.insert(beta_arr, 0, beta)
tompa_beta_arr = beta_arr[beta_arr > numpy.pi]
tompa_beta_mpi_arr = tompa_beta_arr - numpy.pi/2
hegyes_beta_arr = beta_arr[beta_arr <= numpy.pi]
tompa_alpha_arr = alpha_arr[beta_arr > numpy.pi]
hegyes_alpha_arr = alpha_arr[alpha_arr <= numpy.pi]
tghB = numpy.tan(hegyes_beta_arr)
tghA = numpy.tan(hegyes_alpha_arr)
tgtB_mpi = numpy.tan(tompa_beta_mpi_arr)
tgtA = numpy.tan(tompa_alpha_arr)
# hegyes
sztgh = tghB*tghA
otgh = tghA + tghB
hCy = sztgh/otgh
hCx = hCy / tghA
# tompa
sztgt = tgtB_mpi*tgtA
mtgt = 1.0 - sztgt
tCy = tgtA / mtgt
tCx = tCy / tgtA
Cy = numpy.concatenate((hCy, tCy), axis=None)
Cx = numpy.concatenate((hCx, tCx), axis=None)
return Cx, Cy
def angles_ratet(anglestopick, plot = False):
alpha_arr = numpy.array([])
beta_arr = numpy.array([])
for alpha in anglestopick:
for beta in anglestopick:
if alpha + beta < numpy.pi and alpha != 0.0: # vigyázni
alpha_arr = numpy.insert(alpha_arr, 0, alpha)
beta_arr = numpy.insert(beta_arr, 0, beta)
tompa_beta_arr = beta_arr[beta_arr > numpy.pi]
tompa_beta_mpi_arr = tompa_beta_arr - numpy.pi/2
hegyes_beta_arr = beta_arr[beta_arr <= numpy.pi]
tompa_alpha_arr = alpha_arr[beta_arr > numpy.pi]
hegyes_alpha_arr = alpha_arr[alpha_arr <= numpy.pi]
tghB = numpy.tan(hegyes_beta_arr)
tghA = numpy.tan(hegyes_alpha_arr)
tgtB_mpi = numpy.tan(tompa_beta_mpi_arr)
tgtA = numpy.tan(tompa_alpha_arr)
# hegyes
sztgh = tghB*tghA
otgh = tghA + tghB
hCy = sztgh/otgh
hCx = hCy / tghA
# tompa
sztgt = tgtB_mpi*tgtA
mtgt = 1.0 - sztgt
tCy = tgtA / mtgt
tCx = tCy / tgtA
Ey = numpy.concatenate((hCy, tCy), axis=None)
Ex = numpy.concatenate((hCx, tCx), axis=None)
cos = numpy.cos(anglestopick)
sin = numpy.sin(anglestopick)
Dx = numpy.outer(numpy.full(anglestopick.size, 1.0), Ex).flatten()
Dy = numpy.outer(cos, Ey).flatten()
Dz = numpy.outer(sin, Ey).flatten()
return Dx, Dy, Dz
\ No newline at end of file
from numba import cuda
import numpy
def start_kernel(Cx, Cy, Dx, Dy, Dz, v, w):
Cx_global = cuda.to_device(Cx)
Cy_global = cuda.to_device(Cy)
Dx_global = cuda.to_device(Dx)
Dy_global = cuda.to_device(Dy)
Dz_global = cuda.to_device(Dz)
egyensulyi_mtx = cuda.device_array((Dz.size, 4, 4))
gpu_egyensuly[256, 256](v, w, Cx_global, Cy_global, Dx_global, Dy_global, Dz_global, egyensulyi_mtx)
return egyensulyi_mtx.copy_to_host()
@cuda.jit
def gpu_egyensuly(v, w, Cx, Cy, Dx, Dy, Dz, egyensulyi_mtx):
ABC_oldal(v, w, Cx, Cy, Dx, Dy, Dz, egyensulyi_mtx)
@cuda.jit(device=True)
def ABC_oldal(v, w, Cx_arr, Cy_arr, Dx_arr, Dy_arr, Dz_arr, egyensulyi_mtx):
pos = cuda.grid(1)
if pos >= Dx_arr.size:
return
Cx = Cx_arr[pos]
Cy = Cy_arr[pos]
Dx = Dx_arr[pos]
Dy = Dy_arr[pos]
Dz = Dz_arr[pos]
ABx = 1.0/v
ABy = 0.0
ABz = 0.0
ACx = (Cx - 0.0)/v
ACy = (Cy - 0.0)/v
ACz = 0.0 # (0.0 - 0.0)/v
for i in range(v+1):
for j in range(v+1):
oKx = i * ABx + j * ACx
oKy = i * ABy + j * ACy
oKz = i * ABz + j * ACz
Lx = (Dx - oKx)/w
Ly = (Dy - oKy)/w
Lz = (Dz - oKz)/w
for k in range(w+1):
Sx = oKx + Lx*k
Sy = oKy + Ly*k
Sz = oKz + Lz*k
S = stabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz)
U = instabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz)
H = nyereg_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz)
if S + U - H == 2:
egyensulyi_mtx[pos, S, U] = 1
# Sxyz - a skp pontja, (0,0,0), (0,0,1), C(x,y,0), D(x,y,z) a tetraéderhez tartozó csúcspontok
# return stabil sp száma S súlypontnál
@cuda.jit(device=True)
def stabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz):
return 0
# Sxyz - a skp pontja, (0,0,0), (0,0,1), C(x,y,0), D(x,y,z) a tetraéderhez tartozó csúcspontok
# return instabil sp száma S a testre nézve
@cuda.jit(device=True)
def instabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz):
return 0
# Sxyz - a skp pontja, (0,0,0), (0,0,1), C(x,y,0), D(x,y,z) a tetraéderhez tartozó csúcspontok
# return nyereg sp száma S az IJKL testre nézve
@cuda.jit(device=True)
def nyereg_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz):
return 0
\ No newline at end of file
from cuda.cuda import CUdevice_attribute, cuDeviceGetAttribute, cuDeviceGetName, cuInit
# Initialize CUDA Driver API
(err,) = cuInit(0)
# Get attributes
err, DEVICE_NAME = cuDeviceGetName(128, 0)
DEVICE_NAME = DEVICE_NAME.decode("ascii").replace("\x00", "")
err, MAX_THREADS_PER_BLOCK = cuDeviceGetAttribute(
CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, 0
)
err, MAX_BLOCK_DIM_X = cuDeviceGetAttribute(
CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, 0
)
err, MAX_GRID_DIM_X = cuDeviceGetAttribute(
CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, 0
)
err, SMs = cuDeviceGetAttribute(
CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, 0
)
print(f"Device Name: {DEVICE_NAME}")
print(f"Maximum number of multiprocessors: {SMs}")
print(f"Maximum number of threads per block: {MAX_THREADS_PER_BLOCK:10}")
print(f"Maximum number of blocks per grid: {MAX_BLOCK_DIM_X:10}")
print(f"Maximum number of threads per grid: {MAX_GRID_DIM_X:10}")
import sys, getopt
from genax import gen_angels_to_pick, angles_alap, angles_ratet
from gpu import start_kernel
def main(argv):
outputfile = 'out.txt'
n = 3
v = 3
w = 3
PLOT = False
try:
opts, args = getopt.getopt(argv,"hpn:v:w:d:")
except getopt.GetoptError as err:
print(err)
print ('tetrarun.py -n <range division:int> -v <> -w <> -o <outputfile>')
sys.exit(2)
for opt, arg in opts:
if opt == '-h':
print ('tetrarun.py -n <range divison> -v <> -w <> -o <outputfile>')
sys.exit()
elif opt in ("-n", "--rangediv"):
n = int(arg)
if n % 2 == 0:
raise "n%2==0"
elif opt in ("-v", "--rangediv"):
v = int(arg)
elif opt in ("-w", "--rangediv"):
w = int(arg)
elif opt in ("-o", "--ofile"):
outputfile = arg
elif opt in ("-p", "--plot"):
PLOT = True
print('Output file is: ', outputfile)
print('Conf: ', [n, v, w])
space = gen_angels_to_pick(n, PLOT)
Cx, Cy = angles_alap(space, PLOT)
Dx, Dy, Dz = angles_ratet(space)
res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w)
print(res)
if __name__ == "__main__":
main(sys.argv[1:])
\ No newline at end of file
import numpy
def expSpace(min, max, N, exponentialliness = 20.0):
LinVec = numpy.linspace(0, numpy.log10(exponentialliness+1),N)
return (max-min)/exponentialliness * (10.0**LinVec - 1) + min
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment