Commit ae7ef0d9 by Karsa Zoltán István

Merge branch 'cupy' into 'master'

Cupy

See merge request !1
parents e9a4645a c274dad0
...@@ -49,3 +49,4 @@ coverage.xml ...@@ -49,3 +49,4 @@ coverage.xml
# Sphinx documentation # Sphinx documentation
docs/_build/ docs/_build/
.vscode/
\ No newline at end of file
//--------------------------
// Three-component double-precision vector; every member function is
// device-only.
struct vec3 {
//--------------------------
    double x, y, z;

    // Builds a vector from its components; omitted components default to 0.
    __device__ vec3(double x0 = 0, double y0 = 0, double z0 = 0) : x(x0), y(y0), z(z0) {}

    // Scaling by / division by a scalar.
    __device__ vec3 operator*(double a) const { return vec3(x * a, y * a, z * a); }
    __device__ vec3 operator/(double a) const { return vec3(x / a, y / a, z / a); }

    // Component-wise sum, difference and product of two vectors.
    __device__ vec3 operator+(const vec3& v) const { return vec3(x + v.x, y + v.y, z + v.z); }
    __device__ vec3 operator-(const vec3& v) const { return vec3(x - v.x, y - v.y, z - v.z); }
    __device__ vec3 operator*(const vec3& v) const { return vec3(x * v.x, y * v.y, z * v.z); }

    // Negation of every component.
    __device__ vec3 operator-() const { return vec3(-x, -y, -z); }

    // Indexed access relative to the address of x (0 -> x, 1 -> y, 2 -> z).
    // No range check is performed; relies on x, y, z being laid out contiguously.
    __device__ double& operator[](int i) { return (&x)[i]; }
};
// Returns the sum of the component-wise products of v1 and v2 (dot product).
__device__ inline double dot(const vec3& v1, const vec3& v2) {
    return (v1.x * v2.x
          + v1.y * v2.y
          + v1.z * v2.z);
}
// Returns the Euclidean norm of v.
// Uses the double-precision sqrt: the previous sqrtf call narrowed the
// double argument to float and so discarded about half of the significant digits.
__device__ inline double length(const vec3& v) { return sqrt(dot(v, v)); }
// Scales v by the reciprocal of its length. A zero-length input is not
// special-cased, so the reciprocal is then a division by zero.
__device__ inline vec3 normalize(const vec3& v) {
    const double reciprocal = 1 / length(v);
    return v * reciprocal;
}
// Returns the cross product v1 x v2.
__device__ inline vec3 cross(const vec3& v1, const vec3& v2) {
    const double cx = v1.y * v2.z - v1.z * v2.y;
    const double cy = v1.z * v2.x - v1.x * v2.z;
    const double cz = v1.x * v2.y - v1.y * v2.x;
    return vec3(cx, cy, cz);
}
// Scalar-on-the-left multiplication; forwards to vec3's member
// operator*(double), which multiplies every component by a.
__device__ inline vec3 operator*(double a, const vec3& v) {
    return v * a;
}
//---------------------------
// Row-major matrix stored as four vec3 rows (4 rows x 3 columns).
struct mat3 {
//---------------------------
    vec3 rows[4];
public:
    // Every row keeps vec3's default value (all components 0).
    __device__ mat3() {}

    // Fills all four rows from twelve scalars given in row-major order.
    __device__ mat3(double m00, double m01, double m02,
                    double m10, double m11, double m12,
                    double m20, double m21, double m22,
                    double m30, double m31, double m32) {
        rows[0] = vec3(m00, m01, m02);
        rows[1] = vec3(m10, m11, m12);
        rows[2] = vec3(m20, m21, m22);
        rows[3] = vec3(m30, m31, m32);
    }

    // Sets the first three rows; rows[3] keeps vec3's default value.
    __device__ mat3(vec3 it, vec3 jt, vec3 kt) {
        rows[0] = it;
        rows[1] = jt;
        rows[2] = kt;
    }

    // Row access: by reference on a mutable matrix, by value on a const one.
    __device__ vec3& operator[](int i) { return rows[i]; }
    __device__ vec3 operator[](int i) const { return rows[i]; }

    // Reinterprets the object's own address as a pointer to double.
    __device__ operator double*() const { return (double*)this; }
};
// Row-vector times matrix: combines the first three rows of mat weighted by
// the components of v. The fourth row of mat is not used.
__device__ inline vec3 operator*(const vec3& v, const mat3& mat) {
    const vec3 partX = v.x * mat.rows[0];
    const vec3 partY = v.y * mat.rows[1];
    const vec3 partZ = v.z * mat.rows[2];
    return partX + partY + partZ;
}
// Matrix product formed row by row: each of the four rows of left is
// multiplied (as a row vector) by right.
__device__ inline mat3 operator*(const mat3& left, const mat3& right) {
    mat3 product;
    int row = 0;
    while (row < 4) {
        product.rows[row] = left.rows[row] * right;
        ++row;
    }
    return product;
}
// Returns dot(Q - planeP, planeNN): the offset of Q from the plane through
// planeP along planeNN. This equals the signed distance when planeNN has
// unit length.
__device__ inline double signeddistance(const vec3& planeNN, const vec3& planeP, const vec3& Q) {
    return dot(Q - planeP, planeNN);
}
// Projects Q orthogonally onto the plane that passes through planeP and has
// normal planeNN (the normal need not be unit length; it is divided by
// dot(planeNN, planeNN)).
__device__ inline vec3 intersection(const vec3& planeNN, const vec3& planeP, const vec3& Q) {
    const double offsetAlongNormal = dot(planeNN, Q) - dot(planeNN, planeP);
    const double normalSquared = (dot(planeNN, planeNN));
    const double t = offsetAlongNormal / normalSquared;
    return (Q - planeNN * t);
}
// Returns true when all three weights of Q with respect to triangle ABC lie
// strictly between 0 and 1. Each weight is
// dot(n, cross(edge, Q - vertex)) / dot(n, n), where n = cross(B-A, C-A).
// Points on an edge or vertex (weight exactly 0 or 1) are rejected.
__device__ inline bool intriangle(const vec3& Q, const vec3& A, const vec3& B, const vec3& C) {
    const vec3 faceNormal = cross(B - A, C - A);
    const double normalSquared = dot(faceNormal, faceNormal);

    const double alpha = dot(faceNormal, cross(C - B, Q - B)) / normalSquared;
    const double betha = dot(faceNormal, cross(A - C, Q - C)) / normalSquared;
    const double gamma = dot(faceNormal, cross(B - A, Q - A)) / normalSquared;

    const bool alphaInside = 0.0 < alpha && alpha < 1.0;
    const bool bethaInside = 0.0 < betha && betha < 1.0;
    const bool gammaInside = 0.0 < gamma && gamma < 1.0;
    return alphaInside && bethaInside && gammaInside;
}
// For the tetrahedron A=(0,0,0), B=(0,0,1), C, D: counts the faces for which
// the orthogonal projection of S onto the face's plane falls strictly inside
// that face. The result is in the range 0..4.
__device__ inline int stabil_ep(const vec3& S, const vec3& C, const vec3& D) {
    const vec3 A(0, 0, 0), B(0, 0, 1.0);
    int hits = 0;

    // Face ABC
    vec3 faceNormal = normalize(cross(B - A, C - A));
    vec3 foot = intersection(faceNormal, A, S);
    if (intriangle(foot, A, B, C))
        ++hits;

    // Face BCD
    faceNormal = normalize(cross(B - C, C - D));
    foot = intersection(faceNormal, B, S);
    if (intriangle(foot, D, B, C))
        ++hits;

    // Face CDA
    faceNormal = normalize(cross(C - A, D - A));
    foot = intersection(faceNormal, A, S);
    if (intriangle(foot, A, D, C))
        ++hits;

    // Face DAB
    faceNormal = normalize(cross(D - A, B - A));
    foot = intersection(faceNormal, D, S);
    if (intriangle(foot, A, D, B))
        ++hits;

    return hits;
}
// Considers the plane through X whose normal is the direction from S to X and
// returns true when A, B and C are all on the same side of it: either all
// three offsets are <= 0, or all three are > 0.
__device__ inline bool instabil_ell(const vec3& S, const vec3& X, const vec3& A, const vec3& B, const vec3& C) {
    const vec3 direction = normalize(X - S);
    const double offA = signeddistance(direction, X, A);
    const double offB = signeddistance(direction, X, B);
    const double offC = signeddistance(direction, X, C);

    const bool noneAbove = offA <= 0.0 && offB <= 0.0 && offC <= 0.0;
    const bool allAbove = offA > 0.0 && offB > 0.0 && offC > 0.0;
    return noneAbove || allAbove;
}
// For the tetrahedron A=(0,0,0), B=(0,0,1), C, D: counts the vertices X for
// which instabil_ell(S, X, <the other three vertices>) holds. Result is 0..4.
__device__ inline int instabil_ep(const vec3& S, const vec3& C, const vec3& D) {
    const vec3 A(0, 0, 0), B(0, 0, 1.0);
    int hits = 0;
    hits += instabil_ell(S, D, A, B, C) ? 1 : 0;  // at vertex D
    hits += instabil_ell(S, A, D, B, C) ? 1 : 0;  // at vertex A
    hits += instabil_ell(S, B, A, D, C) ? 1 : 0;  // at vertex B
    hits += instabil_ell(S, C, A, B, D) ? 1 : 0;  // at vertex C
    return hits;
}
// Returns the orthogonal projection of S onto the infinite line through A and B.
__device__ inline vec3 lineintersect(const vec3& S, const vec3& A, const vec3& B) {
    const vec3 along = B - A;
    const vec3 scaled = dot(S - A, along) * along;
    return A + scaled / dot(along, along);
}
// Edge test for the segment X1-X2. Returns true only when
//   1. the projection of S onto the line X1X2 falls strictly between X1 and
//      X2 (0 < dot(X2-X1, S-X1) < dot(X2-X1, X2-X1)), and
//   2. A and B are on the same side of the plane through that projection
//      whose normal points from S to the projection (both offsets <= 0, or
//      both > 0).
__device__ inline bool nyereg_ell(const vec3& S, const vec3& X1, const vec3& X2, const vec3& A, const vec3& B) {
    const double KAC = dot(X2 - X1, S - X1);
    const double KAB = dot(X2 - X1, X2 - X1);
    if (!(0.0 < KAC && KAC < KAB))
        return false;

    const vec3 foot = lineintersect(S, X1, X2);
    const vec3 direction = normalize(foot - S);
    const double offA = signeddistance(direction, foot, A);
    const double offB = signeddistance(direction, foot, B);

    const bool noneAbove = offA <= 0.0 && offB <= 0.0;
    const bool bothAbove = offA > 0.0 && offB > 0.0;
    return noneAbove || bothAbove;
}
// For the tetrahedron A=(0,0,0), B=(0,0,1), C, D: counts the edges for which
// nyereg_ell(S, <edge endpoints>, <the two remaining vertices>) holds.
// The result is in the range 0..6.
__device__ inline int nyereg_ep(const vec3& S, const vec3& C, const vec3& D) {
    int cnt = 0;
    vec3 A(0, 0, 0), B(0, 0, 1.0);
    // edge AB, remaining vertices C and D
    if (nyereg_ell(S, A, B, C, D))
        cnt++;
    // edge BC, remaining vertices D and A
    if (nyereg_ell(S, B, C, D, A))
        cnt++;
    // edge CD, remaining vertices A and B
    if (nyereg_ell(S, C, D, A, B))
        cnt++;
    // edge AD, remaining vertices C and B.
    // The previous code passed A here, which is an endpoint of this very edge
    // rather than one of the two vertices off the edge.
    if (nyereg_ell(S, A, D, C, B))
        cnt++;
    // edge AC, remaining vertices B and D
    if (nyereg_ell(S, A, C, B, D))
        cnt++;
    // edge DB, remaining vertices C and A
    if (nyereg_ell(S, D, B, C, A))
        cnt++;
    return cnt;
}
// Samples candidate points and records which (S, U) combinations occur.
//
// Base points are K = i*AB + j*AC with i and j each running over 1..v-1
// independently; from every K, points Sv are taken on the segment from K
// towards D at fractions k/w for k = 1..w-1. For each Sv the three counters
// S = stabil_ep, U = instabil_ep and H = nyereg_ep are evaluated, and when
// S > 0, U > 0 and S + U - H == 2 the byte at [(S-1)*4 + (U-1)] of this
// thread's 16-byte slot in egysulyi_mtx is set to 1.
//
// v, w          subdivision counts (used as divisors, so must be non-zero)
// C, D          the two variable vertices passed on to the counters
// egysulyi_mtx  output flags, 16 bytes per thread, indexed by the global
//               1-D thread index
//
// NOTE(review): AB is taken along +x here, whereas stabil_ep/instabil_ep/
// nyereg_ep and the sibling *_oldal routines place B at (0,0,1). Confirm
// which convention is intended.
__device__ void ABC_oldal(int v, int w, const vec3& C, const vec3& D, char* egysulyi_mtx) {
    // Slot offset is computed in 64 bits: pos*16 overflows a 32-bit int once
    // the global thread index reaches 2^27.
    const unsigned long long slot =
        ((unsigned long long)blockDim.x * blockIdx.x + threadIdx.x) * 16ULL;
    const vec3 AB(1.0 / v, 0.0, 0.0);
    const vec3 AC = C / v;
    for (int i = 1; i < v; i++)
    {
        for (int j = 1; j < v; j++)
        {
            const vec3 K = (double)i * AB + (double)j * AC;
            const vec3 L = (D - K) / w;
            for (int k = 1; k < w; k++)
            {
                const vec3 Sv = K + L * (double)k;
                const int S = stabil_ep(Sv, C, D);
                const int U = instabil_ep(Sv, C, D);
                const int H = nyereg_ep(Sv, C, D);
                if (S > 0 && U > 0 && S + U - H == 2)
                    egysulyi_mtx[slot + (S - 1) * 4 + (U - 1)] = 1;
            }
        }
    }
}
// Samples candidate points and records which (S, U) combinations occur.
//
// Base points are K = B + i*(C-B)/v + j*(D-B)/v with i and j each running
// over 1..v-1 independently; from every K, points Sv are taken on the segment
// from K towards A at fractions k/w for k = 1..w-1. For each Sv the counters
// S = stabil_ep, U = instabil_ep and H = nyereg_ep are evaluated, and when
// S > 0, U > 0 and S + U - H == 2 the byte at [(S-1)*4 + (U-1)] of this
// thread's 16-byte slot in egysulyi_mtx is set to 1.
//
// v, w          subdivision counts (used as divisors, so must be non-zero)
// C, D          the two variable vertices; A=(0,0,0) and B=(0,0,1) are fixed
// egysulyi_mtx  output flags, 16 bytes per thread, indexed by the global
//               1-D thread index
__device__ void BCD_oldal(int v, int w, const vec3& C, const vec3& D, char* egysulyi_mtx) {
    // Slot offset is computed in 64 bits: pos*16 overflows a 32-bit int once
    // the global thread index reaches 2^27.
    const unsigned long long slot =
        ((unsigned long long)blockDim.x * blockIdx.x + threadIdx.x) * 16ULL;
    const vec3 A(0.0, 0.0, 0.0), B(0.0, 0.0, 1.0);
    const vec3 BC = (C - B) / v;
    const vec3 BD = (D - B) / v;
    for (int i = 1; i < v; i++)
    {
        for (int j = 1; j < v; j++)
        {
            const vec3 K = B + (double)i * BC + (double)j * BD;
            const vec3 L = (A - K) / w;
            for (int k = 1; k < w; k++)
            {
                const vec3 Sv = K + L * (double)k;
                const int S = stabil_ep(Sv, C, D);
                const int U = instabil_ep(Sv, C, D);
                const int H = nyereg_ep(Sv, C, D);
                if (S > 0 && U > 0 && S + U - H == 2)
                    egysulyi_mtx[slot + (S - 1) * 4 + (U - 1)] = 1;
            }
        }
    }
}
// Samples candidate points and records which (S, U) combinations occur.
//
// Base points are K = C + i*(A-C)/v + j*(D-C)/v with i and j each running
// over 1..v-1 independently; from every K, points Sv are taken on the segment
// from K towards B at fractions k/w for k = 1..w-1. For each Sv the counters
// S = stabil_ep, U = instabil_ep and H = nyereg_ep are evaluated, and when
// S > 0, U > 0 and S + U - H == 2 the byte at [(S-1)*4 + (U-1)] of this
// thread's 16-byte slot in egysulyi_mtx is set to 1.
//
// v, w          subdivision counts (used as divisors, so must be non-zero)
// C, D          the two variable vertices; A=(0,0,0) and B=(0,0,1) are fixed
// egysulyi_mtx  output flags, 16 bytes per thread, indexed by the global
//               1-D thread index
__device__ void CDA_oldal(int v, int w, const vec3& C, const vec3& D, char* egysulyi_mtx) {
    // Slot offset is computed in 64 bits: pos*16 overflows a 32-bit int once
    // the global thread index reaches 2^27.
    const unsigned long long slot =
        ((unsigned long long)blockDim.x * blockIdx.x + threadIdx.x) * 16ULL;
    const vec3 A(0.0, 0.0, 0.0), B(0.0, 0.0, 1.0);
    const vec3 CA = (A - C) / v;
    const vec3 CD = (D - C) / v;
    for (int i = 1; i < v; i++)
    {
        for (int j = 1; j < v; j++)
        {
            const vec3 K = C + (double)i * CA + (double)j * CD;
            const vec3 L = (B - K) / w;
            for (int k = 1; k < w; k++)
            {
                const vec3 Sv = K + L * (double)k;
                const int S = stabil_ep(Sv, C, D);
                const int U = instabil_ep(Sv, C, D);
                const int H = nyereg_ep(Sv, C, D);
                if (S > 0 && U > 0 && S + U - H == 2)
                    egysulyi_mtx[slot + (S - 1) * 4 + (U - 1)] = 1;
            }
        }
    }
}
// Samples candidate points and records which (S, U) combinations occur.
//
// Base points are K = D + i*(A-D)/v + j*(B-D)/v with i and j each running
// over 1..v-1 independently; from every K, points Sv are taken on the segment
// from K towards C at fractions k/w for k = 1..w-1. For each Sv the counters
// S = stabil_ep, U = instabil_ep and H = nyereg_ep are evaluated, and when
// S > 0, U > 0 and S + U - H == 2 the byte at [(S-1)*4 + (U-1)] of this
// thread's 16-byte slot in egysulyi_mtx is set to 1.
//
// v, w          subdivision counts (used as divisors, so must be non-zero)
// C, D          the two variable vertices; A=(0,0,0) and B=(0,0,1) are fixed
// egysulyi_mtx  output flags, 16 bytes per thread, indexed by the global
//               1-D thread index
__device__ void DAB_oldal(int v, int w, const vec3& C, const vec3& D, char* egysulyi_mtx) {
    // Slot offset is computed in 64 bits: pos*16 overflows a 32-bit int once
    // the global thread index reaches 2^27.
    const unsigned long long slot =
        ((unsigned long long)blockDim.x * blockIdx.x + threadIdx.x) * 16ULL;
    const vec3 A(0.0, 0.0, 0.0), B(0.0, 0.0, 1.0);
    const vec3 DA = (A - D) / v;
    const vec3 DB = (B - D) / v;
    for (int i = 1; i < v; i++)
    {
        for (int j = 1; j < v; j++)
        {
            const vec3 K = D + (double)i * DA + (double)j * DB;
            const vec3 L = (C - K) / w;
            for (int k = 1; k < w; k++)
            {
                const vec3 Sv = K + L * (double)k;
                const int S = stabil_ep(Sv, C, D);
                const int U = instabil_ep(Sv, C, D);
                const int H = nyereg_ep(Sv, C, D);
                if (S > 0 && U > 0 && S + U - H == 2)
                    egysulyi_mtx[slot + (S - 1) * 4 + (U - 1)] = 1;
            }
        }
    }
}
// Kernel entry point: one thread per (C, D) pair.
//
// Launch layout: 1-D grid of 1-D blocks with at least size_C*size_D threads
// in total; surplus threads exit immediately.
//
// v, w              subdivision counts forwarded to the four *_oldal routines
// Cx_arr, Cy_arr    size_C coordinates of vertex C (its z is fixed at 0)
// Dx/Dy/Dz_arr      size_D coordinates of vertex D
// egysulyi_mtx      output, 16 bytes per pair; the slot of global thread
//                   index pos belongs to C index pos % size_C and D index
//                   pos / size_C
//
// Fixes relative to the previous version:
//  * D was indexed with pos % size_D, which only visits every (C, D) pair
//    when size_C and size_D are coprime; pos / size_C enumerates all of them.
//  * size_C*size_D and the thread index are evaluated in 64 bits so the
//    bounds check stays correct when the product exceeds the int range.
__global__ void gpu_egyensulyi(int v, int w, double* Cx_arr, double* Cy_arr,
        double* Dx_arr, double* Dy_arr, double* Dz_arr, int size_C, int size_D, char* egysulyi_mtx) {
    const long long total = (long long)size_C * (long long)size_D;
    const long long pos = (long long)blockDim.x * blockIdx.x + threadIdx.x;
    if (pos >= total)
        return;

    const int idxC = (int)(pos % size_C);
    const int idxD = (int)(pos / size_C);
    vec3 C(Cx_arr[idxC], Cy_arr[idxC], 0.0);
    vec3 D(Dx_arr[idxD], Dy_arr[idxD], Dz_arr[idxD]);

    ABC_oldal(v, w, C, D, egysulyi_mtx);
    BCD_oldal(v, w, C, D, egysulyi_mtx);
    CDA_oldal(v, w, C, D, egysulyi_mtx);
    DAB_oldal(v, w, C, D, egysulyi_mtx);
}
\ No newline at end of file
import numpy, matplotlib.pyplot as plt import cupy as cp, matplotlib.pyplot as plt
from numba import cuda from numba import cuda
from utils import expSpace from utils import expSpace
...@@ -6,46 +6,87 @@ def gen_angels_to_pick(n, plot = False): ...@@ -6,46 +6,87 @@ def gen_angels_to_pick(n, plot = False):
if n % 2 == 0: if n % 2 == 0:
raise "n%2==0" raise "n%2==0"
N = int((n + 3) / 2) N = int((n + 3) / 2)
X1 = expSpace(0.0, numpy.pi/2.0, N) X1 = expSpace(0.0, cp.pi/2.0, N)
X3 = expSpace(numpy.pi/2.0, numpy.pi, N) X3 = expSpace(cp.pi/2.0, cp.pi, N)
X3 = -1*X3 + 3.0*numpy.pi/2.0 X3 = -1*X3 + 3.0*cp.pi/2.0
anglestopick = numpy.concatenate((X1, X3), axis=None) anglestopick = cp.concatenate((X1, X3), axis=None)
anglestopick = numpy.unique(anglestopick) # Vigyázni vele! anglestopick = cp.unique(anglestopick) # Vigyázni vele!
anglestopick = anglestopick[1:-1] anglestopick = anglestopick[1:-1]
if plot: if plot:
Y = numpy.zeros(n) Y = cp.zeros(n)
plt.plot(anglestopick, Y, '|') plt.plot(anglestopick, Y, '|')
plt.show() plt.show()
return anglestopick return anglestopick
# CUDA kernel that pairs every angle x1[tid] with every angle x2[i].
# Thread tid fills row tid of the m-by-m outputs a and b: where
# alpha + betha < PI, betha >= alpha and alpha > 0 the pair is stored,
# otherwise both entries are set to -1.
# The angles are kept in double precision: the earlier float locals rounded
# the float64 inputs before they were compared and written back.
# The former `m*m <= tid*m+m-1` checks could never be true once tid < m, so
# only the tid bound remains; the row offset is computed in 64 bits.
parosit = cp.RawKernel(r'''
extern "C"
__global__
void parosit(const double* x1, const double* x2, double* a, double* b, const int m, const double PI) {
    int tid = blockDim.x * blockIdx.x + threadIdx.x;
    if (m <= tid)
        return;
    const double alpha = x1[tid];
    const long long row = (long long)tid * m;
    for (int i = 0; i < m; i++) {
        const double betha = x2[i];
        if ((alpha + betha) < PI && betha >= alpha && alpha > 0.0) {
            a[row + i] = alpha;
            b[row + i] = betha;
        } else {
            a[row + i] = -1.0;
            b[row + i] = -1.0;
        }
    }
}
''', 'parosit')
# CUDA kernel that pairs every angle x1[tid] with every angle x2[i].
# Thread tid fills row tid of the m-by-m outputs a and b: where
# alpha + betha < PI and alpha > 0 the pair is stored, otherwise both entries
# are set to -1.
# The angles are kept in double precision: the earlier float locals rounded
# the float64 inputs before they were compared and written back.
# The former `m*m <= tid*m+m-1` check could never be true once tid < m, so
# only the tid bound remains; the row offset is computed in 64 bits.
parosit2 = cp.RawKernel(r'''
extern "C"
__global__
void parosit2(const double* x1, const double* x2, double* a, double* b, const int m, const double PI) {
    int tid = blockDim.x * blockIdx.x + threadIdx.x;
    if (m <= tid)
        return;
    const double alpha = x1[tid];
    const long long row = (long long)tid * m;
    for (int i = 0; i < m; i++) {
        const double betha = x2[i];
        if ((alpha + betha) < PI && alpha > 0.0) {
            a[row + i] = alpha;
            b[row + i] = betha;
        } else {
            a[row + i] = -1.0;
            b[row + i] = -1.0;
        }
    }
}
''', 'parosit2')
def angles_alap(anglestopick, plot = False): def angles_alap(anglestopick, plot = False):
alpha_arr = numpy.array([]) m = anglestopick.size
beta_arr = numpy.array([]) alpha_arr = cp.zeros((m, m), dtype=cp.float64)
beta_arr = cp.zeros((m, m), dtype=cp.float64)
for alpha in anglestopick: blocksize = int((m + 64 - 1) / 64)
for beta in anglestopick: parosit((blocksize,), (64,), (anglestopick, anglestopick, alpha_arr, beta_arr, m, cp.pi))
if alpha + beta < numpy.pi and beta >= alpha and alpha != 0.0: # vigyázni
alpha_arr = numpy.insert(alpha_arr, 0, alpha) tompa_beta_arr = beta_arr[beta_arr > cp.pi]
beta_arr = numpy.insert(beta_arr, 0, beta) tompa_beta_mpi_arr = tompa_beta_arr - cp.pi/2
hegyes_beta_arr = beta_arr[(beta_arr <= cp.pi) & (beta_arr > 0.0)]
tompa_beta_arr = beta_arr[beta_arr > numpy.pi] tompa_alpha_arr = alpha_arr[beta_arr > cp.pi]
tompa_beta_mpi_arr = tompa_beta_arr - numpy.pi/2 hegyes_alpha_arr = alpha_arr[(beta_arr <= cp.pi) & (beta_arr > 0.0)]
hegyes_beta_arr = beta_arr[beta_arr <= numpy.pi]
tompa_alpha_arr = alpha_arr[beta_arr > numpy.pi] tghB = cp.tan(hegyes_beta_arr)
hegyes_alpha_arr = alpha_arr[beta_arr <= numpy.pi] tghA = cp.tan(hegyes_alpha_arr)
tgtB_mpi = cp.tan(tompa_beta_mpi_arr)
tghB = numpy.tan(hegyes_beta_arr) tgtA = cp.tan(tompa_alpha_arr)
tghA = numpy.tan(hegyes_alpha_arr)
tgtB_mpi = numpy.tan(tompa_beta_mpi_arr)
tgtA = numpy.tan(tompa_alpha_arr)
# hegyes # hegyes
sztgh = tghB*tghA sztgh = tghB * tghA
otgh = tghA + tghB otgh = tghA + tghB
hCy = sztgh/otgh hCy = sztgh / otgh
hCx = hCy / tghA hCx = hCy / tghA
# tompa # tompa
...@@ -54,32 +95,29 @@ def angles_alap(anglestopick, plot = False): ...@@ -54,32 +95,29 @@ def angles_alap(anglestopick, plot = False):
tCy = tgtA / mtgt tCy = tgtA / mtgt
tCx = tCy / tgtA tCx = tCy / tgtA
Cy = numpy.concatenate((hCy, tCy), axis=None) Cy = cp.concatenate((hCy, tCy), axis=None)
Cx = numpy.concatenate((hCx, tCx), axis=None) Cx = cp.concatenate((hCx, tCx), axis=None)
return Cx, Cy return Cx, Cy
def angles_ratet(anglestopick, plot = False): def angles_ratet(anglestopick, plot = False):
alpha_arr = numpy.array([]) m = anglestopick.size
beta_arr = numpy.array([]) alpha_arr = cp.zeros((m, m), dtype=cp.float64)
beta_arr = cp.zeros((m, m), dtype=cp.float64)
for alpha in anglestopick: blocksize = int((m + 64 - 1) / 64)
for beta in anglestopick: parosit2((blocksize,), (m,), (anglestopick, anglestopick, alpha_arr, beta_arr, m, cp.pi))
if alpha + beta < numpy.pi and alpha != 0.0: # vigyázni
alpha_arr = numpy.insert(alpha_arr, 0, alpha) tompa_beta_arr = beta_arr[beta_arr > cp.pi]
beta_arr = numpy.insert(beta_arr, 0, beta) tompa_beta_mpi_arr = tompa_beta_arr - cp.pi/2
hegyes_beta_arr = beta_arr[(beta_arr <= cp.pi) & (beta_arr > 0.0)]
tompa_beta_arr = beta_arr[beta_arr > numpy.pi] tompa_alpha_arr = alpha_arr[beta_arr > cp.pi]
tompa_beta_mpi_arr = tompa_beta_arr - numpy.pi/2 hegyes_alpha_arr = alpha_arr[(beta_arr <= cp.pi) & (beta_arr > 0.0)]
hegyes_beta_arr = beta_arr[beta_arr <= numpy.pi]
tompa_alpha_arr = alpha_arr[beta_arr > numpy.pi] tghB = cp.tan(hegyes_beta_arr)
hegyes_alpha_arr = alpha_arr[beta_arr <= numpy.pi] tghA = cp.tan(hegyes_alpha_arr)
tgtB_mpi = cp.tan(tompa_beta_mpi_arr)
tghB = numpy.tan(hegyes_beta_arr) tgtA = cp.tan(tompa_alpha_arr)
tghA = numpy.tan(hegyes_alpha_arr)
tgtB_mpi = numpy.tan(tompa_beta_mpi_arr)
tgtA = numpy.tan(tompa_alpha_arr)
# hegyes # hegyes
sztgh = tghB*tghA sztgh = tghB*tghA
...@@ -93,14 +131,14 @@ def angles_ratet(anglestopick, plot = False): ...@@ -93,14 +131,14 @@ def angles_ratet(anglestopick, plot = False):
tCy = tgtA / mtgt tCy = tgtA / mtgt
tCx = tCy / tgtA tCx = tCy / tgtA
Ey = numpy.concatenate((hCy, tCy), axis=None) Ey = cp.concatenate((hCy, tCy), axis=None)
Ex = numpy.concatenate((hCx, tCx), axis=None) Ex = cp.concatenate((hCx, tCx), axis=None)
cos = numpy.cos(anglestopick) cos = cp.cos(anglestopick)
sin = numpy.sin(anglestopick) sin = cp.sin(anglestopick)
Dx = numpy.outer(numpy.full(anglestopick.size, 1.0), Ex).flatten() Dx = cp.outer(cp.full(anglestopick.size, 1.0, dtype=cp.float64), Ex).flatten()
Dy = numpy.outer(cos, Ey).flatten() Dy = cp.outer(cos, Ey).flatten()
Dz = numpy.outer(sin, Ey).flatten() Dz = cp.outer(sin, Ey).flatten()
return Dx, Dy, Dz return Dx, Dy, Dz
\ No newline at end of file
from numba import cuda from numba import cuda
import numpy import cupy as cp
def start_kernel(Cx, Cy, Dx, Dy, Dz, v, w): with open('epgpu.cu') as f:
Cx_global = cuda.to_device(Cx) code = f.read()
Cy_global = cuda.to_device(Cy)
Dx_global = cuda.to_device(Dx)
Dy_global = cuda.to_device(Dy)
Dz_global = cuda.to_device(Dz)
egyensulyi_mtx = cuda.device_array((Dz.size, 4, 4))
gpu_egyensuly[256, 256](v, w, Cx_global, Cy_global, Dx_global, Dy_global, Dz_global, egyensulyi_mtx)
return egyensulyi_mtx.copy_to_host()
@cuda.jit
def gpu_egyensuly(v, w, Cx, Cy, Dx, Dy, Dz, egyensulyi_mtx):
ABC_oldal(v, w, Cx, Cy, Dx, Dy, Dz, egyensulyi_mtx)
@cuda.jit(device=True)
def ABC_oldal(v, w, Cx_arr, Cy_arr, Dx_arr, Dy_arr, Dz_arr, egyensulyi_mtx):
pos = cuda.grid(1)
if pos >= Dx_arr.size:
return
Cx = Cx_arr[pos]
Cy = Cy_arr[pos]
Dx = Dx_arr[pos]
Dy = Dy_arr[pos]
Dz = Dz_arr[pos]
ABx = 1.0/v
ABy = 0.0
ABz = 0.0
ACx = (Cx - 0.0)/v kers = ('gpu_egyensulyi', )
ACy = (Cy - 0.0)/v ep_pontok_module = cp.RawModule(code=code, options=('--std=c++11',), name_expressions=kers)
ACz = 0.0 # (0.0 - 0.0)/v fun = ep_pontok_module.get_function(kers[0])
for i in range(v+1): def start_kernel(Cx, Cy, Dx, Dy, Dz, v, w):
for j in range(v+1): print("Res size (byte): ", Cx.size*Dx.size*4*4)
oKx = i * ABx + j * ACx print(Cx.size, ",", Cy.size, ",", Dx.size, ",", Dy.size, ",", Dz.size)
oKy = i * ABy + j * ACy egyensulyi_mtx = cp.zeros((Cx.size*Dx.size, 4, 4), dtype=cp.int8)
oKz = i * ABz + j * ACz numBlock = int((Cx.size*Dx.size + 256 - 1) / 256)
Lx = (Dx - oKx)/w fun((numBlock,), (256,), (v, w, Cx, Cy, Dx, Dy, Dz, Cx.size, Dx.size, egyensulyi_mtx))
Ly = (Dy - oKy)/w
Lz = (Dz - oKz)/w
for k in range(w+1):
Sx = oKx + Lx*k
Sy = oKy + Ly*k
Sz = oKz + Lz*k
S = stabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz)
U = instabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz)
H = nyereg_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz)
if S + U - H == 2:
egyensulyi_mtx[pos, S, U] = 1
# Sxyz - a skp pontja, (0,0,0), (0,0,1), C(x,y,0), D(x,y,z) a tetraéderhez tartozó csúcspontok
# return stabil sp száma S súlypontnál
@cuda.jit(device=True)
def stabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz):
return 0
# Sxyz - a skp pontja, (0,0,0), (0,0,1), C(x,y,0), D(x,y,z) a tetraéderhez tartozó csúcspontok
# return instabil sp száma S a testre nézve
@cuda.jit(device=True)
def instabil_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz):
return 0
# Sxyz - a skp pontja, (0,0,0), (0,0,1), C(x,y,0), D(x,y,z) a tetraéderhez tartozó csúcspontok return egyensulyi_mtx
# return nyereg sp száma S az IJKL testre nézve
@cuda.jit(device=True)
def nyereg_ep(Sx, Sy, Sz, Cx, Cy, Dx, Dy, Dz):
return 0
\ No newline at end of file
#!/bin/bash
#SBATCH --job-name=gpgpu # job name
#SBATCH -N 1 # number of nodes to use
#SBATCH -p gpu # partition to submit to
#SBATCH --gres gpu # request the "gpu" generic resource
#SBATCH --time=20:00:00 # maximum run time
#SBATCH -o politopok.out # output file
module load anaconda
# Runs tetrarun.py for every odd -n from 81 to 111 (n*2 + 1 for n = 40..55),
# echoing each command line before timing it.
for ((n = 40; n <= 55; n++))
do
    size=$((n*2 + 1))
    echo "python tetrarun.py -n ${size} -v 10 -w 10"
    time python tetrarun.py -n ${size} -v 10 -w 10
done
#!/bin/bash
#SBATCH --job-name=gpgpu # job name
#SBATCH -N 1 # number of nodes to use
#SBATCH -p gpu # partition to submit to
#SBATCH --gres gpu # request the "gpu" generic resource
#SBATCH --time=20:00:00 # maximum run time
#SBATCH -o politopok.out # output file
module load anaconda
# Runs tetrarun.py for every odd -n from 41 to 61 (n*2 + 1 for n = 20..30).
# The echoed line is built from the same argument list that is executed, so
# the log can no longer disagree with the run (it used to print -v 10 -w 10
# while actually running with -v 100 -w 100).
for n in {20..30}
do
    args=(-n "$((n*2 + 1))" -v 100 -w 100)
    echo "python tetrarun.py ${args[*]}"
    time python tetrarun.py "${args[@]}"
done
#!/bin/bash
# Times ten consecutive runs of the same tetrarun.py configuration.
# The first line used to read "#bin/bash", which is an ordinary comment and
# not an interpreter line; it is now a proper shebang.
for run in {1..10}
do
    time python tetrarun.py -n 521 -v 100 -w 100
done
\ No newline at end of file
...@@ -32,16 +32,15 @@ def main(argv): ...@@ -32,16 +32,15 @@ def main(argv):
outputfile = arg outputfile = arg
elif opt in ("-p", "--plot"): elif opt in ("-p", "--plot"):
PLOT = True PLOT = True
print('Output file is: ', outputfile)
print('Conf: ', [n, v, w])
space = gen_angels_to_pick(n, PLOT) space = gen_angels_to_pick(n, PLOT)
Cx, Cy = angles_alap(space, PLOT) Cx, Cy = angles_alap(space, PLOT)
Dx, Dy, Dz = angles_ratet(space) Dx, Dy, Dz = angles_ratet(space)
res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w) res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w)
print(res) #print(res)
if __name__ == "__main__": if __name__ == "__main__":
main(sys.argv[1:]) main(sys.argv[1:])
\ No newline at end of file
import numpy import cupy as cp
def expSpace(min, max, N, exponentialliness = 20.0): def expSpace(min, max, N, exponentialliness = 20.0):
LinVec = numpy.linspace(0, numpy.log10(exponentialliness+1),N) LinVec = cp.linspace(0, cp.log10(exponentialliness+1, dtype=cp.float64),N, dtype=cp.float64)
return (max-min)/exponentialliness * (10.0**LinVec - 1) + min return (max-min)/exponentialliness * (10.0**LinVec - 1) + min
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment