Commit 8e3a2100 by Zoltan Karsa

Fix permutation: offset the D index by pos / lcm(size_C, size_D) so that every (C, D) pair is enumerated

parent b6a9ae36
......@@ -292,12 +292,12 @@ __device__ void DAB_oldal(int v, int w, const vec3& C, const vec3& D, char* egys
}
__global__ void gpu_egyensulyi(int v, int w, double* Cx_arr, double* Cy_arr,
double* Dx_arr, double* Dy_arr, double* Dz_arr, int size_C, int size_D, char* egysulyi_mtx) {
double* Dx_arr, double* Dy_arr, double* Dz_arr, int size_C, int size_D, int lcm, char* egysulyi_mtx) {
int pos = blockDim.x * blockIdx.x + threadIdx.x;
if (pos >= size_C*size_D)
return;
vec3 C(Cx_arr[pos % size_C], Cy_arr[pos % size_C], 0.0);
vec3 D(Dx_arr[pos % size_D], Dy_arr[pos % size_D], Dz_arr[pos % size_D]);
vec3 D(Dx_arr[(pos + pos / lcm) % size_D], Dy_arr[(pos + pos / lcm) % size_D], Dz_arr[(pos + pos / lcm) % size_D]);
ABC_oldal(v, w, C, D, egysulyi_mtx);
BCD_oldal(v, w, C, D, egysulyi_mtx);
......
......@@ -98,15 +98,9 @@ def angles_alap(anglestopick, plot = False):
tCx = tCy / tgtA
Cx = cp.concatenate((hCx, tCx), axis=None)
if Cx.size == 0:
Cx = cp.array([0.5], dtype=cp.float64)
else:
Cx = cp.append(Cx, [0.5], axis=False)
Cx = cp.append(Cx, [0.5], axis=False)
Cy = cp.concatenate((hCy, tCy), axis=None)
if Cy.size == 0:
Cy = cp.array(cp.sqrt(3.0)/2.0, dtype=cp.float64)
else:
Cy = cp.append(Cy, [cp.sqrt(3.0)/2.0], axis=False)
Cy = cp.append(Cy, [cp.sqrt(3.0)/2.0], axis=False)
return Cx, Cy
......@@ -148,19 +142,10 @@ def angles_ratet(anglestopick, plot = False):
sin = cp.sin(anglestopick)
Dx = cp.outer(cp.full(anglestopick.size, 1.0, dtype=cp.float64), Ex).flatten()
if Dx.size == 0:
Dx = cp.array([0.5], dtype=cp.float64)
else:
Dx = cp.append(Dx, [0.5], axis=False)
Dx = cp.append(Dx, [0.5], axis=False)
Dy = cp.outer(cos, Ey).flatten()
if Dy.size == 0:
Dy = cp.array(cp.sqrt(3.0)/6.0, dtype=cp.float64)
else:
Dy = cp.append(Dy, [cp.sqrt(3.0)/6.0], axis=False)
Dy = cp.append(Dy, [cp.sqrt(3.0)/6.0], axis=False)
Dz = cp.outer(sin, Ey).flatten()
if Dz.size == 0:
Dz = cp.array(cp.sqrt(2.0/3.0), dtype=cp.float64)
else:
Dz = cp.append(Dz, [cp.sqrt(2.0/3.0)], axis=False)
Dz = cp.append(Dz, [cp.sqrt(2.0/3.0)], axis=False)
return Dx, Dy, Dz
\ No newline at end of file
from numba import cuda
import cupy as cp
from utils import compute_lcm
with open('epgpu.cu') as f:
code = f.read()
......@@ -9,10 +10,12 @@ ep_pontok_module = cp.RawModule(code=code, options=('--std=c++11',), name_expres
fun = ep_pontok_module.get_function(kers[0])
def start_kernel(Cx, Cy, Dx, Dy, Dz, v, w):
print(f"Cnt: {Cx.size}x{Dx.size}={Cx.size*Dx.size}")
print("Res size (byte): ", Cx.size*Dx.size*4*4)
lcm = compute_lcm(Cx.size, Dx.size)
#print(Cx.size, ",", Cy.size, ",", Dx.size, ",", Dy.size, ",", Dz.size)
egyensulyi_mtx = cp.zeros((Cx.size*Dx.size, 4, 4), dtype=cp.int8)
numBlock = int((Cx.size*Dx.size + 256 - 1) / 256)
fun((numBlock,), (256,), (v, w, Cx, Cy, Dx, Dy, Dz, Cx.size, Dx.size, egyensulyi_mtx))
fun((numBlock,), (256,), (v, w, Cx, Cy, Dx, Dy, Dz, Cx.size, Dx.size, lcm, egyensulyi_mtx))
return egyensulyi_mtx
......@@ -22,7 +22,7 @@ def convert(egyensulyi_mtx):
return np.resize(parok, (int(N/2), 2))
def printresults(egyensulyi_mtx):
print(egyensulyi_mtx.size / 4 / 4)
ossz = 0
for i in egyensulyi_mtx:
parok = np.empty([0], dtype=np.int8)
for S in range(0, 4):
......@@ -33,4 +33,16 @@ def printresults(egyensulyi_mtx):
N = parok.size
print(f"{int(N/2)}x2")
print(np.resize(parok, (int(N/2), 2)))
print()
\ No newline at end of file
print()
def compute_lcm(x, y):
    """Return the least common multiple of two non-zero integers.

    The result is computed with the identity lcm(x, y) = |x * y| / gcd(x, y),
    so the running time does not grow with the magnitude of the result (a
    linear upward search would need on the order of lcm(x, y) iterations).

    Args:
        x: First integer; must be non-zero.
        y: Second integer; must be non-zero.

    Returns:
        The smallest positive integer that is divisible by both x and y.

    Raises:
        ZeroDivisionError: If x or y is zero. This exception type is kept
            deliberately so callers that relied on the modulo-by-zero
            failure for a zero argument continue to see the same error.
    """
    # Function-scope import keeps this block self-contained.
    import math

    if x == 0 or y == 0:
        raise ZeroDivisionError("compute_lcm() arguments must be non-zero")

    # x * y is always an exact multiple of gcd(x, y), so floor division is exact.
    return abs(x * y) // math.gcd(x, y)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment