fix permutation

8e3a2100 · Zoltan Karsa · b6a9ae36 · 8e3a2100 · 8e3a2100 · 8e3a2100
Commit 8e3a2100 authored Jan 30, 2023 by Zoltan Karsa
Show whitespace changes
Inline Side-by-side

Showing 4 changed files with 20 additions and 20 deletions

epgpu.cu
+2 -2

genax.py
+0 -16

gpu.py
+4 -1

utils.py
+14 -1

No files found.
--- a/epgpu.cu
+++ b/epgpu.cu
@@ -292,12 +292,12 @@ __device__ void DAB_oldal(int v, int w, const vec3& C, const vec3& D, char* egys
 }
 __global__ void gpu_egyensulyi(int v, int w, double* Cx_arr, double* Cy_arr, 
-                double* Dx_arr, double* Dy_arr, double* Dz_arr, int size_C, int size_D, char* egysulyi_mtx) {
+                double* Dx_arr, double* Dy_arr, double* Dz_arr, int size_C, int size_D, int lcm, char* egysulyi_mtx) {
    int pos = blockDim.x * blockIdx.x + threadIdx.x;
    if (pos >= size_C*size_D)
        return;
    vec3 C(Cx_arr[pos % size_C], Cy_arr[pos % size_C], 0.0);
-    vec3 D(Dx_arr[pos % size_D], Dy_arr[pos % size_D], Dz_arr[pos % size_D]);
+    vec3 D(Dx_arr[(pos + pos / lcm) % size_D], Dy_arr[(pos + pos / lcm) % size_D], Dz_arr[(pos + pos / lcm) % size_D]);
    ABC_oldal(v, w, C, D, egysulyi_mtx);
    BCD_oldal(v, w, C, D, egysulyi_mtx);

--- a/genax.py
+++ b/genax.py
@@ -98,14 +98,8 @@ def angles_alap(anglestopick, plot = False):
    tCx = tCy / tgtA
    Cx = cp.concatenate((hCx, tCx), axis=None)
-    if Cx.size == 0:
-        Cx = cp.array([0.5], dtype=cp.float64)
-    else:
    Cx = cp.append(Cx, [0.5], axis=False)
    Cy = cp.concatenate((hCy, tCy), axis=None)
-    if Cy.size == 0:
-        Cy = cp.array(cp.sqrt(3.0)/2.0, dtype=cp.float64)
-    else:
    Cy = cp.append(Cy, [cp.sqrt(3.0)/2.0], axis=False)
    return Cx, Cy
@@ -148,19 +142,10 @@ def angles_ratet(anglestopick, plot = False):
    sin = cp.sin(anglestopick)
    Dx = cp.outer(cp.full(anglestopick.size, 1.0, dtype=cp.float64), Ex).flatten()
-    if Dx.size == 0:
-        Dx = cp.array([0.5], dtype=cp.float64)
-    else:
    Dx = cp.append(Dx, [0.5], axis=False)
    Dy = cp.outer(cos, Ey).flatten()
-    if Dy.size == 0:
-        Dy = cp.array(cp.sqrt(3.0)/6.0, dtype=cp.float64)
-    else:
    Dy = cp.append(Dy, [cp.sqrt(3.0)/6.0], axis=False)
    Dz = cp.outer(sin, Ey).flatten()
-    if Dz.size == 0:
-        Dz = cp.array(cp.sqrt(2.0/3.0), dtype=cp.float64)
-    else:
    Dz = cp.append(Dz, [cp.sqrt(2.0/3.0)], axis=False)
    return Dx, Dy, Dz
\ No newline at end of file
--- a/gpu.py
+++ b/gpu.py
 from numba import cuda
 import cupy as cp
+from utils import compute_lcm
 with open('epgpu.cu') as f:
    code = f.read()
@@ -9,10 +10,12 @@ ep_pontok_module = cp.RawModule(code=code, options=('--std=c++11',), name_expres
 fun = ep_pontok_module.get_function(kers[0])
 def start_kernel(Cx, Cy, Dx, Dy, Dz, v, w):
+    print(f"Cnt: {Cx.size}x{Dx.size}={Cx.size*Dx.size}")
    print("Res size (byte): ", Cx.size*Dx.size*4*4)
+    lcm = compute_lcm(Cx.size, Dx.size)
    #print(Cx.size, ",", Cy.size, ",", Dx.size, ",", Dy.size, ",", Dz.size)
    egyensulyi_mtx = cp.zeros((Cx.size*Dx.size, 4, 4), dtype=cp.int8)
    numBlock = int((Cx.size*Dx.size + 256 - 1) / 256)
-    fun((numBlock,), (256,), (v, w, Cx, Cy, Dx, Dy, Dz, Cx.size, Dx.size, egyensulyi_mtx))
+    fun((numBlock,), (256,), (v, w, Cx, Cy, Dx, Dy, Dz, Cx.size, Dx.size, lcm, egyensulyi_mtx))
    return egyensulyi_mtx
--- a/utils.py
+++ b/utils.py
@@ -22,7 +22,7 @@ def convert(egyensulyi_mtx):
    return np.resize(parok, (int(N/2), 2))
 def printresults(egyensulyi_mtx):
-    print(egyensulyi_mtx.size / 4 / 4)
+    ossz = 0
    for i in egyensulyi_mtx:
        parok = np.empty([0], dtype=np.int8)
        for S in range(0, 4):
@@ -34,3 +34,15 @@ def printresults(egyensulyi_mtx):
        print(f"{int(N/2)}x2")
        print(np.resize(parok, (int(N/2), 2)))
        print()
+def compute_lcm(x, y):
+   if x > y:
+       greater = x
+   else:
+       greater = y
+   while(True):
+       if((greater % x == 0) and (greater % y == 0)):
+           lcm = greater
+           break
+       greater += 1
+   return lcm
\ No newline at end of file