Merge branch 'add-pytorch' into 'master'

Add pytorch. See merge request !3

Merge branch 'add-pytorch' into 'master'
Add pytorch. See merge request !3
8818e0ad · Karsa Zoltán István · 3524218d · c479bed4 · 8818e0ad · 8818e0ad
Commit 8818e0ad authored Feb 03, 2023 by Karsa Zoltán István
Show whitespace changes
Inline Side-by-side

Showing with 134 additions and 10 deletions

.gitignore
+2 -0

filtering.cu
+34 -0

genax.py
+0 -6

tetrarun.py
+13 -4

utils.py
+85 -0

No files found.
--- a/.gitignore
+++ b/.gitignore
@@ -50,3 +50,5 @@ coverage.xml
 # Sphinx documentation
 docs/_build/
 .vscode/
+
+*.out
--- a/filtering.cu
+++ b/filtering.cu
+// Returns true when the 16 entries of the 4x4 matrix at `mtx` add up to exactly 1.
+__device__ inline bool check_exact_one(char* mtx) {
+    int total = 0;
+    // Walk the 4x4 matrix as one flat run of 16 entries (the same cells as i*4 + j).
+    for (int k = 0; k < 16; ++k) {
+        total += mtx[k];
+    }
+    return total == 1;
+}
+
+// Overload: additionally requires entry (S, U) of the matrix to hold 1.
+__device__ inline bool check_exact_one(char* mtx, int S, int U) {
+    if (!check_exact_one(mtx))
+        return false;
+    return mtx[S*4 + U] == 1;
+}
+
+// For each of the `len` consecutive 16-entry (4x4) matrices in `egysulyi_mtx`,
+// stores in ok_arr[pos] the result of check_exact_one(matrix, S, U).
+// Each thread handles the matrix at its own `pos`; threads with pos >= len exit early.
+__global__ void exact_one(char* egysulyi_mtx, int len, bool* ok_arr, int S, int U) {
+    int pos = blockDim.x * blockIdx.x + threadIdx.x;
+    if (pos >= len)
+        return;
+    // Compute the element offset in size_t: `pos*16` evaluated in int
+    // overflows once pos reaches 2^27.
+    ok_arr[pos] = check_exact_one(egysulyi_mtx + static_cast<size_t>(pos) * 16, S, U);
+}
+
+// Returns true when entry (S, U) of the 4x4 matrix at `mtx` equals 1.
+__device__ inline bool check_filter(char* mtx, int S, int U) {
+    return mtx[4*S + U] == 1;
+}
+
+// For each of the `len` consecutive 16-entry (4x4) matrices in `egysulyi_mtx`,
+// stores in ok_arr[pos] the result of check_filter(matrix, S, U).
+// Each thread handles the matrix at its own `pos`; threads with pos >= len exit early.
+__global__ void filter(char* egysulyi_mtx, int len, bool* ok_arr, int S, int U) {
+    int pos = blockDim.x * blockIdx.x + threadIdx.x;
+    if (pos >= len)
+        return;
+    // Compute the element offset in size_t: `pos*16` evaluated in int
+    // overflows once pos reaches 2^27.
+    ok_arr[pos] = check_filter(egysulyi_mtx + static_cast<size_t>(pos) * 16, S, U);
+}
\ No newline at end of file
--- a/genax.py
+++ b/genax.py
@@ -98,9 +98,7 @@ def angles_alap(anglestopick, plot = False):
    tCx = tCy / tgtA

    Cx = cp.concatenate((hCx, tCx), axis=None)
-    Cx = cp.append(Cx, [0.5], axis=False)
    Cy = cp.concatenate((hCy, tCy), axis=None)
-    Cy = cp.append(Cy, [cp.sqrt(3.0)/2.0], axis=False)

    return Cx, Cy

@@ -142,10 +140,7 @@ def angles_ratet(anglestopick, plot = False):
    sin = cp.sin(anglestopick)

    Dx = cp.outer(cp.full(anglestopick.size, 1.0, dtype=cp.float64), Ex).flatten()
-    Dx = cp.append(Dx, [0.5], axis=False)
    Dy = cp.outer(cos, Ey).flatten()
-    Dy = cp.append(Dy, [cp.sqrt(3.0)/6.0], axis=False)
    Dz = cp.outer(sin, Ey).flatten()
-    Dz = cp.append(Dz, [cp.sqrt(2.0/3.0)], axis=False)

    return Dx, Dy, Dz
\ No newline at end of file
--- a/tetrarun.py
+++ b/tetrarun.py
@@ -2,17 +2,17 @@ import sys, getopt

 from genax import gen_angels_to_pick, angles_alap, angles_ratet
 from gpu import start_kernel
-from utils import convert, printresults
+from utils import convert, printresults, search, exact_one, exact_one_gpu, filter_gpu, writetofile

 def main(argv):
-   outputfile = 'out.txt'
+   outputfile = None
   n = 3
   v = 3
   w = 3
   PLOT = False

   try:
-      opts, args = getopt.getopt(argv,"hpn:v:w:d:")
+      opts, args = getopt.getopt(argv,"hpn:v:w:d:o:")
   except getopt.GetoptError as err:
      print(err)
      print ('tetrarun.py -n <range division:int> -v <> -w <> -o <outputfile>')
@@ -41,7 +41,15 @@ def main(argv):
   Dx, Dy, Dz = angles_ratet(space)

   res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w)
-   printresults(res)
+
+   if outputfile:
+      writetofile(outputfile, Cx, Cy, Dx, Dy, Dz, res)
+
+   #printresults(res)
+   #print("Exact one 3-3")
+   #exact_one_gpu(res, 3-1, 3-1)
+   #print("Filter 2-1")
+   #filter21 = filter_gpu(res, 2-1, 1-1)

 if __name__ == "__main__":
   main(sys.argv[1:])
\ No newline at end of file
--- a/utils.py
+++ b/utils.py
 import cupy as cp
+from functools import wraps
+import time
+import torch
 import numpy as np

+# Read the CUDA source for the filtering kernels. The path is relative, so it
+# is resolved against the current working directory when this module is imported.
+with open('filtering.cu') as f:
+    code = f.read()
+
+# Names of the kernels requested from the module: index 0 is 'exact_one',
+# index 1 is 'filter'.
+kers = ('exact_one', 'filter')
+# Build a RawModule from the source text and fetch a handle for each kernel.
+ep_pontok_module = cp.RawModule(code=code, options=('--std=c++11',), name_expressions=kers)
+exact_one_cuda = ep_pontok_module.get_function(kers[0])
+filter_cuda = ep_pontok_module.get_function(kers[1])
+
+def timeit(func):
+    @wraps(func)
+    def timeit_wrapper(*args, **kwargs):
+        start_time = time.perf_counter()
+        result = func(*args, **kwargs)
+        end_time = time.perf_counter()
+        total_time = end_time - start_time
+        print(f'Function {func.__name__}{args} {kwargs} Took {total_time:.4f} seconds')
+        return result
+    return timeit_wrapper
+
 def expSpace(min, max, N, exponentialliness = 20.0):
    """Return N float64 samples from ``min`` to ``max`` with exponential spacing.

    N exponents are taken evenly from 0 to ``log10(exponentialliness + 1)``;
    ``10**exponent - 1`` then runs from 0 to ``exponentialliness`` and is
    rescaled onto the ``[min, max]`` interval.
    """
    top = cp.log10(exponentialliness+1, dtype=cp.float64)
    exponents = cp.linspace(0, top, N, dtype=cp.float64)
    scale = (max-min)/exponentialliness
    return scale * (10.0**exponents - 1) + min
@@ -35,6 +57,69 @@ def printresults(egyensulyi_mtx):
        print(np.resize(parok, (int(N/2), 2)))
        print()

+def writetofile(filename, Cx, Cy, Dx, Dy, Dz, egyensulyi_mtx):
+    lcm = compute_lcm(Cx.size, Dx.size)
+    Cx_cpu = Cx.get()
+    Cy_cpu = Cy.get()
+    Dx_cpu = Dx.get()
+    Dy_cpu = Dy.get()
+    Dz_cpu = Dz.get()
+    mtx_cpu = egyensulyi_mtx.get()
+
+    pos = Cx.size * Dx.size
+
+    f = open(filename, "w")
+    size_C = Cx.size
+    size_D = Dx.size
+    for i in range(0, pos):
+        parok = np.empty([0], dtype=np.int8)
+        for S in range(0, 4):
+            for U in range(0, 4):
+                if mtx_cpu[i][S][U] == 1:
+                    parok = np.append(parok, S+1)
+                    parok = np.append(parok, U+1)
+        N = parok.size
+        f.write(f"{Cx_cpu[i % size_C]}, {Cy_cpu[i % size_C]}, {Dx_cpu[(i + int(i / lcm)) % size_D]}, {Dy_cpu[(i + int(i / lcm)) % size_D]}, {Dz_cpu[(i + int(i / lcm)) % size_D]}\n")
+        f.write(np.array2string(np.resize(parok, (int(N/2), 2))))
+        f.write("\n")
+
+    f.close()
+        
+def search(egyensulyi_mtx, S, U):
+    for i in egyensulyi_mtx:
+        if i[S][U] == 1:
+            print(i)
+
+def exact_one(egyensulyi_mtx, S, U):
+    for i in egyensulyi_mtx:
+        ok = True
+        for j in range(0, 4):
+            for k in range(0, 4):
+                if j == S and k == U and i[j][k] == 0:
+                    ok = False
+                if j != S and k != U and i[j][k] == 1:
+                    ok = False
+                if j == S and k != U and i[j][k] == 1:
+                    ok = False
+                if j != S and k == U and i[j][k] == 1:
+                    ok = False
+        if ok:
+            print(i)
+
+def exact_one_gpu(egyensulyi_mtx, S, U):
+    size = int(egyensulyi_mtx.size / 16)
+    indexes = cp.zeros((size,), dtype=bool)
+    numBlock = int((size + 256 - 1) / 256)
+    exact_one_cuda((numBlock,), (256,), (egyensulyi_mtx, size, indexes, S, U))
+    return egyensulyi_mtx[indexes]
+
+def filter_gpu(egyensulyi_mtx, S, U):
+    size = int(egyensulyi_mtx.size / 16)
+    indexes = cp.zeros((size,), dtype=cp.bool)
+    numBlock = int((size + 256 - 1) / 256)
+    filter_cuda((numBlock,), (256,), (egyensulyi_mtx, size, indexes, S, U))
+    return egyensulyi_mtx[indexes]
+
 def compute_lcm(x, y):
   if x > y:
       greater = x