mpi

dfdf4126 · Zoltan Karsa · b54c5583 · dfdf4126 · dfdf4126 · dfdf4126
Commit dfdf4126 authored Feb 08, 2023 by Zoltan Karsa
Hide whitespace changes
Inline Side-by-side

Showing with 64 additions and 13 deletions

genax.py
+38 -6

mpi.py
+10 -0

tetrarun.py
+8 -5

utils.py
+8 -2

No files found.
--- a/genax.py
+++ b/genax.py
 import cupy as cp, matplotlib.pyplot as plt
 from numba import cuda
 from utils import expSpace
+from mpi import size, rank

 def gen_angels_to_pick(n, plot = False):
    if n % 2 == 0:
@@ -67,7 +68,28 @@ void parosit2(const double* x1, const double* x2, double* a, double* b, const in
 }
 ''', 'parosit2')

-def angles_alap(anglestopick, plot = False):
+# Pairing kernel for the MPI path: one thread per element of x1 (length m),
+# each thread loops over the rank-local slice x2 (length m2).
+# For every (x1[tid], x2[i]) pair it stores the two angles in a/b when
+# alpha + betha < PI and alpha > 0, otherwise it stores -1.0 in both outputs.
+# a and b are flat views of buffers holding m * m2 doubles.
+parosit2_mpi = cp.RawKernel(r'''
+extern "C" 
+__global__ 
+void parosit2_mpi(const double* x1, const double* x2, double* a, double* b, const int m, const int m2, const double PI) {
+    int tid = blockDim.x * blockIdx.x + threadIdx.x;
+    // Surplus threads (launch size is rounded up) have nothing to do.
+    if (m <= tid)
+        return;
+    double alpha = x1[tid];
+    for (int i = 0; i < m2; i++) {
+        double betha = x2[i];
+        // The output has m rows of m2 entries, so the row stride is m2 (not m).
+        // Using m here skipped most rows and would run past the buffer.
+        // 64-bit index so tid * m2 cannot overflow int for large inputs.
+        const long long idx = (long long)tid * m2 + i;
+        if ((alpha + betha) < PI && alpha > 0.0) {
+            a[idx] = alpha;
+            b[idx] = betha;
+        } else {
+            a[idx] = -1.0;
+            b[idx] = -1.0;
+        }
+    }
+}
+''', 'parosit2_mpi')
+
+def angles_alap(anglestopick):
    m = anglestopick.size
    alpha_arr = cp.zeros((m, m), dtype=cp.float64)
    beta_arr = cp.zeros((m, m), dtype=cp.float64)
@@ -103,12 +125,22 @@ def angles_alap(anglestopick, plot = False):
    return Cx, Cy


-def angles_ratet(anglestopick, plot = False):
+def angles_ratet(anglestopick, mpi):
    m = anglestopick.size
-    alpha_arr = cp.zeros((m, m), dtype=cp.float64)
-    beta_arr = cp.zeros((m, m), dtype=cp.float64)
-    blocksize = int((m + 64 - 1) / 64)
-    parosit2((blocksize,), (m,), (anglestopick, anglestopick, alpha_arr, beta_arr, m, cp.pi))
+    if not mpi:
+        alpha_arr = cp.zeros((m, m), dtype=cp.float64)
+        beta_arr = cp.zeros((m, m), dtype=cp.float64)
+        blocksize = int((m + 64 - 1) / 64)
+        parosit2((blocksize,), (m,), (anglestopick, anglestopick, alpha_arr, beta_arr, m, cp.pi))
+    else:
+        also = int(rank/size * m)
+        felso = int((rank+1)/size * m)
+        anglestopick2 = anglestopick[also:felso]
+        m2 = anglestopick2.size
+        alpha_arr = cp.zeros((m, m2), dtype=cp.float64)
+        beta_arr = cp.zeros((m, m2), dtype=cp.float64)
+        blocksize = int((m + 64 - 1) / 64)
+        parosit2_mpi((blocksize,), (m,), (anglestopick, anglestopick2, alpha_arr, beta_arr, m, m2, cp.pi))

    tompa_beta_arr = beta_arr[beta_arr > cp.pi]
    tompa_beta_mpi_arr = tompa_beta_arr - cp.pi/2

--- a/mpi.py
+++ b/mpi.py
+# Optional MPI bootstrap: exposes `comm`, `rank` and `size` to the other modules.
+# When mpi4py cannot be imported all three are None, so callers must not use
+# `rank`/`size` in arithmetic unless MPI is actually available.
+try:
+    from mpi4py import MPI
+except ImportError:
+    # Only the missing-package case is treated as "no MPI"; a bare `except:`
+    # would also swallow KeyboardInterrupt/SystemExit and hide real errors.
+    comm = None
+    rank = None
+    size = None
+else:
+    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
+    size = comm.Get_size()
\ No newline at end of file
--- a/tetrarun.py
+++ b/tetrarun.py
@@ -10,9 +10,10 @@ def main(argv):
   v = 3
   w = 3
   PLOT = False
+   mpi = False

   try:
-      opts, args = getopt.getopt(argv,"hpn:v:w:d:o:")
+      opts, args = getopt.getopt(argv,"hpn:v:w:d:o:m:")
   except getopt.GetoptError as err:
      print(err)
      print ('tetrarun.py -n <range division:int> -v <> -w <> -o <outputfile>')
@@ -33,12 +34,14 @@ def main(argv):
         outputfile = arg
      elif opt in ("-p", "--plot"):
         PLOT = True
+      elif opt in ("-m", "--mpi"):
+         mpi = True

   space = gen_angels_to_pick(n, PLOT)

-   Cx, Cy = angles_alap(space, PLOT)
+   Cx, Cy = angles_alap(space)

-   Dx, Dy, Dz = angles_ratet(space)
+   Dx, Dy, Dz = angles_ratet(space, mpi)

   res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w)

@@ -56,8 +59,8 @@ def main(argv):
   res.get(out=mtx_cpu)
   
   if outputfile:
-      writetofile(outputfile+'.full', Cx_cpu, Cy_cpu, Dx_cpu, Dy_cpu, Dz_cpu, mtx_cpu)
-      writetofile2(outputfile, Cx_cpu, Cy_cpu, Dx_cpu, Dy_cpu, Dz_cpu, mtx_cpu)
+      writetofile(outputfile+'.full', Cx_cpu, Cy_cpu, Dx_cpu, Dy_cpu, Dz_cpu, mtx_cpu, mpi)
+      writetofile2(outputfile, Cx_cpu, Cy_cpu, Dx_cpu, Dy_cpu, Dz_cpu, mtx_cpu, mpi)

   #printresults(res)
   #print("Exact one 3-3")

--- a/utils.py
+++ b/utils.py
@@ -2,6 +2,7 @@ import cupy as cp
 from functools import wraps
 import time
 import numpy as np
+from mpi import size, rank

 with open('filtering.cu') as f:
    code = f.read()
@@ -56,12 +57,14 @@ def printresults(egyensulyi_mtx):
        print(np.resize(parok, (int(N/2), 2)))
        print()

-def writetofile(filename, Cx_cpu, Cy_cpu, Dx_cpu, Dy_cpu, Dz_cpu, mtx_cpu):
+def writetofile(filename, Cx_cpu, Cy_cpu, Dx_cpu, Dy_cpu, Dz_cpu, mtx_cpu, mpi):
    size_C = Cx_cpu.size
    size_D = Dx_cpu.size
    lcm = compute_lcm(size_C, size_D)
    pos = size_C * size_D

+    if mpi:
+        filename = f'R{rank}/{size}_' + filename
    f = open(filename, "w")

    for i in range(0, pos):
@@ -79,12 +82,15 @@ def writetofile(filename, Cx_cpu, Cy_cpu, Dx_cpu, Dy_cpu, Dz_cpu, mtx_cpu):

    f.close()

-def writetofile2(filename, Cx_cpu, Cy_cpu, Dx_cpu, Dy_cpu, Dz_cpu, mtx_cpu):
+def writetofile2(filename, Cx_cpu, Cy_cpu, Dx_cpu, Dy_cpu, Dz_cpu, mtx_cpu, mpi):
    size_C = Cx_cpu.size
    size_D = Dx_cpu.size
    lcm = compute_lcm(size_C, size_D)
    pos = size_C * size_D

+    if mpi:
+        filename = f'R{rank}/{size}_' + filename
+
    f = open(filename, "w")
    for i in range(0, pos):
        s = 0