o

f9b951c8 · Zoltan Karsa · d96ae1d3 · f9b951c8 · f9b951c8
Commit f9b951c8 authored Feb 02, 2023 by Zoltan Karsa
Hide whitespace changes
Inline Side-by-side

Showing with 12 additions and 6 deletions

tetrarun.py
+4 -3

utils.py
+8 -3

No files found.
--- a/tetrarun.py
+++ b/tetrarun.py
@@ -43,9 +43,9 @@ def main(argv):
   res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w)
   #printresults(res)
   print("Exact one 3-3")
-   exact_one33 = exact_one_gpu(res, 3-1, 3-1)
-   print("Filter 2-1")
-   filter21 = filter_gpu(res, 2-1, 1-1)
+   exact_one_gpu(res, 3-1, 3-1)
+   #print("Filter 2-1")
+   #filter21 = filter_gpu(res, 2-1, 1-1)

 if __name__ == "__main__":
   main(sys.argv[1:])
\ No newline at end of file
--- a/utils.py
+++ b/utils.py
 import cupy as cp
-import numpy as np
 from functools import wraps
 import time
+import torch

 with open('filtering.cu') as f:
    code = f.read()
@@ -79,10 +79,15 @@ def exact_one(egyensulyi_mtx, S, U):

 def exact_one_gpu(egyensulyi_mtx, S, U):
    size = int(egyensulyi_mtx.size / 16)
-    indexes = cp.zeros((size,), dtype=cp.bool)
+    indexes = cp.zeros((size,), dtype=bool)
    numBlock = int((size + 256 - 1) / 256)
    exact_one_cuda((numBlock,), (256,), (egyensulyi_mtx, size, indexes, S, U))
-    return egyensulyi_mtx[indexes]
+    t_emtx = torch.as_tensor(egyensulyi_mtx, device=torch.device('cuda'), dtype=torch.int8)
+    assert t_emtx.__cuda_array_interface__['data'][0] == egyensulyi_mtx.__cuda_array_interface__['data'][0]
+    t_ind = torch.as_tensor(indexes, device=torch.device('cuda'), dtype=torch.bool)
+    t_ind = t_ind.unsqueeze(-1).unsqueeze(-1).expand(-1, 4, 4)
+    res = torch.masked_select(t_emtx, t_ind)
+    return res

 def filter_gpu(egyensulyi_mtx, S, U):
    size = int(egyensulyi_mtx.size / 16)