Commit f9b951c8 by Zoltan Karsa

o

parent d96ae1d3
......@@ -43,9 +43,9 @@ def main(argv):
res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w)
#printresults(res)
print("Exact one 3-3")
exact_one33 = exact_one_gpu(res, 3-1, 3-1)
print("Filter 2-1")
filter21 = filter_gpu(res, 2-1, 1-1)
exact_one_gpu(res, 3-1, 3-1)
#print("Filter 2-1")
#filter21 = filter_gpu(res, 2-1, 1-1)
if __name__ == "__main__":
main(sys.argv[1:])
\ No newline at end of file
import cupy as cp
import numpy as np
from functools import wraps
import time
import torch
with open('filtering.cu') as f:
code = f.read()
......@@ -79,10 +79,15 @@ def exact_one(egyensulyi_mtx, S, U):
def exact_one_gpu(egyensulyi_mtx, S, U):
    """Gather the 16-element groups flagged by the ``exact_one_cuda`` kernel.

    One boolean flag is allocated per group of 16 elements of
    ``egyensulyi_mtx``. The flags are handed to ``exact_one_cuda`` (which is
    expected to set them), broadcast to a (size, 4, 4) mask, and used to pick
    elements with ``torch.masked_select``.

    Args:
        egyensulyi_mtx: Device array exposing ``__cuda_array_interface__``.
            Presumably a CuPy array shaped (N, 4, 4) with an 8-bit integer
            dtype -- TODO confirm against the caller.
        S: Forwarded unchanged to ``exact_one_cuda``.
        U: Forwarded unchanged to ``exact_one_cuda``.

    Returns:
        The 1-D tensor produced by ``torch.masked_select``, containing the
        elements of every flagged group.

    Raises:
        AssertionError: If the torch tensor does not start at the same device
            address as ``egyensulyi_mtx`` (i.e. the conversion made a copy).
    """
    threads_per_block = 256

    # One flag per 16-element group; floor division keeps the sizes integral
    # without a round trip through float.
    size = egyensulyi_mtx.size // 16
    indexes = cp.zeros((size,), dtype=bool)
    numBlock = (size + threads_per_block - 1) // threads_per_block
    exact_one_cuda(
        (numBlock,),
        (threads_per_block,),
        (egyensulyi_mtx, size, indexes, S, U),
    )

    device = torch.device('cuda')
    t_emtx = torch.as_tensor(egyensulyi_mtx, device=device, dtype=torch.int8)
    # Sanity check that the tensor aliases the original buffer: identical
    # device pointers mean no conversion copy was made.
    assert (
        t_emtx.__cuda_array_interface__['data'][0]
        == egyensulyi_mtx.__cuda_array_interface__['data'][0]
    )

    t_ind = torch.as_tensor(indexes, device=device, dtype=torch.bool)
    # Stretch each per-group flag over a 4x4 block so the mask lines up with
    # the 16 elements of its group; expand() creates a view, not a copy.
    t_ind = t_ind.unsqueeze(-1).unsqueeze(-1).expand(-1, 4, 4)
    return torch.masked_select(t_emtx, t_ind)
def filter_gpu(egyensulyi_mtx, S, U):
size = int(egyensulyi_mtx.size / 16)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment