Commit d96ae1d3 by Zoltan Karsa

ok

parent 5fb0d82c
...@@ -43,9 +43,9 @@ def main(argv): ...@@ -43,9 +43,9 @@ def main(argv):
res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w) res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w)
#printresults(res) #printresults(res)
print("Exact one 3-3") print("Exact one 3-3")
print(exact_one_gpu(res, 3-1, 3-1)) exact_one33 = exact_one_gpu(res, 3-1, 3-1)
print("Filter 2-1") print("Filter 2-1")
print(filter_gpu(res, 2-1, 1-1)) filter21 = filter_gpu(res, 2-1, 1-1)
if __name__ == "__main__": if __name__ == "__main__":
main(sys.argv[1:]) main(sys.argv[1:])
\ No newline at end of file
import cupy as cp import cupy as cp
import numpy as np import numpy as np
from functools import wraps
import time
with open('filtering.cu') as f: with open('filtering.cu') as f:
code = f.read() code = f.read()
...@@ -9,6 +11,17 @@ ep_pontok_module = cp.RawModule(code=code, options=('--std=c++11',), name_expres ...@@ -9,6 +11,17 @@ ep_pontok_module = cp.RawModule(code=code, options=('--std=c++11',), name_expres
exact_one_cuda = ep_pontok_module.get_function(kers[0]) exact_one_cuda = ep_pontok_module.get_function(kers[0])
filter_cuda = ep_pontok_module.get_function(kers[1]) filter_cuda = ep_pontok_module.get_function(kers[1])
def timeit(func):
    """Decorate *func* so each call prints its wall-clock duration.

    The wrapper forwards all positional and keyword arguments unchanged,
    returns whatever *func* returns, and prints one line of the form
    ``Function <name><args> <kwargs> Took <seconds> seconds`` to stdout.
    The line is printed even when *func* raises; the exception then
    propagates to the caller untouched.

    Args:
        func: The callable to time. It must expose ``__name__``.

    Returns:
        A wrapper that carries *func*'s metadata (via ``functools.wraps``).
    """
    @wraps(func)
    def timeit_wrapper(*args, **kwargs):
        # NOTE(review): this measures host-side wall-clock time only. If the
        # wrapped function launches asynchronous GPU work, the device may
        # still be busy when the call returns -- confirm whether an explicit
        # synchronization is needed before trusting the number.
        start_time = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            # Runs on both the normal and the exceptional path, so a failing
            # call still reports how long it ran before raising.
            total_time = time.perf_counter() - start_time
            # args/kwargs are formatted into the message, so their repr is
            # computed on every call.
            print(f'Function {func.__name__}{args} {kwargs} Took {total_time:.4f} seconds')
    return timeit_wrapper
def expSpace(min, max, N, exponentialliness = 20.0):
    """Return N float64 samples from ``min`` to ``max`` with growing spacing.

    A linear ramp of exponents from 0 to ``log10(exponentialliness + 1)`` is
    raised to the power of ten, shifted to start at zero, and rescaled so the
    first sample equals ``min`` and (with the default endpoint-inclusive
    ``linspace``) the last equals ``max``. Larger ``exponentialliness``
    values crowd more samples towards ``min``.

    Args:
        min: Value of the first sample.
        max: Value the ramp is scaled to reach.
        N: Number of samples to generate.
        exponentialliness: Ratio controlling how strongly the spacing grows;
            it is used as a divisor, so it must be non-zero.

    Returns:
        The array of samples produced by ``cp.linspace`` and the rescaling.
    """
    # Highest exponent of the ramp: 10**top_exponent == exponentialliness + 1.
    top_exponent = cp.log10(exponentialliness+1, dtype=cp.float64)
    exponents = cp.linspace(0, top_exponent, N, dtype=cp.float64)
    # Maps the raw range [0, exponentialliness] onto [0, max - min].
    scale = (max-min)/exponentialliness
    return scale * (10.0**exponents - 1) + min
...@@ -71,7 +84,6 @@ def exact_one_gpu(egyensulyi_mtx, S, U): ...@@ -71,7 +84,6 @@ def exact_one_gpu(egyensulyi_mtx, S, U):
exact_one_cuda((numBlock,), (256,), (egyensulyi_mtx, size, indexes, S, U)) exact_one_cuda((numBlock,), (256,), (egyensulyi_mtx, size, indexes, S, U))
return egyensulyi_mtx[indexes] return egyensulyi_mtx[indexes]
def filter_gpu(egyensulyi_mtx, S, U): def filter_gpu(egyensulyi_mtx, S, U):
size = int(egyensulyi_mtx.size / 16) size = int(egyensulyi_mtx.size / 16)
indexes = cp.zeros((size,), dtype=cp.bool) indexes = cp.zeros((size,), dtype=cp.bool)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment