Commit d2383f0f by Zoltan Karsa

gpu filtering

parent 173e2dce
/**
 * Checks a single 4x4 matrix stored as 16 consecutive chars, where cell
 * (i, j) lives at mtx[4*i + j].
 *
 * The matrix is accepted when cell (S, U) is not 0 and no other cell is 1.
 * Values other than 0 and 1 are not rejected anywhere (same as the
 * comparison rules this function has always used).
 *
 * Declared __host__ __device__ so the same predicate can be exercised on the
 * CPU as a reference; the pointer is const because the matrix is only read.
 *
 * @param mtx pointer to the first of the 16 cells of one matrix
 * @param S   row index of the cell that must be set
 * @param U   column index of the cell that must be set
 * @return true if the matrix passes the filter, false otherwise
 */
__host__ __device__ inline bool check_exact_one(const char* mtx, int S, int U) {
    for (int i = 0; i < 4; i++) {
        for (int j = 0; j < 4; j++) {
            const char cell = mtx[4*i + j];
            const bool is_target = (i == S && j == U);
            // Target cell must not be 0; every other cell must not be 1.
            if (is_target ? (cell == 0) : (cell == 1))
                return false;
        }
    }
    return true;
}
/**
 * Kernel: one thread per matrix. Thread `pos` evaluates check_exact_one on
 * the 16 chars starting at egysulyi_mtx + pos*16 and stores the result in
 * ok_arr[pos]. Threads with pos >= len do nothing.
 *
 * Expects a 1D launch whose total thread count is at least len.
 *
 * @param egysulyi_mtx packed matrices, 16 chars each, len matrices in total
 * @param len          number of matrices
 * @param ok_arr       output flags, one bool per matrix
 * @param S            row index forwarded to check_exact_one
 * @param U            column index forwarded to check_exact_one
 */
__global__ void exact_one(char* egysulyi_mtx, int len, bool* ok_arr, int S, int U) {
    // Index in size_t: pos * 16 no longer fits in int once len exceeds 2^27.
    const size_t pos = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (len <= 0 || pos >= static_cast<size_t>(len))
        return;
    ok_arr[pos] = check_exact_one(egysulyi_mtx + pos * 16, S, U);
}
/**
 * Checks whether cell (S, U) of a single 4x4 matrix (16 consecutive chars,
 * cell (i, j) at mtx[4*i + j]) equals 1.
 *
 * Returns true for a hit so that the result can be used directly as a
 * "keep this matrix" flag, matching the meaning of check_exact_one's return
 * value and the Python reference `search`, which selects matrices whose
 * cell (S, U) is 1. The previous version returned the opposite value.
 *
 * @param mtx pointer to the first of the 16 cells of one matrix
 * @param S   row index of the inspected cell
 * @param U   column index of the inspected cell
 * @return true if cell (S, U) is 1; false otherwise, including when (S, U)
 *         lies outside the 4x4 matrix
 */
__host__ __device__ inline bool check_search(const char* mtx, int S, int U) {
    // Reject indices outside the matrix instead of reading past its 16 cells.
    if (S < 0 || S >= 4 || U < 0 || U >= 4)
        return false;
    return mtx[4*S + U] == 1;
}
/**
 * Kernel: one thread per matrix. Thread `pos` evaluates check_search on the
 * 16 chars starting at egysulyi_mtx + pos*16 and stores the result in
 * ok_arr[pos]. Threads with pos >= len do nothing.
 *
 * Expects a 1D launch whose total thread count is at least len.
 *
 * @param egysulyi_mtx packed matrices, 16 chars each, len matrices in total
 * @param len          number of matrices
 * @param ok_arr       output flags, one bool per matrix
 * @param S            row index forwarded to check_search
 * @param U            column index forwarded to check_search
 */
__global__ void search(char* egysulyi_mtx, int len, bool* ok_arr, int S, int U) {
    // Index in size_t: pos * 16 no longer fits in int once len exceeds 2^27.
    const size_t pos = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (len <= 0 || pos >= static_cast<size_t>(len))
        return;
    ok_arr[pos] = check_search(egysulyi_mtx + pos * 16, S, U);
}
\ No newline at end of file
...@@ -2,7 +2,7 @@ import sys, getopt ...@@ -2,7 +2,7 @@ import sys, getopt
from genax import gen_angels_to_pick, angles_alap, angles_ratet from genax import gen_angels_to_pick, angles_alap, angles_ratet
from gpu import start_kernel from gpu import start_kernel
from utils import convert, printresults from utils import convert, printresults, search, exact_one, exact_one_gpu
def main(argv): def main(argv):
outputfile = 'out.txt' outputfile = 'out.txt'
...@@ -41,7 +41,9 @@ def main(argv): ...@@ -41,7 +41,9 @@ def main(argv):
Dx, Dy, Dz = angles_ratet(space) Dx, Dy, Dz = angles_ratet(space)
res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w) res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w)
printresults(res) #printresults(res)
#exact_one(res, 3-1, 3-1)
print(exact_one_gpu(res, 3-1, 3-1))
if __name__ == "__main__": if __name__ == "__main__":
main(sys.argv[1:]) main(sys.argv[1:])
\ No newline at end of file
import cupy as cp import cupy as cp
import numpy as np import numpy as np
# Load the CUDA source at import time. The path is relative, so it is resolved
# against the process's current working directory.
with open('filtering.cu') as f:
    code = f.read()
# Kernel names passed to RawModule as name expressions; only 'exact_one' is
# requested here.
kers = ('exact_one', )
# Compile the source with the C++11 option and fetch the kernel by the first
# name in `kers`.
ep_pontok_module = cp.RawModule(code=code, options=('--std=c++11',), name_expressions=kers)
exact_one_cuda = ep_pontok_module.get_function(kers[0])
def expSpace(min, max, N, exponentialliness = 20.0): def expSpace(min, max, N, exponentialliness = 20.0):
LinVec = cp.linspace(0, cp.log10(exponentialliness+1, dtype=cp.float64),N, dtype=cp.float64) LinVec = cp.linspace(0, cp.log10(exponentialliness+1, dtype=cp.float64),N, dtype=cp.float64)
return (max-min)/exponentialliness * (10.0**LinVec - 1) + min return (max-min)/exponentialliness * (10.0**LinVec - 1) + min
...@@ -35,6 +42,34 @@ def printresults(egyensulyi_mtx): ...@@ -35,6 +42,34 @@ def printresults(egyensulyi_mtx):
print(np.resize(parok, (int(N/2), 2))) print(np.resize(parok, (int(N/2), 2)))
print() print()
def search(egyensulyi_mtx, S, U):
    """Print every matrix in `egyensulyi_mtx` whose cell [S][U] equals 1.

    Args:
        egyensulyi_mtx: iterable of matrices indexable as matrix[row][col].
        S: row index of the inspected cell.
        U: column index of the inspected cell.
    """
    for matrix in egyensulyi_mtx:
        cell = matrix[S][U]
        if cell == 1:
            print(matrix)
def exact_one(egyensulyi_mtx, S, U):
    """Print every 4x4 matrix whose cell [S][U] is not 0 and whose other cells are not 1.

    All 16 cells of each matrix are inspected before the decision is made.

    Args:
        egyensulyi_mtx: iterable of matrices indexable as matrix[row][col]
            for row, col in 0..3.
        S: row index of the cell that must be set.
        U: column index of the cell that must be set.
    """
    for matrix in egyensulyi_mtx:
        accepted = True
        for row in range(4):
            for col in range(4):
                cell = matrix[row][col]
                if row == S and col == U:
                    # The target cell must not be 0.
                    if cell == 0:
                        accepted = False
                elif cell == 1:
                    # Any other cell (same row, same column, or elsewhere) must not be 1.
                    accepted = False
        if accepted:
            print(matrix)
def exact_one_gpu(egyensulyi_mtx, S, U):
    """Filter matrices on the GPU with the `exact_one` kernel.

    Launches one thread per group of 16 elements of `egyensulyi_mtx`, lets the
    kernel write one boolean flag per group, and returns the entries of
    `egyensulyi_mtx` selected by that boolean mask.

    Args:
        egyensulyi_mtx: CuPy array holding the matrices, 16 elements each.
            NOTE(review): the kernel reads the buffer as `char*`, so this
            presumably has to be a contiguous 1-byte-per-element array whose
            first axis enumerates the matrices -- confirm against the producer.
        S: row index passed to the kernel.
        U: column index passed to the kernel.

    Returns:
        The entries of `egyensulyi_mtx` whose flag was set by the kernel.
    """
    threads_per_block = 256
    size = egyensulyi_mtx.size // 16
    indexes = cp.zeros((size,), dtype=cp.bool_)
    if size == 0:
        # Nothing to test; a launch with zero blocks is an invalid configuration.
        return egyensulyi_mtx[indexes]
    numBlock = (size + threads_per_block - 1) // threads_per_block
    # The kernel declares `int` parameters; plain Python ints are passed as
    # 64-bit values, so hand over explicit 32-bit scalars instead.
    exact_one_cuda(
        (numBlock,),
        (threads_per_block,),
        (egyensulyi_mtx, np.int32(size), indexes, np.int32(S), np.int32(U)),
    )
    return egyensulyi_mtx[indexes]
def compute_lcm(x, y): def compute_lcm(x, y):
if x > y: if x > y:
greater = x greater = x
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment