Commit 5fb0d82c by Zoltan Karsa

filters

parent d2383f0f
__device__ inline bool check_exact_one(char* mtx, int S, int U) {
__device__ inline bool check_exact_one(char* mtx) {
int sum = 0;
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
if (mtx[4*i + j] == 0 && i == S && j == U)
return false;
if (mtx[4*i + j] == 1 && i != S && j != U)
return false;
if (mtx[4*i + j] == 1 && i == S && j != U)
return false;
if (mtx[4*i + j] == 1 && i != S && j == U)
return false;
sum += mtx[i*4 + j];
}
}
return true;
return sum == 1;
}
// Returns true only when the one-argument check_exact_one(mtx) accepts the
// matrix AND the entry at row S, column U (row-major, stride 4) equals 1.
// The single-argument check runs first; the (S, U) entry is only read when
// that check passes.
__device__ inline bool check_exact_one(char* mtx, int S, int U) {
    if (!check_exact_one(mtx)) {
        return false;
    }
    const int cell = 4 * S + U;
    return mtx[cell] == 1;
}
__global__ void exact_one(char* egysulyi_mtx, int len, bool* ok_arr, int S, int U) {
......@@ -21,15 +19,15 @@ __global__ void exact_one(char* egysulyi_mtx, int len, bool* ok_arr, int S, int
ok_arr[pos] = check_exact_one(egysulyi_mtx + pos*16, S, U);
}
__device__ inline bool check_search(char* mtx, int S, int U) {
__device__ inline bool check_filter(char* mtx, int S, int U) {
if (mtx[4*S + U] == 1)
return false;
return true;
return true;
return false;
}
__global__ void search(char* egysulyi_mtx, int len, bool* ok_arr, int S, int U) {
__global__ void filter(char* egysulyi_mtx, int len, bool* ok_arr, int S, int U) {
int pos = blockDim.x * blockIdx.x + threadIdx.x;
if (pos >= len)
return;
ok_arr[pos] = check_search(egysulyi_mtx + pos*16, S, U);
ok_arr[pos] = check_filter(egysulyi_mtx + pos*16, S, U);
}
\ No newline at end of file
......@@ -2,7 +2,7 @@ import sys, getopt
from genax import gen_angels_to_pick, angles_alap, angles_ratet
from gpu import start_kernel
from utils import convert, printresults, search, exact_one, exact_one_gpu
from utils import convert, printresults, search, exact_one, exact_one_gpu, filter_gpu
def main(argv):
outputfile = 'out.txt'
......@@ -42,8 +42,10 @@ def main(argv):
res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w)
#printresults(res)
#exact_one(res, 3-1, 3-1)
print("Exact one 3-3")
print(exact_one_gpu(res, 3-1, 3-1))
print("Filter 2-1")
print(filter_gpu(res, 2-1, 1-1))
if __name__ == "__main__":
main(sys.argv[1:])
\ No newline at end of file
......@@ -4,9 +4,10 @@ import numpy as np
with open('filtering.cu') as f:
code = f.read()
kers = ('exact_one', )
kers = ('exact_one', 'filter')
ep_pontok_module = cp.RawModule(code=code, options=('--std=c++11',), name_expressions=kers)
exact_one_cuda = ep_pontok_module.get_function(kers[0])
filter_cuda = ep_pontok_module.get_function(kers[1])
def expSpace(min, max, N, exponentialliness = 20.0):
LinVec = cp.linspace(0, cp.log10(exponentialliness+1, dtype=cp.float64),N, dtype=cp.float64)
......@@ -70,6 +71,14 @@ def exact_one_gpu(egyensulyi_mtx, S, U):
exact_one_cuda((numBlock,), (256,), (egyensulyi_mtx, size, indexes, S, U))
return egyensulyi_mtx[indexes]
def filter_gpu(egyensulyi_mtx, S, U):
    """Select the matrices accepted by the `filter` CUDA kernel.

    The kernel treats the input as consecutive 16-element records (record
    `pos` starts at offset `pos*16`) and writes one boolean per record.
    That boolean mask is then used to index `egyensulyi_mtx`.

    Args:
        egyensulyi_mtx: CuPy array holding the matrices. Presumably its first
            axis enumerates the matrices, since the mask of length size/16 is
            applied to it directly -- TODO confirm against the caller.
        S: row index forwarded to the kernel.
        U: column index forwarded to the kernel.

    Returns:
        The entries of `egyensulyi_mtx` selected by the kernel's mask.
    """
    threads_per_block = 256

    # Integer division keeps the record count exact (no float round-trip).
    size = egyensulyi_mtx.size // 16

    # cp.bool_ is the boolean scalar type; the bare `bool` alias is not
    # available in every CuPy/NumPy release.
    indexes = cp.zeros((size,), dtype=cp.bool_)

    # A zero-sized grid is not a valid CUDA launch configuration, so skip the
    # launch when there is nothing to classify.
    if size == 0:
        return egyensulyi_mtx[indexes]

    # Ceil-division so the last, partially filled block is still launched.
    numBlock = (size + threads_per_block - 1) // threads_per_block
    filter_cuda((numBlock,), (threads_per_block,), (egyensulyi_mtx, size, indexes, S, U))
    return egyensulyi_mtx[indexes]
def compute_lcm(x, y):
if x > y:
greater = x
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment