Commit 865dbc07 by Zoltan Karsa

size

parent 8f302c12
...@@ -26,6 +26,8 @@ extern "C" ...@@ -26,6 +26,8 @@ extern "C"
__global__ __global__
void parosit(const double* x1, const double* x2, double* a, double* b, const int m, const double PI) { void parosit(const double* x1, const double* x2, double* a, double* b, const int m, const double PI) {
int tid = blockDim.x * blockIdx.x + threadIdx.x; int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (m <= tid || m*m <= tid*m+m-1)
return;
float alpha = x1[tid]; float alpha = x1[tid];
if (m*m <= tid*m+m-1) if (m*m <= tid*m+m-1)
return; return;
...@@ -47,9 +49,9 @@ extern "C" ...@@ -47,9 +49,9 @@ extern "C"
__global__ __global__
void parosit2(const double* x1, const double* x2, double* a, double* b, const int m, const double PI) { void parosit2(const double* x1, const double* x2, double* a, double* b, const int m, const double PI) {
int tid = blockDim.x * blockIdx.x + threadIdx.x; int tid = blockDim.x * blockIdx.x + threadIdx.x;
float alpha = x1[tid]; if (m <= tid || m*m <= tid*m+m-1)
if (m*m <= tid*m+m-1)
return; return;
float alpha = x1[tid];
for (int i = 0; i < m; i++) { for (int i = 0; i < m; i++) {
float betha = x2[i]; float betha = x2[i];
if ((alpha + betha) < PI && alpha > 0.0) { if ((alpha + betha) < PI && alpha > 0.0) {
......
...@@ -9,7 +9,7 @@ ep_pontok_module = cp.RawModule(code=code, options=('--std=c++11',), name_expres ...@@ -9,7 +9,7 @@ ep_pontok_module = cp.RawModule(code=code, options=('--std=c++11',), name_expres
fun = ep_pontok_module.get_function(kers[0]) fun = ep_pontok_module.get_function(kers[0])
def start_kernel(Cx, Cy, Dx, Dy, Dz, v, w): def start_kernel(Cx, Cy, Dx, Dy, Dz, v, w):
print("Res size (byte): ", Cx.size*Dx.size*4*4)
egyensulyi_mtx = cp.zeros((Cx.size*Dx.size, 4, 4), dtype=cp.int8) egyensulyi_mtx = cp.zeros((Cx.size*Dx.size, 4, 4), dtype=cp.int8)
numBlock = int((Cx.size*Dx.size + 256 - 1) / 256) numBlock = int((Cx.size*Dx.size + 256 - 1) / 256)
fun((numBlock,), (256,), (v, w, Cx, Cy, Dx, Dy, Dz, Cx.size, Dx.size, egyensulyi_mtx)) fun((numBlock,), (256,), (v, w, Cx, Cy, Dx, Dy, Dz, Cx.size, Dx.size, egyensulyi_mtx))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment