Commit 694ebe1b by Zoltan Karsa

bugfix

parent 340ad476
...@@ -263,6 +263,8 @@ __global__ void gpu_egyensulyi(int v, int w, const double* Cx_arr, const double ...@@ -263,6 +263,8 @@ __global__ void gpu_egyensulyi(int v, int w, const double* Cx_arr, const double
int pos = blockDim.x * blockIdx.x + threadIdx.x; int pos = blockDim.x * blockIdx.x + threadIdx.x;
if (pos >= size_C*size_D) if (pos >= size_C*size_D)
return; return;
if (pos == 0)
printf("%d", size_C*size_D);
vec3 C(Cx_arr[pos % size_C], Cy_arr[pos % size_C], 0.0); vec3 C(Cx_arr[pos % size_C], Cy_arr[pos % size_C], 0.0);
vec3 D(Dx_arr[(pos + pos / lcm) % size_D], Dy_arr[(pos + pos / lcm) % size_D], Dz_arr[(pos + pos / lcm) % size_D]); vec3 D(Dx_arr[(pos + pos / lcm) % size_D], Dy_arr[(pos + pos / lcm) % size_D], Dz_arr[(pos + pos / lcm) % size_D]);
......
...@@ -16,6 +16,7 @@ def start_kernel(Cx, Cy, Dx, Dy, Dz, v, w): ...@@ -16,6 +16,7 @@ def start_kernel(Cx, Cy, Dx, Dy, Dz, v, w):
lcm = compute_lcm(Cx.size, Dx.size) lcm = compute_lcm(Cx.size, Dx.size)
egyensulyi_mtx = cp.zeros((Cx.size*Dx.size, 4, 4), dtype=cp.int8) egyensulyi_mtx = cp.zeros((Cx.size*Dx.size, 4, 4), dtype=cp.int8)
numBlock = int((Cx.size*Dx.size + fun.max_threads_per_block - 1) / fun.max_threads_per_block) numBlock = int((Cx.size*Dx.size + fun.max_threads_per_block - 1) / fun.max_threads_per_block)
print(f"{Cx.size}, {Cy.size}, {Dx.size}, {Dy.size}, {Dz.size}, {egyensulyi_mtx.shape}, {egyensulyi_mtx.nbytes}")
fun((numBlock,), (fun.max_threads_per_block,), (v, w, Cx, Cy, Dx, Dy, Dz, Cx.size, Dx.size, lcm, egyensulyi_mtx)) fun((numBlock,), (fun.max_threads_per_block,), (v, w, Cx, Cy, Dx, Dy, Dz, Cx.size, Dx.size, lcm, egyensulyi_mtx))
return egyensulyi_mtx return egyensulyi_mtx
#!/bin/bash #!/bin/bash
#SBATCH --job-name=gpgpu # a job neve #SBATCH --job-name=gpgpu # a job neve
#SBATCH -N 1 # hány node-ot szeretnénk használni #SBATCH -N 4 # hány node-ot szeretnénk használni
#SBATCH -p gpu # melyik partícióból #SBATCH -p gpu # melyik partícióból
#SBATCH --gres gpu # melyik partícióból #SBATCH --gres gpu # melyik partícióból
#SBATCH --time=99:00:00 # maximális idő #SBATCH --time=99:00:00 # maximális idő
#SBATCH -o politopok.out # kimeneti fájl #SBATCH -o politopok.out # kimeneti fájl
#SBATCH --mem=0 #SBATCH --mem=0
module load anaconda3 module load anaconda3
module load cuda11.0 module load cuda11.0
module load mpi-3.1 module load mpi-3.1
srun python tetrarun.py -n 51 -v 50 -w 50 -o /gv0/karsa/poli_51_50_50.out mpirun python tetrarun.py -n 51 -v 50 -w 50 -o /gv0/karsa/poli_51_50_50.out
\ No newline at end of file \ No newline at end of file
...@@ -68,12 +68,12 @@ def main(argv): ...@@ -68,12 +68,12 @@ def main(argv):
res.get(out=mtx_cpu) res.get(out=mtx_cpu)
#writetofile(outputfile+'.full', Cx_cpu, Cy_cpu, Dx_cpu, Dy_cpu, Dz_cpu, mtx_cpu, mpi) #writetofile(outputfile+'.full', Cx_cpu, Cy_cpu, Dx_cpu, Dy_cpu, Dz_cpu, mtx_cpu, mpi)
writetofile2(outputfile, Cx_cpu, Cy_cpu, Dx_cpu, Dy_cpu, Dz_cpu, mtx_cpu, mpi) #writetofile2(outputfile, Cx_cpu, Cy_cpu, Dx_cpu, Dy_cpu, Dz_cpu, mtx_cpu, mpi)
if outputfile and binary: if outputfile and binary:
offset = '' offset = ''
if mpi: if mpi:
offset = f"R{rank}/{size}" offset = f"R{rank}-{size}"
res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w) res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w)
os.mkdir(outputfile) os.mkdir(outputfile)
cp.save(f"{outputfile}/{offset}_Cx.npy", Cx) cp.save(f"{outputfile}/{offset}_Cx.npy", Cx)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment