Commit 8818e0ad by Karsa Zoltán István

Merge branch 'add-pytorch' into 'master'

Add pytorch

See merge request !3
parents 3524218d c479bed4
......@@ -49,4 +49,6 @@ coverage.xml
# Sphinx documentation
docs/_build/
.vscode/
\ No newline at end of file
.vscode/
*.out
// Returns true when the 16 entries of the 4x4 matrix starting at `mtx`
// add up to exactly 1.
__device__ inline bool check_exact_one(char* mtx) {
    int total = 0;
    // The 4x4 cells are addressed as mtx[row*4 + col], i.e. 16 consecutive
    // chars, so a single flat pass visits every cell in the same order.
    for (int cell = 0; cell < 16; ++cell) {
        total += mtx[cell];
    }
    return total == 1;
}
// Returns true when the entries of the 4x4 matrix at `mtx` sum to exactly 1
// and the entry at row S, column U is that 1.
__device__ inline bool check_exact_one(char* mtx, int S, int U) {
    // Reject first on the whole-matrix sum; the (S, U) cell is only read
    // when that check has passed.
    if (!check_exact_one(mtx)) {
        return false;
    }
    return mtx[S * 4 + U] == 1;
}
// Kernel: for each of the `len` 4x4 char matrices stored back to back in
// `egysulyi_mtx` (16 chars per matrix), writes to ok_arr[pos] whether the
// matrix entries sum to exactly 1 and the entry at (S, U) is 1.
//
// Launch layout: only the x dimension of the grid/block is used; one thread
// handles one matrix, and threads with pos >= len exit without writing.
// No shared memory is used.
__global__ void exact_one(char* egysulyi_mtx, int len, bool* ok_arr, int S, int U) {
    int pos = blockDim.x * blockIdx.x + threadIdx.x;
    if (pos >= len)
        return;
    // Widen before multiplying: pos*16 evaluated in 32-bit int overflows
    // once pos reaches 2^27 matrices.
    const size_t offset = static_cast<size_t>(pos) * 16;
    ok_arr[pos] = check_exact_one(egysulyi_mtx + offset, S, U);
}
// Returns true when the entry at row S, column U of the 4-wide row-major
// matrix at `mtx` equals 1.
__device__ inline bool check_filter(char* mtx, int S, int U) {
    return mtx[4 * S + U] == 1;
}
// Kernel: for each of the `len` matrices stored back to back in
// `egysulyi_mtx` (16 chars per matrix), writes to ok_arr[pos] whether the
// entry at (S, U) equals 1.
//
// Launch layout: only the x dimension of the grid/block is used; one thread
// handles one matrix, and threads with pos >= len exit without writing.
// No shared memory is used.
__global__ void filter(char* egysulyi_mtx, int len, bool* ok_arr, int S, int U) {
    int pos = blockDim.x * blockIdx.x + threadIdx.x;
    if (pos >= len)
        return;
    // Widen before multiplying: pos*16 evaluated in 32-bit int overflows
    // once pos reaches 2^27 matrices.
    const size_t offset = static_cast<size_t>(pos) * 16;
    ok_arr[pos] = check_filter(egysulyi_mtx + offset, S, U);
}
\ No newline at end of file
......@@ -98,9 +98,7 @@ def angles_alap(anglestopick, plot = False):
tCx = tCy / tgtA
Cx = cp.concatenate((hCx, tCx), axis=None)
Cx = cp.append(Cx, [0.5], axis=False)
Cy = cp.concatenate((hCy, tCy), axis=None)
Cy = cp.append(Cy, [cp.sqrt(3.0)/2.0], axis=False)
return Cx, Cy
......@@ -142,10 +140,7 @@ def angles_ratet(anglestopick, plot = False):
sin = cp.sin(anglestopick)
Dx = cp.outer(cp.full(anglestopick.size, 1.0, dtype=cp.float64), Ex).flatten()
Dx = cp.append(Dx, [0.5], axis=False)
Dy = cp.outer(cos, Ey).flatten()
Dy = cp.append(Dy, [cp.sqrt(3.0)/6.0], axis=False)
Dz = cp.outer(sin, Ey).flatten()
Dz = cp.append(Dz, [cp.sqrt(2.0/3.0)], axis=False)
return Dx, Dy, Dz
\ No newline at end of file
......@@ -2,17 +2,17 @@ import sys, getopt
from genax import gen_angels_to_pick, angles_alap, angles_ratet
from gpu import start_kernel
from utils import convert, printresults
from utils import convert, printresults, search, exact_one, exact_one_gpu, filter_gpu, writetofile
def main(argv):
outputfile = 'out.txt'
outputfile = None
n = 3
v = 3
w = 3
PLOT = False
try:
opts, args = getopt.getopt(argv,"hpn:v:w:d:")
opts, args = getopt.getopt(argv,"hpn:v:w:d:o:")
except getopt.GetoptError as err:
print(err)
print ('tetrarun.py -n <range division:int> -v <> -w <> -o <outputfile>')
......@@ -41,7 +41,15 @@ def main(argv):
Dx, Dy, Dz = angles_ratet(space)
res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w)
printresults(res)
if outputfile:
writetofile(outputfile, Cx, Cy, Dx, Dy, Dz, res)
#printresults(res)
#print("Exact one 3-3")
#exact_one_gpu(res, 3-1, 3-1)
#print("Filter 2-1")
#filter21 = filter_gpu(res, 2-1, 1-1)
if __name__ == "__main__":
main(sys.argv[1:])
\ No newline at end of file
import cupy as cp
from functools import wraps
import time
import torch
import numpy as np
# Load the CUDA source from filtering.cu (relative path, so it is resolved
# against the current working directory) and look up the two kernels by name
# when this module is imported.
kers = ('exact_one', 'filter')
with open('filtering.cu') as f:
    code = f.read()
ep_pontok_module = cp.RawModule(
    code=code,
    options=('--std=c++11',),
    name_expressions=kers,
)
# Same lookup order as the names in `kers`: exact_one first, then filter.
exact_one_cuda, filter_cuda = [ep_pontok_module.get_function(name) for name in kers]
def timeit(func):
    """Decorator that prints how long each call to ``func`` took.

    The wrapped function's return value is passed through unchanged. The
    printed line contains the function name, its positional and keyword
    arguments, and the elapsed wall-clock time measured with
    ``time.perf_counter``.
    """
    @wraps(func)
    def timeit_wrapper(*args, **kwargs):
        began = time.perf_counter()
        outcome = func(*args, **kwargs)
        total_time = time.perf_counter() - began
        print(f'Function {func.__name__}{args} {kwargs} Took {total_time:.4f} seconds')
        return outcome
    return timeit_wrapper
def expSpace(min, max, N, exponentialliness = 20.0):
    """Return N float64 samples between ``min`` and ``max`` with exponential spacing.

    The exponents run linearly from 0 to log10(exponentialliness + 1), so
    ``10**exponent - 1`` runs from 0 to ``exponentialliness``; scaling by
    ``(max - min) / exponentialliness`` and shifting by ``min`` maps that
    range onto [min, max].
    """
    top_exponent = cp.log10(exponentialliness+1, dtype=cp.float64)
    exponents = cp.linspace(0, top_exponent, N, dtype=cp.float64)
    scale = (max-min)/exponentialliness
    return scale * (10.0**exponents - 1) + min
......@@ -35,6 +57,69 @@ def printresults(egyensulyi_mtx):
print(np.resize(parok, (int(N/2), 2)))
print()
def writetofile(filename, Cx, Cy, Dx, Dy, Dz, egyensulyi_mtx):
    """Write each C/D point combination and its matrix entries equal to 1 to a text file.

    For every flat index ``i`` in ``range(Cx.size * Dx.size)`` two lines are
    written: the coordinates ``Cx, Cy, Dx, Dy, Dz`` selected for ``i``, then
    the 1-based ``(S, U)`` index pairs of the cells of ``egyensulyi_mtx[i]``
    that equal 1, formatted with ``np.array2string``.

    Parameters:
        filename: path of the output file; it is opened in "w" mode, so an
            existing file is overwritten.
        Cx, Cy: coordinate arrays for the C points (must provide ``.size``
            and ``.get()`` -- presumably CuPy arrays; confirm against caller).
        Dx, Dy, Dz: coordinate arrays for the D points (same requirements).
        egyensulyi_mtx: array indexable as ``[i][S][U]`` with S, U in 0..3
            after ``.get()``.
    """
    size_C = Cx.size
    size_D = Dx.size
    lcm = compute_lcm(size_C, size_D)
    Cx_cpu = Cx.get()
    Cy_cpu = Cy.get()
    Dx_cpu = Dx.get()
    Dy_cpu = Dy.get()
    Dz_cpu = Dz.get()
    mtx_cpu = egyensulyi_mtx.get()
    # The context manager closes the file even if a write or an index fails.
    with open(filename, "w") as f:
        for i in range(size_C * size_D):
            # Collect the pairs in a plain list; np.append would reallocate
            # the whole array on every hit.
            pairs = []
            for S in range(4):
                for U in range(4):
                    if mtx_cpu[i][S][U] == 1:
                        pairs.extend((S + 1, U + 1))
            parok = np.array(pairs, dtype=np.int8)
            c_idx = i % size_C
            # Integer floor division avoids the float round-trip of int(i / lcm).
            # NOTE(review): this index mapping presumably mirrors how the
            # kernel pairs C and D points -- confirm against start_kernel.
            d_idx = (i + i // lcm) % size_D
            f.write(f"{Cx_cpu[c_idx]}, {Cy_cpu[c_idx]}, {Dx_cpu[d_idx]}, {Dy_cpu[d_idx]}, {Dz_cpu[d_idx]}\n")
            f.write(np.array2string(np.resize(parok, (parok.size // 2, 2))))
            f.write("\n")
def search(egyensulyi_mtx, S, U):
    """Print every matrix in ``egyensulyi_mtx`` whose ``[S][U]`` entry equals 1."""
    matching = (mtx for mtx in egyensulyi_mtx if mtx[S][U] == 1)
    for mtx in matching:
        print(mtx)
def exact_one(egyensulyi_mtx, S, U):
    """Print the matrices whose only entry equal to 1 sits at ``[S][U]``.

    A matrix is printed when, within its 4x4 range, the ``[S][U]`` cell is
    not 0 and no other cell equals 1.
    """
    for mtx in egyensulyi_mtx:
        # Build the full list first so that every one of the 16 cells is
        # inspected, without stopping at the first violation.
        violations = [
            (mtx[row][col] == 0) if (row == S and col == U) else (mtx[row][col] == 1)
            for row in range(0, 4)
            for col in range(0, 4)
        ]
        if not any(violations):
            print(mtx)
def exact_one_gpu(egyensulyi_mtx, S, U):
    """Return the matrices whose entries sum to exactly 1 with that 1 at ``[S][U]``.

    Runs the ``exact_one`` CUDA kernel with one thread per 16-element matrix
    and uses the resulting boolean mask to index ``egyensulyi_mtx``.

    Parameters:
        egyensulyi_mtx: device array holding 16 elements per matrix. The
            kernel reads it as ``char*`` -- assumes a 1-byte integer dtype
            and contiguous layout; TODO confirm against the producer.
        S, U: zero-based row and column of the required 1 entry.

    Returns:
        ``egyensulyi_mtx`` indexed by the boolean mask of matching matrices.
    """
    threads = 256
    size = egyensulyi_mtx.size // 16
    indexes = cp.zeros((size,), dtype=bool)
    # An empty input would mean a zero-sized grid, which is not a valid
    # launch configuration, so the kernel is skipped in that case.
    if size > 0:
        numBlock = (size + threads - 1) // threads
        # The kernel declares `int` parameters; CuPy does not validate or
        # convert scalar argument types, so pass explicit 32-bit integers.
        exact_one_cuda(
            (numBlock,),
            (threads,),
            (egyensulyi_mtx, np.int32(size), indexes, np.int32(S), np.int32(U)),
        )
    return egyensulyi_mtx[indexes]
def filter_gpu(egyensulyi_mtx, S, U):
    """Return the matrices whose ``[S][U]`` entry equals 1.

    Runs the ``filter`` CUDA kernel with one thread per 16-element matrix
    and uses the resulting boolean mask to index ``egyensulyi_mtx``.

    Parameters:
        egyensulyi_mtx: device array holding 16 elements per matrix. The
            kernel reads it as ``char*`` -- assumes a 1-byte integer dtype
            and contiguous layout; TODO confirm against the producer.
        S, U: zero-based row and column of the entry to test.

    Returns:
        ``egyensulyi_mtx`` indexed by the boolean mask of matching matrices.
    """
    threads = 256
    size = egyensulyi_mtx.size // 16
    # Builtin `bool` is accepted as a dtype everywhere and matches
    # exact_one_gpu; the `cp.bool` alias may not exist in every CuPy release.
    indexes = cp.zeros((size,), dtype=bool)
    # An empty input would mean a zero-sized grid, which is not a valid
    # launch configuration, so the kernel is skipped in that case.
    if size > 0:
        numBlock = (size + threads - 1) // threads
        # The kernel declares `int` parameters; CuPy does not validate or
        # convert scalar argument types, so pass explicit 32-bit integers.
        filter_cuda(
            (numBlock,),
            (threads,),
            (egyensulyi_mtx, np.int32(size), indexes, np.int32(S), np.int32(U)),
        )
    return egyensulyi_mtx[indexes]
def compute_lcm(x, y):
if x > y:
greater = x
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment