Commit 8818e0ad by Karsa Zoltán István

Merge branch 'add-pytorch' into 'master'

Add pytorch

See merge request !3
parents 3524218d c479bed4
...@@ -50,3 +50,5 @@ coverage.xml ...@@ -50,3 +50,5 @@ coverage.xml
# Sphinx documentation # Sphinx documentation
docs/_build/ docs/_build/
.vscode/ .vscode/
*.out
// Returns true when the 16 consecutive char entries starting at `mtx`
// add up to exactly 1.
__device__ inline bool check_exact_one(char* mtx) {
    int total = 0;
    // Flat walk over indices 0..15; this visits the same entries, in the
    // same order, as a 4x4 nested loop indexing with i*4 + j.
    for (int idx = 0; idx < 16; ++idx) {
        total += mtx[idx];
    }
    return total == 1;
}
// Returns true when the 16 entries at `mtx` sum to exactly 1 AND the entry
// at flat index S*4 + U equals 1.
__device__ inline bool check_exact_one(char* mtx, int S, int U) {
    // Sum test first: the (S, U) entry is only read when the sum test passes.
    if (!check_exact_one(mtx)) {
        return false;
    }
    return mtx[S*4 + U] == 1;
}
// Kernel: one thread per 16-byte matrix. Thread `pos` evaluates
// check_exact_one() on the 16 entries starting at egysulyi_mtx + pos*16 and
// stores the result in ok_arr[pos].
//
// Launch layout: 1D grid of 1D blocks (only the .x components are used);
// threads with pos >= len exit without writing.
// NOTE(review): assumes egysulyi_mtx holds at least len*16 entries and
// ok_arr at least len entries — confirm against the caller.
__global__ void exact_one(char* egysulyi_mtx, int len, bool* ok_arr, int S, int U) {
    const int pos = blockDim.x * blockIdx.x + threadIdx.x;
    if (pos >= len)
        return;
    // Widen before scaling: pos*16 evaluated in 32-bit int overflows once
    // pos exceeds INT_MAX/16, which would produce a bogus pointer offset.
    const size_t offset = static_cast<size_t>(pos) * 16;
    ok_arr[pos] = check_exact_one(egysulyi_mtx + offset, S, U);
}
// Returns true when the entry at flat index 4*S + U of `mtx` equals 1.
__device__ inline bool check_filter(char* mtx, int S, int U) {
    const char entry = mtx[4*S + U];
    return entry == 1;
}
// Kernel: one thread per 16-byte matrix. Thread `pos` evaluates
// check_filter() on the 16 entries starting at egysulyi_mtx + pos*16 and
// stores the result in ok_arr[pos].
//
// Launch layout: 1D grid of 1D blocks (only the .x components are used);
// threads with pos >= len exit without writing.
// NOTE(review): assumes egysulyi_mtx holds at least len*16 entries and
// ok_arr at least len entries — confirm against the caller.
__global__ void filter(char* egysulyi_mtx, int len, bool* ok_arr, int S, int U) {
    const int pos = blockDim.x * blockIdx.x + threadIdx.x;
    if (pos >= len)
        return;
    // Widen before scaling: pos*16 evaluated in 32-bit int overflows once
    // pos exceeds INT_MAX/16, which would produce a bogus pointer offset.
    const size_t offset = static_cast<size_t>(pos) * 16;
    ok_arr[pos] = check_filter(egysulyi_mtx + offset, S, U);
}
\ No newline at end of file
...@@ -98,9 +98,7 @@ def angles_alap(anglestopick, plot = False): ...@@ -98,9 +98,7 @@ def angles_alap(anglestopick, plot = False):
tCx = tCy / tgtA tCx = tCy / tgtA
Cx = cp.concatenate((hCx, tCx), axis=None) Cx = cp.concatenate((hCx, tCx), axis=None)
Cx = cp.append(Cx, [0.5], axis=False)
Cy = cp.concatenate((hCy, tCy), axis=None) Cy = cp.concatenate((hCy, tCy), axis=None)
Cy = cp.append(Cy, [cp.sqrt(3.0)/2.0], axis=False)
return Cx, Cy return Cx, Cy
...@@ -142,10 +140,7 @@ def angles_ratet(anglestopick, plot = False): ...@@ -142,10 +140,7 @@ def angles_ratet(anglestopick, plot = False):
sin = cp.sin(anglestopick) sin = cp.sin(anglestopick)
Dx = cp.outer(cp.full(anglestopick.size, 1.0, dtype=cp.float64), Ex).flatten() Dx = cp.outer(cp.full(anglestopick.size, 1.0, dtype=cp.float64), Ex).flatten()
Dx = cp.append(Dx, [0.5], axis=False)
Dy = cp.outer(cos, Ey).flatten() Dy = cp.outer(cos, Ey).flatten()
Dy = cp.append(Dy, [cp.sqrt(3.0)/6.0], axis=False)
Dz = cp.outer(sin, Ey).flatten() Dz = cp.outer(sin, Ey).flatten()
Dz = cp.append(Dz, [cp.sqrt(2.0/3.0)], axis=False)
return Dx, Dy, Dz return Dx, Dy, Dz
\ No newline at end of file
...@@ -2,17 +2,17 @@ import sys, getopt ...@@ -2,17 +2,17 @@ import sys, getopt
from genax import gen_angels_to_pick, angles_alap, angles_ratet from genax import gen_angels_to_pick, angles_alap, angles_ratet
from gpu import start_kernel from gpu import start_kernel
from utils import convert, printresults from utils import convert, printresults, search, exact_one, exact_one_gpu, filter_gpu, writetofile
def main(argv): def main(argv):
outputfile = 'out.txt' outputfile = None
n = 3 n = 3
v = 3 v = 3
w = 3 w = 3
PLOT = False PLOT = False
try: try:
opts, args = getopt.getopt(argv,"hpn:v:w:d:") opts, args = getopt.getopt(argv,"hpn:v:w:d:o:")
except getopt.GetoptError as err: except getopt.GetoptError as err:
print(err) print(err)
print ('tetrarun.py -n <range division:int> -v <> -w <> -o <outputfile>') print ('tetrarun.py -n <range division:int> -v <> -w <> -o <outputfile>')
...@@ -41,7 +41,15 @@ def main(argv): ...@@ -41,7 +41,15 @@ def main(argv):
Dx, Dy, Dz = angles_ratet(space) Dx, Dy, Dz = angles_ratet(space)
res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w) res = start_kernel(Cx, Cy, Dx, Dy, Dz, v, w)
printresults(res)
if outputfile:
writetofile(outputfile, Cx, Cy, Dx, Dy, Dz, res)
#printresults(res)
#print("Exact one 3-3")
#exact_one_gpu(res, 3-1, 3-1)
#print("Filter 2-1")
#filter21 = filter_gpu(res, 2-1, 1-1)
if __name__ == "__main__": if __name__ == "__main__":
main(sys.argv[1:]) main(sys.argv[1:])
\ No newline at end of file
import cupy as cp import cupy as cp
from functools import wraps
import time
import torch
import numpy as np import numpy as np
# Read the CUDA source for the filtering kernels; the path is relative to the
# current working directory.
with open('filtering.cu') as f:
    code = f.read()

# Kernel names are registered as name expressions so the compiled module can
# be queried for them by the same strings below.
kers = ('exact_one', 'filter')
ep_pontok_module = cp.RawModule(
    code=code,
    options=('--std=c++11',),
    name_expressions=kers,
)
# Kernel handles, in the same order as `kers`.
exact_one_cuda, filter_cuda = (ep_pontok_module.get_function(name) for name in kers)
def timeit(func):
    """Decorator that reports how long each call to ``func`` takes.

    After every call a line with the function name, its positional and
    keyword arguments and the elapsed ``time.perf_counter()`` time (four
    decimals) is printed. The wrapped function's return value is passed
    through unchanged; nothing is printed if ``func`` raises.
    """
    @wraps(func)
    def timed_call(*args, **kwargs):
        began = time.perf_counter()
        result = func(*args, **kwargs)
        total_time = time.perf_counter() - began
        print(f'Function {func.__name__}{args} {kwargs} Took {total_time:.4f} seconds')
        return result
    return timed_call
def expSpace(min, max, N, exponentialliness = 20.0):
    """Return N float64 samples running from ``min`` to ``max`` with
    exponentially growing spacing.

    The exponent is sampled linearly on [0, log10(exponentialliness + 1)],
    so ``10**exponent - 1`` runs from 0 to ``exponentialliness``; scaling by
    ``(max - min) / exponentialliness`` and shifting by ``min`` maps that
    onto [min, max].

    Parameters:
        min, max: end points of the generated range.
        N: number of samples.
        exponentialliness: upper end of the ``10**x - 1`` ramp; it is used
            as a divisor, so it must be non-zero.
    """
    LinVec = cp.linspace(0, cp.log10(exponentialliness+1, dtype=cp.float64), N, dtype=cp.float64)
    return (max-min)/exponentialliness * (10.0**LinVec - 1) + min
...@@ -35,6 +57,69 @@ def printresults(egyensulyi_mtx): ...@@ -35,6 +57,69 @@ def printresults(egyensulyi_mtx):
print(np.resize(parok, (int(N/2), 2))) print(np.resize(parok, (int(N/2), 2)))
print() print()
def writetofile(filename, Cx, Cy, Dx, Dy, Dz, egyensulyi_mtx):
    """Write one text record per (C, D) combination to ``filename``.

    Each record has two parts:
      * a line ``Cx, Cy, Dx, Dy, Dz`` with the coordinates picked for
        combination ``i``;
      * the ``np.array2string`` rendering of a k-by-2 array listing the
        1-based ``(S, U)`` index pairs for which
        ``egyensulyi_mtx[i][S][U] == 1`` (S, U in 0..3, row by row).

    Parameters:
        filename: path of the output file; it is overwritten.
        Cx, Cy, Dx, Dy, Dz: device arrays exposing ``.size`` and ``.get()``.
        egyensulyi_mtx: device array exposing ``.get()``.
            NOTE(review): assumed to hold at least Cx.size * Dx.size
            matrices of at least 4x4 entries — confirm against the caller.
    """
    lcm = compute_lcm(Cx.size, Dx.size)

    # Copy everything to the host once, up front, instead of per element.
    Cx_cpu = Cx.get()
    Cy_cpu = Cy.get()
    Dx_cpu = Dx.get()
    Dy_cpu = Dy.get()
    Dz_cpu = Dz.get()
    mtx_cpu = egyensulyi_mtx.get()

    size_C = Cx.size
    size_D = Dx.size
    pos = size_C * size_D

    # The context manager closes the file even if a write or lookup raises.
    with open(filename, "w") as f:
        for i in range(pos):
            # Collect the 1-based (S, U) pairs in a list rather than growing
            # an array with np.append on every hit.
            parok = [
                (S + 1, U + 1)
                for S in range(4)
                for U in range(4)
                if mtx_cpu[i][S][U] == 1
            ]
            c_idx = i % size_C
            # Integer floor division: same value as int(i / lcm) for the
            # non-negative indices used here, without a float round trip.
            d_idx = (i + i // lcm) % size_D
            f.write(f"{Cx_cpu[c_idx]}, {Cy_cpu[c_idx]}, {Dx_cpu[d_idx]}, {Dy_cpu[d_idx]}, {Dz_cpu[d_idx]}\n")
            # An explicit (k, 2) shape keeps the empty case a (0, 2) array.
            f.write(np.array2string(np.array(parok, dtype=np.int64).reshape(len(parok), 2)))
            f.write("\n")
def search(egyensulyi_mtx, S, U):
    """Print every matrix in ``egyensulyi_mtx`` whose ``[S][U]`` entry equals 1."""
    # Lazy filter: each matrix is tested right before it is printed.
    hits = (mtx for mtx in egyensulyi_mtx if mtx[S][U] == 1)
    for mtx in hits:
        print(mtx)
def exact_one(egyensulyi_mtx, S, U):
    """Print the matrices whose only 1-entry sits at ``[S][U]``.

    A matrix is printed when, over rows and columns 0..3, the ``[S][U]``
    entry is not 0 and no other entry equals 1.
    """
    for mtx in egyensulyi_mtx:
        # One flag per cell: the target cell is bad when it is 0, any other
        # cell is bad when it is 1. A list (not a generator) is built so all
        # 16 cells are always evaluated.
        violations = [
            (mtx[row][col] == 0) if (row == S and col == U) else (mtx[row][col] == 1)
            for row in range(0, 4)
            for col in range(0, 4)
        ]
        if not any(violations):
            print(mtx)
def exact_one_gpu(egyensulyi_mtx, S, U):
    """Select, on the GPU, the matrices accepted by the ``exact_one`` kernel.

    The array is treated as consecutive groups of 16 elements; the kernel
    writes one boolean per group and the groups flagged True are returned
    via boolean-mask indexing.

    Parameters:
        egyensulyi_mtx: CuPy array of matrices.
            NOTE(review): the kernel reads it as ``char*`` — presumably a
            1-byte dtype with shape (n, 4, 4); confirm against the producer.
        S, U: 0-based row / column passed through to the kernel.

    Returns:
        ``egyensulyi_mtx[mask]`` where ``mask`` holds the kernel's results.
    """
    threads_per_block = 256
    size = egyensulyi_mtx.size // 16
    indexes = cp.zeros((size,), dtype=bool)
    # A grid with zero blocks is not a valid launch, so skip it on empty input.
    if size > 0:
        numBlock = (size + threads_per_block - 1) // threads_per_block
        # The kernel declares `int` parameters; plain Python ints would be
        # passed as 64-bit values, so hand over explicit 32-bit scalars.
        exact_one_cuda(
            (numBlock,),
            (threads_per_block,),
            (egyensulyi_mtx, cp.int32(size), indexes, cp.int32(S), cp.int32(U)),
        )
    return egyensulyi_mtx[indexes]
def filter_gpu(egyensulyi_mtx, S, U):
    """Select, on the GPU, the matrices accepted by the ``filter`` kernel.

    The array is treated as consecutive groups of 16 elements; the kernel
    writes one boolean per group and the groups flagged True are returned
    via boolean-mask indexing.

    Parameters:
        egyensulyi_mtx: CuPy array of matrices.
            NOTE(review): the kernel reads it as ``char*`` — presumably a
            1-byte dtype with shape (n, 4, 4); confirm against the producer.
        S, U: 0-based row / column passed through to the kernel.

    Returns:
        ``egyensulyi_mtx[mask]`` where ``mask`` holds the kernel's results.
    """
    threads_per_block = 256
    size = egyensulyi_mtx.size // 16
    # Builtin `bool` rather than the `cp.bool` alias, whose availability
    # depends on the installed CuPy/NumPy version.
    indexes = cp.zeros((size,), dtype=bool)
    # A grid with zero blocks is not a valid launch, so skip it on empty input.
    if size > 0:
        numBlock = (size + threads_per_block - 1) // threads_per_block
        # The kernel declares `int` parameters; plain Python ints would be
        # passed as 64-bit values, so hand over explicit 32-bit scalars.
        filter_cuda(
            (numBlock,),
            (threads_per_block,),
            (egyensulyi_mtx, cp.int32(size), indexes, cp.int32(S), cp.int32(U)),
        )
    return egyensulyi_mtx[indexes]
def compute_lcm(x, y): def compute_lcm(x, y):
if x > y: if x > y:
greater = x greater = x
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment