Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
Karsa Zoltán István
/
politopok
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
3524218d
authored
Jan 31, 2023
by
Karsa Zoltán István
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'testing' into 'master'
Testing See merge request
!2
parents
ae7ef0d9
1c755e5b
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
112 additions
and
23 deletions
+112
-23
cores.py
+32
-0
epgpu.cu
+14
-14
genax.py
+13
-5
gpu.py
+5
-2
tetrarun.py
+4
-2
utils.py
+44
-0
No files found.
cores.py
0 → 100644
View file @
3524218d
from
numba
import
cuda
# Number of CUDA cores per streaming multiprocessor (SM), keyed by the
# device's (major, minor) compute capability tuple.
cc_cores_per_SM_dict = {
    (2, 0): 32,
    (2, 1): 48,
    (3, 0): 192,
    (3, 5): 192,
    (3, 7): 192,
    (5, 0): 128,
    (5, 2): 128,
    (6, 0): 64,
    (6, 1): 128,
    (7, 0): 64,
    (7, 5): 64,
    (8, 0): 64,
    (8, 6): 128,
    (8, 9): 128,
    (9, 0): 128,
}
# The dictionary is queried with .get(), so a compute capability that is not
# listed yields None instead of raising KeyError. The dictionary needs to be
# extended as new devices become available, and currently does not account
# for all Jetson devices.

device = cuda.get_current_device()
my_sms = device.MULTIPROCESSOR_COUNT
my_cc = device.compute_capability
cores_per_sm = cc_cores_per_SM_dict.get(my_cc)

# For an unlisted compute capability cores_per_sm is None; multiplying None by
# the SM count would raise TypeError, so the total is reported as None too.
total_cores = cores_per_sm * my_sms if cores_per_sm is not None else None

print("GPU compute capability: ", my_cc)
print("GPU total number of SMs: ", my_sms)
print("total cores: ", total_cores)
\ No newline at end of file
epgpu.cu
View file @
3524218d
...
@@ -190,13 +190,13 @@ __device__ void ABC_oldal(int v, int w, const vec3& C, const vec3& D, char* egys
...
@@ -190,13 +190,13 @@ __device__ void ABC_oldal(int v, int w, const vec3& C, const vec3& D, char* egys
vec3 AB(1.0/v, 0.0, 0.0);
vec3 AB(1.0/v, 0.0, 0.0);
vec3 AC = C/v;
vec3 AC = C/v;
for (double i =
1.0
; i < v; i++)
for (double i =
0.0001
; i < v; i++)
{
{
for (double j =
1.0
; j < v; j++)
for (double j =
0.0001
; j < v; j++)
{
{
vec3 K = i*AB + j * AC;
vec3 K = i*AB + j * AC;
vec3 L = (D - K)/w;
vec3 L = (D - K)/w;
for (double k =
1.0
; k < w; k++)
for (double k =
0.0001
; k < w; k++)
{
{
vec3 Sv = K + L*k;
vec3 Sv = K + L*k;
int S = stabil_ep(Sv, C, D);
int S = stabil_ep(Sv, C, D);
...
@@ -217,13 +217,13 @@ __device__ void BCD_oldal(int v, int w, const vec3& C, const vec3& D, char* egys
...
@@ -217,13 +217,13 @@ __device__ void BCD_oldal(int v, int w, const vec3& C, const vec3& D, char* egys
vec3 BC = (C - B) / v;
vec3 BC = (C - B) / v;
vec3 BD = (D - B) / v;
vec3 BD = (D - B) / v;
for (double i =
1.0
; i < v; i++)
for (double i =
0.0001
; i < v; i++)
{
{
for (double j =
1.0
; j < v; j++)
for (double j =
0.0001
; j < v; j++)
{
{
vec3 K = B + i * BC + j * BD;
vec3 K = B + i * BC + j * BD;
vec3 L = (A - K)/w;
vec3 L = (A - K)/w;
for (double k =
1.0
; k < w; k++)
for (double k =
0.0001
; k < w; k++)
{
{
vec3 Sv = K + L*k;
vec3 Sv = K + L*k;
int S = stabil_ep(Sv, C, D);
int S = stabil_ep(Sv, C, D);
...
@@ -244,13 +244,13 @@ __device__ void CDA_oldal(int v, int w, const vec3& C, const vec3& D, char* egys
...
@@ -244,13 +244,13 @@ __device__ void CDA_oldal(int v, int w, const vec3& C, const vec3& D, char* egys
vec3 CA = (A - C) / v;
vec3 CA = (A - C) / v;
vec3 CD = (D - C) / v;
vec3 CD = (D - C) / v;
for (double i =
1.0
; i < v; i++)
for (double i =
0.0001
; i < v; i++)
{
{
for (double j =
1.0
; j < v; j++)
for (double j =
0.0001
; j < v; j++)
{
{
vec3 K = C + i * CA + j * CD;
vec3 K = C + i * CA + j * CD;
vec3 L = (B - K)/w;
vec3 L = (B - K)/w;
for (double k =
1.0
; k < w; k++)
for (double k =
0.0001
; k < w; k++)
{
{
vec3 Sv = K + L*k;
vec3 Sv = K + L*k;
int S = stabil_ep(Sv, C, D);
int S = stabil_ep(Sv, C, D);
...
@@ -271,13 +271,13 @@ __device__ void DAB_oldal(int v, int w, const vec3& C, const vec3& D, char* egys
...
@@ -271,13 +271,13 @@ __device__ void DAB_oldal(int v, int w, const vec3& C, const vec3& D, char* egys
vec3 DA = (A - D) / v;
vec3 DA = (A - D) / v;
vec3 DB = (B - D) / v;
vec3 DB = (B - D) / v;
for (double i =
1.0
; i < v; i++)
for (double i =
0.0001
; i < v; i++)
{
{
for (double j =
1.0
; j < v; j++)
for (double j =
0.0001
; j < v; j++)
{
{
vec3 K = D + i * DA + j * DB;
vec3 K = D + i * DA + j * DB;
vec3 L = (C - K)/w;
vec3 L = (C - K)/w;
for (double k =
1.0
; k < w; k++)
for (double k =
0.0001
; k < w; k++)
{
{
vec3 Sv = K + L*k;
vec3 Sv = K + L*k;
int S = stabil_ep(Sv, C, D);
int S = stabil_ep(Sv, C, D);
...
@@ -292,12 +292,12 @@ __device__ void DAB_oldal(int v, int w, const vec3& C, const vec3& D, char* egys
...
@@ -292,12 +292,12 @@ __device__ void DAB_oldal(int v, int w, const vec3& C, const vec3& D, char* egys
}
}
__global__ void gpu_egyensulyi(int v, int w, double* Cx_arr, double* Cy_arr,
__global__ void gpu_egyensulyi(int v, int w, double* Cx_arr, double* Cy_arr,
double* Dx_arr, double* Dy_arr, double* Dz_arr, int size_C, int size_D, char* egysulyi_mtx) {
double* Dx_arr, double* Dy_arr, double* Dz_arr, int size_C, int size_D,
int lcm,
char* egysulyi_mtx) {
int pos = blockDim.x * blockIdx.x + threadIdx.x;
int pos = blockDim.x * blockIdx.x + threadIdx.x;
if (pos >= size_C*size_D)
if (pos >= size_C*size_D)
return;
return;
vec3 C(Cx_arr[pos % size_C], Cy_arr[pos % size_C], 0.0);
vec3 C(Cx_arr[pos % size_C], Cy_arr[pos % size_C], 0.0);
vec3 D(Dx_arr[
pos % size_D], Dy_arr[pos % size_D], Dz_arr[pos
% size_D]);
vec3 D(Dx_arr[
(pos + pos / lcm) % size_D], Dy_arr[(pos + pos / lcm) % size_D], Dz_arr[(pos + pos / lcm)
% size_D]);
ABC_oldal(v, w, C, D, egysulyi_mtx);
ABC_oldal(v, w, C, D, egysulyi_mtx);
BCD_oldal(v, w, C, D, egysulyi_mtx);
BCD_oldal(v, w, C, D, egysulyi_mtx);
...
...
genax.py
View file @
3524218d
...
@@ -28,11 +28,11 @@ void parosit(const double* x1, const double* x2, double* a, double* b, const int
...
@@ -28,11 +28,11 @@ void parosit(const double* x1, const double* x2, double* a, double* b, const int
int tid = blockDim.x * blockIdx.x + threadIdx.x;
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (m <= tid || m*m <= tid*m+m-1)
if (m <= tid || m*m <= tid*m+m-1)
return;
return;
float
alpha = x1[tid];
double
alpha = x1[tid];
if (m*m <= tid*m+m-1)
if (m*m <= tid*m+m-1)
return;
return;
for (int i = 0; i < m; i++) {
for (int i = 0; i < m; i++) {
float
betha = x2[i];
double
betha = x2[i];
if ((alpha + betha) < PI && betha >= alpha && alpha > 0.0) {
if ((alpha + betha) < PI && betha >= alpha && alpha > 0.0) {
a[tid*m+i] = alpha;
a[tid*m+i] = alpha;
b[tid*m+i] = betha;
b[tid*m+i] = betha;
...
@@ -51,9 +51,11 @@ void parosit2(const double* x1, const double* x2, double* a, double* b, const in
...
@@ -51,9 +51,11 @@ void parosit2(const double* x1, const double* x2, double* a, double* b, const in
int tid = blockDim.x * blockIdx.x + threadIdx.x;
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (m <= tid || m*m <= tid*m+m-1)
if (m <= tid || m*m <= tid*m+m-1)
return;
return;
float alpha = x1[tid];
double alpha = x1[tid];
if (m*m <= tid*m+m-1)
return;
for (int i = 0; i < m; i++) {
for (int i = 0; i < m; i++) {
float
betha = x2[i];
double
betha = x2[i];
if ((alpha + betha) < PI && alpha > 0.0) {
if ((alpha + betha) < PI && alpha > 0.0) {
a[tid*m+i] = alpha;
a[tid*m+i] = alpha;
b[tid*m+i] = betha;
b[tid*m+i] = betha;
...
@@ -95,8 +97,10 @@ def angles_alap(anglestopick, plot = False):
...
@@ -95,8 +97,10 @@ def angles_alap(anglestopick, plot = False):
tCy
=
tgtA
/
mtgt
tCy
=
tgtA
/
mtgt
tCx
=
tCy
/
tgtA
tCx
=
tCy
/
tgtA
Cy
=
cp
.
concatenate
((
hCy
,
tCy
),
axis
=
None
)
Cx
=
cp
.
concatenate
((
hCx
,
tCx
),
axis
=
None
)
Cx
=
cp
.
concatenate
((
hCx
,
tCx
),
axis
=
None
)
Cx
=
cp
.
append
(
Cx
,
[
0.5
],
axis
=
False
)
Cy
=
cp
.
concatenate
((
hCy
,
tCy
),
axis
=
None
)
Cy
=
cp
.
append
(
Cy
,
[
cp
.
sqrt
(
3.0
)
/
2.0
],
axis
=
False
)
return
Cx
,
Cy
return
Cx
,
Cy
...
@@ -138,7 +142,10 @@ def angles_ratet(anglestopick, plot = False):
...
@@ -138,7 +142,10 @@ def angles_ratet(anglestopick, plot = False):
sin
=
cp
.
sin
(
anglestopick
)
sin
=
cp
.
sin
(
anglestopick
)
Dx
=
cp
.
outer
(
cp
.
full
(
anglestopick
.
size
,
1.0
,
dtype
=
cp
.
float64
),
Ex
)
.
flatten
()
Dx
=
cp
.
outer
(
cp
.
full
(
anglestopick
.
size
,
1.0
,
dtype
=
cp
.
float64
),
Ex
)
.
flatten
()
Dx
=
cp
.
append
(
Dx
,
[
0.5
],
axis
=
False
)
Dy
=
cp
.
outer
(
cos
,
Ey
)
.
flatten
()
Dy
=
cp
.
outer
(
cos
,
Ey
)
.
flatten
()
Dy
=
cp
.
append
(
Dy
,
[
cp
.
sqrt
(
3.0
)
/
6.0
],
axis
=
False
)
Dz
=
cp
.
outer
(
sin
,
Ey
)
.
flatten
()
Dz
=
cp
.
outer
(
sin
,
Ey
)
.
flatten
()
Dz
=
cp
.
append
(
Dz
,
[
cp
.
sqrt
(
2.0
/
3.0
)],
axis
=
False
)
return
Dx
,
Dy
,
Dz
return
Dx
,
Dy
,
Dz
\ No newline at end of file
gpu.py
View file @
3524218d
from
numba
import
cuda
from
numba
import
cuda
import
cupy
as
cp
import
cupy
as
cp
from
utils
import
compute_lcm
with
open
(
'epgpu.cu'
)
as
f
:
with
open
(
'epgpu.cu'
)
as
f
:
code
=
f
.
read
()
code
=
f
.
read
()
...
@@ -9,10 +10,12 @@ ep_pontok_module = cp.RawModule(code=code, options=('--std=c++11',), name_expres
...
@@ -9,10 +10,12 @@ ep_pontok_module = cp.RawModule(code=code, options=('--std=c++11',), name_expres
fun
=
ep_pontok_module
.
get_function
(
kers
[
0
])
fun
=
ep_pontok_module
.
get_function
(
kers
[
0
])
def
start_kernel
(
Cx
,
Cy
,
Dx
,
Dy
,
Dz
,
v
,
w
):
def
start_kernel
(
Cx
,
Cy
,
Dx
,
Dy
,
Dz
,
v
,
w
):
print
(
f
"Cnt: {Cx.size}x{Dx.size}={Cx.size*Dx.size}"
)
print
(
"Res size (byte): "
,
Cx
.
size
*
Dx
.
size
*
4
*
4
)
print
(
"Res size (byte): "
,
Cx
.
size
*
Dx
.
size
*
4
*
4
)
print
(
Cx
.
size
,
","
,
Cy
.
size
,
","
,
Dx
.
size
,
","
,
Dy
.
size
,
","
,
Dz
.
size
)
lcm
=
compute_lcm
(
Cx
.
size
,
Dx
.
size
)
#print(Cx.size, ",", Cy.size, ",", Dx.size, ",", Dy.size, ",", Dz.size)
egyensulyi_mtx
=
cp
.
zeros
((
Cx
.
size
*
Dx
.
size
,
4
,
4
),
dtype
=
cp
.
int8
)
egyensulyi_mtx
=
cp
.
zeros
((
Cx
.
size
*
Dx
.
size
,
4
,
4
),
dtype
=
cp
.
int8
)
numBlock
=
int
((
Cx
.
size
*
Dx
.
size
+
256
-
1
)
/
256
)
numBlock
=
int
((
Cx
.
size
*
Dx
.
size
+
256
-
1
)
/
256
)
fun
((
numBlock
,),
(
256
,),
(
v
,
w
,
Cx
,
Cy
,
Dx
,
Dy
,
Dz
,
Cx
.
size
,
Dx
.
size
,
egyensulyi_mtx
))
fun
((
numBlock
,),
(
256
,),
(
v
,
w
,
Cx
,
Cy
,
Dx
,
Dy
,
Dz
,
Cx
.
size
,
Dx
.
size
,
lcm
,
egyensulyi_mtx
))
return
egyensulyi_mtx
return
egyensulyi_mtx
tetrarun.py
View file @
3524218d
...
@@ -2,6 +2,7 @@ import sys, getopt
...
@@ -2,6 +2,7 @@ import sys, getopt
from
genax
import
gen_angels_to_pick
,
angles_alap
,
angles_ratet
from
genax
import
gen_angels_to_pick
,
angles_alap
,
angles_ratet
from
gpu
import
start_kernel
from
gpu
import
start_kernel
from
utils
import
convert
,
printresults
def
main
(
argv
):
def
main
(
argv
):
outputfile
=
'out.txt'
outputfile
=
'out.txt'
...
@@ -34,13 +35,13 @@ def main(argv):
...
@@ -34,13 +35,13 @@ def main(argv):
PLOT
=
True
PLOT
=
True
space
=
gen_angels_to_pick
(
n
,
PLOT
)
space
=
gen_angels_to_pick
(
n
,
PLOT
)
Cx
,
Cy
=
angles_alap
(
space
,
PLOT
)
Cx
,
Cy
=
angles_alap
(
space
,
PLOT
)
Dx
,
Dy
,
Dz
=
angles_ratet
(
space
)
Dx
,
Dy
,
Dz
=
angles_ratet
(
space
)
res
=
start_kernel
(
Cx
,
Cy
,
Dx
,
Dy
,
Dz
,
v
,
w
)
res
=
start_kernel
(
Cx
,
Cy
,
Dx
,
Dy
,
Dz
,
v
,
w
)
printresults
(
res
)
#print(res)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
main
(
sys
.
argv
[
1
:])
main
(
sys
.
argv
[
1
:])
\ No newline at end of file
utils.py
View file @
3524218d
import
cupy
as
cp
import
cupy
as
cp
import
numpy
as
np
def
expSpace
(
min
,
max
,
N
,
exponentialliness
=
20.0
):
def
expSpace
(
min
,
max
,
N
,
exponentialliness
=
20.0
):
LinVec
=
cp
.
linspace
(
0
,
cp
.
log10
(
exponentialliness
+
1
,
dtype
=
cp
.
float64
),
N
,
dtype
=
cp
.
float64
)
LinVec
=
cp
.
linspace
(
0
,
cp
.
log10
(
exponentialliness
+
1
,
dtype
=
cp
.
float64
),
N
,
dtype
=
cp
.
float64
)
return
(
max
-
min
)
/
exponentialliness
*
(
10.0
**
LinVec
-
1
)
+
min
return
(
max
-
min
)
/
exponentialliness
*
(
10.0
**
LinVec
-
1
)
+
min
def convert(egyensulyi_mtx):
    """Flatten a stack of 4x4 flag matrices into one (rows, 2) index array.

    For every matrix in ``egyensulyi_mtx`` each cell equal to 1 contributes a
    row ``(S + 1, U + 1)`` (1-based row and column index of the cell), visited
    in row-major order. A ``(0, 0)`` row is emitted after each matrix as a
    separator.

    NOTE(review): the separator is assumed to be written once per matrix; the
    flattened diff this function was recovered from does not show the nesting
    of that statement -- confirm against the repository.

    Args:
        egyensulyi_mtx: iterable of 4x4 indexable matrices (``m[S][U]``).

    Returns:
        A 2-D NumPy integer array with two columns. When the input holds no
        matrices the result is an empty ``(0, 2)`` ``int8`` array.
    """
    # Collect pairs in a Python list and build the array once: np.append
    # copies the whole array on every call, which is quadratic in the output.
    pairs = []
    for matrix in egyensulyi_mtx:
        for S in range(4):
            row = matrix[S]
            for U in range(4):
                if row[U] == 1:
                    pairs.append((S + 1, U + 1))
        pairs.append((0, 0))

    if not pairs:
        return np.empty((0, 2), dtype=np.int8)
    return np.array(pairs).reshape(len(pairs), 2)
def printresults(egyensulyi_mtx):
    """Print, for each 4x4 flag matrix, the index pairs of its cells equal to 1.

    For every matrix in ``egyensulyi_mtx`` three things are written to stdout:
    a header ``"<count>x2"``, the ``(count, 2)`` array of 1-based
    ``(S + 1, U + 1)`` pairs in row-major order, and an empty line.

    Args:
        egyensulyi_mtx: iterable of 4x4 indexable matrices (``m[S][U]``).
    """
    for matrix in egyensulyi_mtx:
        # Gather the pairs in a list and convert once instead of growing a
        # NumPy array with np.append, which reallocates on every call.
        pairs = [
            (S + 1, U + 1)
            for S in range(4)
            for U in range(4)
            if matrix[S][U] == 1
        ]
        count = len(pairs)
        print(f"{count}x2")
        print(np.array(pairs, dtype=int).reshape(count, 2))
        print()
def compute_lcm(x, y):
    """Return the least common multiple of the integers ``x`` and ``y``.

    Computed from the greatest common divisor, so the cost is logarithmic in
    the operands rather than proportional to the result as with a linear
    search upwards from ``max(x, y)``.

    Args:
        x: first integer.
        y: second integer.

    Returns:
        The non-negative least common multiple; 0 when either argument is 0.
    """
    # Function-scope import keeps the module's import block untouched.
    from math import gcd

    if x == 0 or y == 0:
        return 0
    # Divide before multiplying to keep the intermediate value small.
    return abs(x // gcd(x, y) * y)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment