Prog2 / stat
The up-to-date repo materials for prog2 are available here:
https://git.iit.bme.hu/
Commit 182c9eb4 authored Aug 09, 2024 by Karsa Zoltán István
MoodleScraping utils
Showing 1 changed file with 123 additions and 0 deletions
moodle_scraping.py 0 → 100644 (+123 −0)
import requests
import ssl
import json
import os
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# overview link pointing to the results of the given exam (ZH)
ILIAS_URL = 'https://edu.vik.bme.hu/mod/quiz/report.php?id=124407&mode=overview'

# disable HTTPS certificate verification for every request
ssl._create_default_https_context = ssl._create_unverified_context
# Walk the Shibboleth SSO chain: target resource -> IdP login page ->
# credential POST -> consent form -> SAML token POST back to the service.
def shibboleth_auth(session, url, credentials):
    print("Shibboleth Auth…")
    print("├── request target resource")
    response = session.get(url, verify=False, allow_redirects=True)
    soup = BeautifulSoup(response.content, 'html.parser')
    # follow the identity-provider login button if the landing page shows one
    if link := soup.find('a', class_='login-identityprovider-btn'):
        print("├── landing page")
        response = session.get(link['href'], allow_redirects=True)
        soup = BeautifulSoup(response.content.decode('UTF-16LE'), 'html.parser')
    if soup.find('input', id='login-form_password'):
        print("├── login credentials")
        form = soup.find('form')
        # keep the form's own hidden fields, then overlay our credentials
        data = [(name, value) for name, value in get_form_data(form)
                if name not in credentials]
        data.extend(credentials.items())
        data.append(('_eventId_proceed', ''))
        response = session.post(urljoin(response.url, form['action']), data=dict(data))
        soup = BeautifulSoup(response.content, 'html.parser')
    if soup.find('input', attrs={'name': '_shib_idp_consentIds'}):
        print("├── grant permissions")
        form = soup.find('form')
        data = get_form_data(form)
        response = session.post(urljoin(response.url, form['action']), data=data)
        soup = BeautifulSoup(response.content, 'html.parser')
    if soup.find('input', attrs={'name': 'SAMLResponse'}):
        print("├── forward login token")
        form = soup.find('form')
        data = get_form_data(form)
        response = session.post(urljoin(response.url, form['action']), data=data)
    print("└── done")
    return response
def get_form_data(form):
    return [
        (elem['name'], elem['value'])
        for elem in form.find_all('input', attrs={'name': True, 'value': True})
        if elem['type'] != 'submit' or elem['value'].lower() != 'reject'
    ]
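# Illustration (hypothetical markup) of what get_form_data extracts:
#   <input type="hidden" name="csrf" value="x1">                 -> ('csrf', 'x1')
#   <input type="submit" name="_eventId_proceed" value="Reject"> -> skipped
# The filter above drops only the "Reject" submit button, so consent
# forms are effectively auto-accepted when their data is reposted.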
# For saving CodeRunner-type response histories (otherwise only the last
# answer can be downloaded).
def main():
    with requests.Session() as session:
        shibboleth_auth(session, ILIAS_URL, {
            # BME directory login credentials, read from the environment
            'j_username': os.environ['USER'],
            'j_password': os.environ['PASS'],
            '_shib_idp_revokeConsent': '1',
        })
        response = session.get(ILIAS_URL)
        content = response.content.decode("UTF-8")
        soup = BeautifulSoup(content, 'html.parser')
        # class id of the Neptun-code column
        responses = soup.find_all("td", class_="cell c4")
        neptuns = [a.get_text() for a in responses if len(a.get_text()) == 6]
        # class id of the given sub-question's (CodeRunner) column
        responses = soup.find_all("td", class_="cell c27", limit=len(neptuns))
        hrefs = [a.find("a", recursive=False)["href"] for a in responses]
        # for saving CodeRunner-type response histories (otherwise only the
        # last answer can be downloaded)
        todict = {}
        for i in range(0, len(neptuns)):
            neptun = neptuns[i]
            url = hrefs[i]
            response = session.get(url)
            content = response.content.decode("UTF-8")
            soup = BeautifulSoup(content, 'html.parser')
            soup = soup.find("div", class_="responsehistoryheader")
            # parallel columns of the response-history table
            responses = soup.find_all("td", class_="cell c1")
            times = [a.get_text() for a in responses]
            responses = soup.find_all("td", class_="cell c2")
            codes = [a.get_text() for a in responses]
            responses = soup.find_all("td", class_="cell c3")
            states = [a.get_text() for a in responses]
            responses = soup.find_all("td", class_="cell c4 lastcol")
            marks = [a.get_text() for a in responses]
            todict[neptun] = {"url": url, "times": times, "codes": codes,
                              "states": states, "marks": marks}
        with open("ZH2Ac27.json", 'w', encoding='utf8') as json_file:
            json.dump(todict, json_file)

main()
\ No newline at end of file
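A minimal sketch (not part of the commit) of how the ZH2Ac27.json dump could be consumed afterwards; the key layout mirrors the todict structure built in main():

import json

# Load the response-history dump written by moodle_scraping.py.
with open("ZH2Ac27.json", encoding="utf8") as f:
    results = json.load(f)

# One entry per Neptun code: the report URL plus parallel lists of
# submission times, submitted code, grading states and marks.
for neptun, record in results.items():
    print(neptun, record["url"], len(record["codes"]), "saved attempt(s)")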