Commit 182c9eb4, authored Aug 09, 2024 by Karsa Zoltán István
MoodleScraping utils
Showing 1 changed file with 123 additions and 0 deletions

moodle_scraping.py  +123  -0  (new file, mode 0 → 100644)
import requests
import ssl
import json
import os

from bs4 import BeautifulSoup
from urllib.parse import urljoin

# summary link pointing to the results of the given midterm (ZH)
ILIAS_URL = 'https://edu.vik.bme.hu/mod/quiz/report.php?id=124407&mode=overview'

ssl._create_default_https_context = ssl._create_unverified_context


def shibboleth_auth(session, url, credentials):
    """Walk through the Shibboleth login flow and return the final response."""
    print("Shibboleth Auth…")

    print("├── request target resource")
    response = session.get(url, verify=False, allow_redirects=True)
    soup = BeautifulSoup(response.content, 'html.parser')

    # follow the identity-provider button on the landing page, if present
    if link := soup.find('a', class_='login-identityprovider-btn'):
        print("├── landing page")
        response = session.get(link['href'], allow_redirects=True)
        soup = BeautifulSoup(response.content.decode('UTF-16LE'), 'html.parser')

    # submit the username/password form
    if soup.find('input', id='login-form_password'):
        print("├── login credentials")
        form = soup.find('form')
        data = [(name, value) for name, value in get_form_data(form) if name not in credentials]
        data.extend(credentials.items())
        data.append(('_eventId_proceed', ''))
        response = session.post(urljoin(response.url, form['action']), data=dict(data))
        soup = BeautifulSoup(response.content, 'html.parser')

    # acknowledge the attribute-release consent page
    if soup.find('input', attrs={'name': '_shib_idp_consentIds'}):
        print("├── grant permissions")
        form = soup.find('form')
        data = get_form_data(form)
        response = session.post(urljoin(response.url, form['action']), data=data)
        soup = BeautifulSoup(response.content, 'html.parser')

    # post the SAML response back to the service provider
    if soup.find('input', attrs={'name': 'SAMLResponse'}):
        print("├── forward login token")
        form = soup.find('form')
        data = get_form_data(form)
        response = session.post(urljoin(response.url, form['action']), data=data)

    print("└── done")
    return response


def get_form_data(form):
    """Collect the named, valued inputs of a form, skipping the 'reject' submit button."""
    return [(elem['name'], elem['value'])
            for elem in form.find_all('input', attrs={'name': True, 'value': True})
            if elem['type'] != 'submit' or elem['value'].lower() != 'reject']


# saves CodeRunner-type response histories (otherwise only the last answer can be downloaded)
def main():
    with requests.Session() as session:
        shibboleth_auth(session, ILIAS_URL, {
            # BME directory (címtár) login credentials
            'j_username': os.environ['USER'],
            'j_password': os.environ['PASS'],
            '_shib_idp_revokeConsent': '1',
        })

        response = session.get(ILIAS_URL)
        content = response.content.decode("UTF-8")
        soup = BeautifulSoup(content, 'html.parser')

        responses = soup.find_all("td", class_="cell c4")  # class name of the Neptun column
        neptuns = [a.get_text() for a in responses if len(a.get_text()) == 6]

        # class name of the column of the given sub-question (CodeRunner)
        responses = soup.find_all("td", class_="cell c27", limit=len(neptuns))
        hrefs = [a.find("a", recursive=False)["href"] for a in responses]

        # save the CodeRunner-type response histories (otherwise only the last answer can be downloaded)
        todict = {}
        for i in range(0, len(neptuns)):
            neptun = neptuns[i]
            url = hrefs[i]

            response = session.get(url)
            content = response.content.decode("UTF-8")
            soup = BeautifulSoup(content, 'html.parser')
            soup = soup.find("div", class_="responsehistoryheader")

            responses = soup.find_all("td", class_="cell c1")
            times = [a.get_text() for a in responses]
            responses = soup.find_all("td", class_="cell c2")
            codes = [a.get_text() for a in responses]
            responses = soup.find_all("td", class_="cell c3")
            states = [a.get_text() for a in responses]
            responses = soup.find_all("td", class_="cell c4 lastcol")
            marks = [a.get_text() for a in responses]

            todict[neptun] = {"url": url, "times": times, "codes": codes, "states": states, "marks": marks}

        with open("ZH2Ac27.json", 'w', encoding='utf8') as json_file:
            json.dump(todict, json_file)


main()
\ No newline at end of file
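
A minimal sketch of how the resulting dump could be inspected afterwards, assuming the script ran successfully and wrote ZH2Ac27.json into the working directory (the file name and the "times"/"codes"/"states"/"marks" keys come from main() above; the summary printed here is purely illustrative):

import json

# Load the dump written by moodle_scraping.py.
with open("ZH2Ac27.json", encoding="utf8") as f:
    histories = json.load(f)

# Keys are 6-character Neptun codes; values hold the scraped response history of one student.
for neptun, hist in histories.items():
    # "times", "codes", "states" and "marks" are parallel lists, one entry per recorded step.
    last_state = hist["states"][-1] if hist["states"] else "n/a"
    print(f"{neptun}: {len(hist['codes'])} snapshot(s), last state: {last_state}")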