Commit 182c9eb4 by Karsa Zoltán István

MoodleScraping utils

import requests
import ssl
import json
import os
from bs4 import BeautifulSoup
from urllib.parse import urljoin
# Summary link pointing to the results of the given exam (ZH).
# NOTE(review): the constant this comment describes (presumably ILIAS_URL, which
# main() reads) is not defined anywhere in the visible source — confirm it exists.
#
# Replace the ssl module's default HTTPS context factory with the unverified one.
# NOTE(review): presumably meant to tolerate an untrusted server certificate; the
# assignment is module-global, so confirm it is still needed.
ssl._create_default_https_context = ssl._create_unverified_context
def shibboleth_auth(session, url, credentials):
    """Request ``url`` and submit whichever Shibboleth login forms come back.

    Each step only runs when the page returned by the previous step contains
    the marker element of that step, so already-completed steps are skipped.

    Args:
        session: Object offering ``get``/``post`` in the style of
            ``requests.Session``; it is reused for every request.
        url: Address of the protected resource to request first.
        credentials: Mapping of login-form field names to the values to send.
            Form fields with the same names are replaced by these values.

    Returns:
        The response of the last request that was performed.
    """
    print("Shibboleth Auth…")
    print("├── request target resource")
    response = session.get(url, verify=False, allow_redirects=True)
    soup = BeautifulSoup(response.content, 'html.parser')

    if link := soup.find('a', class_='login-identityprovider-btn'):
        print("├── landing page")
        response = session.get(link['href'], allow_redirects=True)
        # NOTE(review): this is the only page decoded as UTF-16LE; every other
        # page is handed to the parser as raw bytes — confirm this is intended.
        soup = BeautifulSoup(response.content.decode('UTF-16LE'), 'html.parser')

    if soup.find('input', id='login-form_password'):
        print("├── login credentials")
        form = soup.find('form')
        # Keep the form's own fields except those the caller overrides ...
        data = [
            (name, value)
            for name, value in get_form_data(form)
            if name not in credentials
        ]
        # ... then add the caller's values; without this step the fields that
        # were filtered out above would never be submitted at all.
        data.extend(credentials.items())
        data.append(('_eventId_proceed', ''))
        response = session.post(
            urljoin(response.url, form['action']), data=dict(data))
        soup = BeautifulSoup(response.content, 'html.parser')

    if soup.find('input', attrs={'name': '_shib_idp_consentIds'}):
        print("├── grant permissions")
        form = soup.find('form')
        data = get_form_data(form)
        response = session.post(
            urljoin(response.url, form['action']), data=data)
        soup = BeautifulSoup(response.content, 'html.parser')

    if soup.find('input', attrs={'name': 'SAMLResponse'}):
        print("├── forward login token")
        form = soup.find('form')
        data = get_form_data(form)
        response = session.post(
            urljoin(response.url, form['action']), data=data)

    print("└── done")
    return response
def get_form_data(form):
    """Collect the submittable ``(name, value)`` pairs of a parsed form.

    Args:
        form: Parsed form element offering ``find_all`` (as a BeautifulSoup
            tag does).

    Returns:
        A list of ``(name, value)`` tuples, one per ``<input>`` that carries
        both a ``name`` and a ``value`` attribute, in document order. Submit
        buttons whose value is "reject" (case-insensitive) are left out.
    """
    return [
        (elem['name'], elem['value'])
        for elem in form.find_all('input', attrs={
            'name': True,
            'value': True,
        })
        # ``type`` is optional on <input>; .get() avoids a KeyError for
        # inputs that omit it.
        if elem.get('type') != 'submit' or elem['value'].lower() != 'reject'
    ]
def main():
    """Save CodeRunner response histories to ``ZH2Ac27.json``.

    Only the last answer can be downloaded otherwise, so for every student
    listed on the overview page (``ILIAS_URL``) the linked review page is
    fetched and its response-history table is stored under the student's
    Neptun code.

    Raises:
        KeyError: If the ``USER`` or ``PASS`` environment variable is not set.
    """
    with requests.Session() as session:
        shibboleth_auth(session, ILIAS_URL, {
            # BME directory login credentials. Pass the values themselves:
            # wrapping them in print() would store None and echo the secret.
            'j_username': os.environ['USER'],
            'j_password': os.environ['PASS'],
            '_shib_idp_revokeConsent': '1',
        })

        response = session.get(ILIAS_URL)
        overview = BeautifulSoup(response.content.decode("UTF-8"), 'html.parser')

        # "cell c4" is the class identifier of the Neptun column.
        neptun_cells = overview.find_all("td", class_="cell c4")
        neptuns = [
            cell.get_text() for cell in neptun_cells
            if len(cell.get_text()) == 6
        ]
        # "cell c27" is the class identifier of the column of the given
        # sub-question (CodeRunner).
        link_cells = overview.find_all(
            "td", class_="cell c27", limit=len(neptuns))
        hrefs = [cell.find("a", recursive=False)["href"] for cell in link_cells]

        todict = {}
        for neptun, url in zip(neptuns, hrefs):
            response = session.get(url)
            page = BeautifulSoup(response.content.decode("UTF-8"), 'html.parser')
            history = page.find("div", class_="responsehistoryheader")
            if history is None:
                # Skip this student rather than abort and lose everything
                # collected so far.
                print(f"no response history for {neptun}: {url}")
                continue

            def column(css_class, table=history):
                # Text of every cell of one column of the history table.
                return [
                    cell.get_text()
                    for cell in table.find_all("td", class_=css_class)
                ]

            todict[neptun] = {
                "url": url,
                "times": column("cell c1"),
                "codes": column("cell c2"),
                "states": column("cell c3"),
                "marks": column("cell c4 lastcol"),
            }

    with open("ZH2Ac27.json", 'w', encoding='utf8') as json_file:
        json.dump(todict, json_file)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment