Commit 28fad0c3 by Karsa Zoltán István

Summary plots py

parent 182c9eb4
import matplotlib.pyplot as plt
import json
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.metrics import mean_squared_error
import statistics
from scipy.stats import norm
import scipy.stats as stats
def read_moodle_json(file_path, key_list):
    """Load a Moodle JSON export and keep the requested fields per record.

    The file must contain a JSON array whose first element is the list of
    records; every record is keyed by its ``"idnumber"`` field.

    Args:
        file_path: Path of the JSON export.
        key_list: Names of the record fields to copy for every record.

    Returns:
        ``{idnumber: {key: value, ...}}`` covering every record, or ``None``
        when the file is missing or is not valid JSON (a message is printed
        in that case).

    Raises:
        KeyError: If a record lacks ``"idnumber"`` or one of ``key_list``.
    """
    try:
        # Read as UTF-8 explicitly instead of relying on the platform's
        # locale encoding, which would garble non-ASCII text on some systems.
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)[0]
    except FileNotFoundError:
        print(f"Error: The file {file_path} was not found.")
        return None
    except json.JSONDecodeError:
        print(f"Error: The file {file_path} contains invalid JSON.")
        return None
    # When an idnumber occurs more than once, the last record wins.
    return {
        item["idnumber"]: {q: item[q] for q in key_list}
        for item in data
    }
# Selected fields of the two Moodle exports named below, keyed by idnumber.
ZH1_A_responses = read_moodle_json(
    "BMEVIIIAA03_HU-1.NZHAcsoport-responses.json",
    [
        "response12",
        "response13",
        "response14",
        "response15",
        "response16",
    ],
)
ZH1_A_grades = read_moodle_json(
    "BMEVIIIAA03_HU-1.NZHAcsoport-grades.json",
    [
        "grade4000",
        "q12300",
        "q13500",
        "q14400",
        "q15300",
        "q16200",
    ],
)

# Records whose "name" is listed here are skipped by read_json_to_pandas.
teachers = [
    "Zoltán István Karsa",
    "...",
]

# Grader name -> number of records counted by read_json_to_pandas.
graders = dict()
def rename_key(old_key, extra_key, except_keys=("name", "username", "idnumber", "emailaddress")):
    """Return ``old_key`` prefixed with ``extra_key`` unless it is exempt.

    Args:
        old_key: Key (column name) to rename.
        extra_key: Prefix put in front of the key.
        except_keys: Keys that are returned unchanged.  The default is an
            immutable tuple so that it cannot be altered between calls.

    Returns:
        ``old_key`` itself when it is in ``except_keys``, otherwise
        ``extra_key + old_key``.
    """
    if old_key in except_keys:
        return old_key
    return extra_key + old_key
def rename_dict(dict, extra_key):
    """Return a copy of ``dict`` whose keys went through ``rename_key``.

    Every key is mapped with ``rename_key(key, extra_key)``; the values are
    carried over unchanged and the input mapping is not modified.
    """
    return {
        rename_key(name, extra_key): value
        for name, value in dict.items()
    }
def read_json_to_pandas(file_path, assigment = ""):
    """Build a per-username grade table from a Moodle grade-history export.

    Only records with ``source == "mod/quiz"``, a non-empty ``revisedgrade``
    and a ``name`` that is not in the module-level ``teachers`` list are used.
    They are processed in chronological order:

    * the first record of a username provides the row; its grade is stored as
      both the original and the revised grade and its grader is set to
      ``"student"``;
    * every later record of the same username overwrites the revised grade,
      sets ``overridden`` to ``"Yes"`` and stores that record's grader.

    Side effect: every such later record increments the counter of its grader
    in the module-level ``graders`` dict.

    Args:
        file_path: Path of the JSON export (a JSON array whose first element
            is the list of records).
        assigment: Prefix that ``rename_key`` puts in front of the column
            names (identity columns such as ``username`` stay unprefixed).

    Returns:
        A DataFrame indexed by ``username`` with an extra ``<prefix>diff``
        column holding ``abs(revised - original)``, or ``None`` when the file
        is missing or is not valid JSON (a message is printed).
    """
    # NOTE(review): %A and %B are locale dependent; this assumes the export
    # and the running process both use English day/month names.
    timestamp_format = "%A, %d %B %Y, %I:%M %p"
    try:
        # Read as UTF-8 explicitly rather than with the locale's encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)[0]
    except FileNotFoundError:
        print(f"Error: The file {file_path} was not found.")
        return None
    except json.JSONDecodeError:
        print(f"Error: The file {file_path} contains invalid JSON.")
        return None

    revised_col = rename_key("revisedgrade", assigment)
    original_col = rename_key("originalgrade", assigment)
    overridden_col = rename_key("overridden", assigment)
    grader_col = rename_key("grader", assigment)

    # Parse every timestamp exactly once and keep it next to its record.
    stamped = [
        (datetime.strptime(rec["dateandtime"], timestamp_format), rec)
        for rec in data
        if rec['source'] == "mod/quiz" and rec["name"] not in teachers and rec["revisedgrade"] != ""
    ]
    # list.sort is stable, so records with equal timestamps keep file order.
    stamped.sort(key=lambda pair: pair[0])

    rows = {}
    for when, rec in stamped:
        username = rec["username"]
        grade = float(rec["revisedgrade"])
        if username not in rows:
            first = {
                **rec,
                "dateandtime": when,
                "originalgrade": grade,
                "revisedgrade": grade,
                "grader": "student",
            }
            rows[username] = rename_dict(first, assigment)
        else:
            row = rows[username]
            row[revised_col] = grade
            row[overridden_col] = "Yes"
            row[grader_col] = rec["grader"]
            # NOTE(review): only these later (override) records are counted
            # per grader - confirm that this matches the intended nesting.
            graders[rec["grader"]] = graders.get(rec["grader"], 0) + 1

    dataframe = pd.DataFrame(list(rows.values()))
    dataframe = dataframe.set_index("username")
    dataframe[rename_key("diff", assigment)] = abs(dataframe[revised_col] - dataframe[original_col])
    return dataframe
# Files that share a column prefix are stacked row-wise into one table.
df = pd.concat([
    read_json_to_pandas("2024ZH1A.json", "ZH1-"),
    read_json_to_pandas("2024ZH1B.json", "ZH1-"),
])
df2 = pd.concat([
    read_json_to_pandas("2024ZH2A.json", "ZH2-"),
    read_json_to_pandas("2024ZH2B.json", "ZH2-"),
])
df3 = pd.concat([
    read_json_to_pandas("2024PZH1.json", "PZH1-"),
])
df4 = pd.concat([
    read_json_to_pandas("2024PZH2.json", "PZH2-"),
])

# The four tables are then joined side by side on their username index.
df = pd.concat([df, df2, df3, df4], axis="columns")
#df.to_excel("output.xlsx")
# Column groups and row masks shared by the subsets below.
_zh1_grade_cols = [rename_key("originalgrade", "ZH1-"), rename_key("revisedgrade", "ZH1-")]
_zh2_grade_cols = [rename_key("originalgrade", "ZH2-"), rename_key("revisedgrade", "ZH2-")]
_zh1_has_grades = df[_zh1_grade_cols].notna().all(axis=1)
_zh2_has_grades = df[_zh2_grade_cols].notna().all(axis=1)

# Rows marked as overridden that have both grades of the given assessment.
ZH1_over_notnull = df[(df[rename_key("overridden", "ZH1-")] == "Yes") & _zh1_has_grades]
ZH2_over_notnull = df[(df[rename_key("overridden", "ZH2-")] == "Yes") & _zh2_has_grades]

# Rows that have both grades of the given assessment.
ZH1_notnull = df[_zh1_has_grades]
ZH2_notnull = df[_zh2_has_grades]

# Rows that have all four ZH1/ZH2 grades.
ZH_notnull = df[df[_zh1_grade_cols + _zh2_grade_cols].notna().all(axis=1)]
# Change of the joint ZH1/ZH2 grade distribution caused by the revisions:
# 40 x 40 bins over [0, 40] on both axes, revised counts minus original counts.
_grade_range = [[0, 40], [0, 40]]
Z, xedges, yedges = np.histogram2d(
    ZH_notnull[rename_key("originalgrade", "ZH1-")],
    ZH_notnull[rename_key("originalgrade", "ZH2-")],
    bins=40, range=_grade_range)
Z2, xedges, yedges = np.histogram2d(
    ZH_notnull[rename_key("revisedgrade", "ZH1-")],
    ZH_notnull[rename_key("revisedgrade", "ZH2-")],
    bins=40, range=_grade_range)
# histogram2d bins its first argument (ZH1) along axis 0, whereas pcolormesh
# reads C[row, column] as C[y, x].  Transpose so that ZH1 really runs along
# the x axis, as the axis labels below state.
im = plt.pcolormesh(xedges, yedges, (Z2 - Z).T, shading='flat',
    #cmap = plt.colormaps['Greys']
)
plt.colorbar(im)
# Dashed line through the points where the two grades add up to 40.
plt.axline((0,40), slope=-1, c="black", linestyle='dashed', label="Min. Req")
plt.xlabel("ZH1 grade")
plt.ylabel("ZH2 grade")
#plt.legend()
plt.show()
# Histogram of all ZH1 grades before and after revision, overlaid with the
# normal distributions fitted to each of the two samples.
_zh1_original_col = rename_key("originalgrade", "ZH1-")
_zh1_revised_col = rename_key("revisedgrade", "ZH1-")
mu, std = norm.fit(ZH1_notnull[_zh1_original_col])
mean_revised_zh1 = statistics.mean(ZH1_notnull[_zh1_revised_col])
sd_revised_zh1 = statistics.stdev(ZH1_notnull[_zh1_revised_col])
x_axis = np.arange(0, 40, 0.01)
_zh1_counts, _zh1_edges, _zh1_bars = plt.hist([df[_zh1_original_col], df[_zh1_revised_col],
    #df[rename_key("originalgrade", "ZH2-")], df[rename_key("revisedgrade", "ZH2-")]
    ],
    label=["Original grade ZH1", "Revised grade ZH1",
    # "Original grade ZH2", "Revised grade ZH2"
    ],
    color=["red", "blue",
    #"blue", "cornflowerblue"
    ], bins=20, )
# The bars are absolute counts, so the expected height of a fitted curve is
# pdf * sample size * bin width.  The sample size is the number of rows that
# actually have a ZH1 grade; len(df[...]) would also count the NaN rows.
_zh1_scale = len(ZH1_notnull) * (_zh1_edges[1] - _zh1_edges[0])
plt.plot(x_axis, norm.pdf(x_axis, mu, std) * _zh1_scale, label="Original N", color="red")
mu, std = norm.fit(ZH1_notnull[_zh1_revised_col])
plt.plot(x_axis, norm.pdf(x_axis, mu, std) * _zh1_scale, label="Revised N", color="blue")
plt.xlabel('Point')
plt.ylabel('Frequency')
plt.legend()
plt.show()
# Histogram (absolute counts, 10 bins) of the overridden submissions, overlaid
# with fitted normal curves, plus a chi-square goodness-of-fit test of the
# original ZH1 grades against their fitted normal distribution.
_zh1_over_original = ZH1_over_notnull[rename_key("originalgrade", "ZH1-")]
_zh1_over_revised = ZH1_over_notnull[rename_key("revisedgrade", "ZH1-")]
_zh2_over_original = ZH2_over_notnull[rename_key("originalgrade", "ZH2-")]
_zh2_over_revised = ZH2_over_notnull[rename_key("revisedgrade", "ZH2-")]
counts, bins, bars = plt.hist([_zh1_over_original, _zh1_over_revised,
    _zh2_over_original, _zh2_over_revised],
    label=["Original grade ZH1", "Revised grade ZH1", "Original grade ZH2", "Revised grade ZH2"],
    color=["red", "lightcoral", "blue", "cornflowerblue"], bins=10,
    #density=True
)
# The bars are counts, so each fitted curve is scaled by the size of its own
# sample times the bin width to be comparable with the bars.
_bin_width = bins[1] - bins[0]
count = len(_zh1_over_original)
mu, std = norm.fit(_zh1_over_original)
plt.plot(x_axis, norm.pdf(x_axis, mu, std) * count * _bin_width, label="Original ZH1 N", color="red")

# Observed categories: the lowest three bins merged into one, then one
# category per remaining bin.
_merged_bins = 3
density = np.concatenate(([counts[0][:_merged_bins].sum()], counts[0][_merged_bins:]))
print(density)
# Expected counts use the real histogram edges as category boundaries, so
# observed and expected categories cover the same intervals.  The first and
# last categories are open ended, which makes the expected counts add up to
# the sample size as stats.chisquare requires.
limits = bins[_merged_bins:-1]
expected = norm.cdf(limits, mu, std) * float(count)
propability = np.diff(np.concatenate(([0.0], expected, [float(count)]))).tolist()
print(propability)
test_stat, p_value = stats.chisquare(density, propability)
# chi square test statistic and p value
print('chi_square_test_statistic is : ' +
    str(test_stat))
print('p_value : ' + str(p_value))
# Critical value at the 5 % level with (number of categories - 1) degrees of
# freedom, the same degrees of freedom stats.chisquare used for p_value.
# NOTE(review): mu and std are estimated from this sample; consider ddof=2.
print(stats.chi2.ppf(1-0.05, df=len(density) - 1))

mu, std = norm.fit(_zh1_over_revised)
plt.plot(x_axis, norm.pdf(x_axis, mu, std) * count * _bin_width, label="Revised ZH1 N", color="lightcoral")
_zh2_scale = len(ZH2_over_notnull) * _bin_width
mu, std = norm.fit(_zh2_over_original)
plt.plot(x_axis, norm.pdf(x_axis, mu, std) * _zh2_scale, label="Original ZH2 N", color="blue")
mu, std = norm.fit(_zh2_over_revised)
plt.plot(x_axis, norm.pdf(x_axis, mu, std) * _zh2_scale, label="Revised ZH2 N", color="cornflowerblue")
plt.xlabel('Point')
# The y axis holds counts (density=True is disabled above), not densities.
plt.ylabel('Frequency')
plt.legend()
plt.show()
# Same figure and goodness-of-fit test as the 10-bin variant, with 20 bins.
_zh1_over_original = ZH1_over_notnull[rename_key("originalgrade", "ZH1-")]
_zh1_over_revised = ZH1_over_notnull[rename_key("revisedgrade", "ZH1-")]
_zh2_over_original = ZH2_over_notnull[rename_key("originalgrade", "ZH2-")]
_zh2_over_revised = ZH2_over_notnull[rename_key("revisedgrade", "ZH2-")]
counts, bins, bars = plt.hist([_zh1_over_original, _zh1_over_revised,
    _zh2_over_original, _zh2_over_revised],
    label=["Original grade ZH1", "Revised grade ZH1", "Original grade ZH2", "Revised grade ZH2"],
    color=["red", "lightcoral", "blue", "cornflowerblue"], bins=20,
    #density=True
)
# The bars are counts, so each fitted curve is scaled by the size of its own
# sample times the bin width to be comparable with the bars.
_bin_width = bins[1] - bins[0]
count = len(_zh1_over_original)
mu, std = norm.fit(_zh1_over_original)
plt.plot(x_axis, norm.pdf(x_axis, mu, std) * count * _bin_width, label="Original ZH1 N", color="red")

# Observed categories: the lowest six bins merged into one, then one category
# per remaining bin.
_merged_bins = 6
density = np.concatenate(([counts[0][:_merged_bins].sum()], counts[0][_merged_bins:]))
# Expected counts use the real histogram edges as category boundaries, so
# observed and expected categories cover the same intervals.  The first and
# last categories are open ended, which makes the expected counts add up to
# the sample size as stats.chisquare requires.
limits = bins[_merged_bins:-1]
expected = norm.cdf(limits, mu, std) * float(count)
propability = np.diff(np.concatenate(([0.0], expected, [float(count)]))).tolist()
test_stat, p_value = stats.chisquare(density, propability)
# chi square test statistic and p value
print('chi_square_test_statistic is : ' +
    str(test_stat))
print('p_value : ' + str(p_value))
# Critical value at the 5 % level with (number of categories - 1) degrees of
# freedom, the same degrees of freedom stats.chisquare used for p_value.
# NOTE(review): mu and std are estimated from this sample; consider ddof=2.
print(stats.chi2.ppf(1-0.05, df=len(density) - 1))

mu, std = norm.fit(_zh1_over_revised)
plt.plot(x_axis, norm.pdf(x_axis, mu, std) * count * _bin_width, label="Revised ZH1 N", color="lightcoral")
_zh2_scale = len(ZH2_over_notnull) * _bin_width
mu, std = norm.fit(_zh2_over_original)
plt.plot(x_axis, norm.pdf(x_axis, mu, std) * _zh2_scale, label="Original ZH2 N", color="blue")
mu, std = norm.fit(_zh2_over_revised)
plt.plot(x_axis, norm.pdf(x_axis, mu, std) * _zh2_scale, label="Revised ZH2 N", color="cornflowerblue")
plt.xlabel('Point')
# The y axis holds counts (density=True is disabled above), not densities.
plt.ylabel('Frequency')
plt.legend()
plt.show()
# Revised grade against original grade for the overridden submissions, with a
# first-degree least-squares fit per assessment and the y = x reference line.
_zh1_x = ZH1_over_notnull[rename_key("originalgrade", "ZH1-")]
_zh1_y = ZH1_over_notnull[rename_key("revisedgrade", "ZH1-")]
_zh2_x = ZH2_over_notnull[rename_key("originalgrade", "ZH2-")]
_zh2_y = ZH2_over_notnull[rename_key("revisedgrade", "ZH2-")]

# The fitted lines are drawn from 0 to 40 rather than over the observed
# minimum and maximum of the data.
coef = np.polyfit(_zh1_x, _zh1_y, 1)
lin = np.poly1d(coef)
minmax = [0, 40]
plt.plot(_zh1_x, _zh1_y, '+', label="ZH1", color = "red")
plt.plot(minmax, lin(minmax), ':', color="lightcoral", label="ZH1 lin")

coef = np.polyfit(_zh2_x, _zh2_y, 1)
lin = np.poly1d(coef)
minmax = [0, 40]
plt.plot(_zh2_x, _zh2_y, '*', label="ZH2", color = "blue")
plt.plot(minmax, lin(minmax), '--', label="ZH2 lin", color="cornflowerblue")

plt.axline((0,0), slope=1, c="black", label="y=x")
plt.xlabel("Original grade")
plt.ylabel("Revised grade")
plt.legend()
plt.savefig("revised_lin.svg", format = 'svg', dpi=300)
plt.show()
# Root of the mean squared error between original and revised grades of the
# overridden submissions (despite the MSE_ prefix, the square root is taken).
_zh1_grade_pair = (
    ZH1_over_notnull[rename_key("originalgrade", "ZH1-")],
    ZH1_over_notnull[rename_key("revisedgrade", "ZH1-")],
)
MSE_ZH1 = np.sqrt(mean_squared_error(*_zh1_grade_pair))
print(MSE_ZH1)

_zh2_grade_pair = (
    ZH2_over_notnull[rename_key("originalgrade", "ZH2-")],
    ZH2_over_notnull[rename_key("revisedgrade", "ZH2-")],
)
MSE_ZH2 = np.sqrt(mean_squared_error(*_zh2_grade_pair))
print(MSE_ZH2)
# Box plots of the original and revised grades of the overridden submissions.
_grade_boxes = {
    "Original ZH1": ZH1_over_notnull["ZH1-originalgrade"],
    "Revised ZH1": ZH1_over_notnull["ZH1-revisedgrade"],
    "Original ZH2": ZH2_over_notnull["ZH2-originalgrade"],
    "Revised ZH2": ZH2_over_notnull["ZH2-revisedgrade"],
}
plt.boxplot(list(_grade_boxes.values()), labels=list(_grade_boxes.keys()))
plt.title('Box Plot of grades')
plt.ylabel('Values')
plt.show()

# Box plots of the absolute grade change, restricted to overridden rows.
_zh1_is_overridden = df[rename_key("overridden", "ZH1-")] == "Yes"
_zh2_is_overridden = df[rename_key("overridden", "ZH2-")] == "Yes"
plt.boxplot(
    [
        df.loc[_zh1_is_overridden, rename_key("diff", "ZH1-")],
        df.loc[_zh2_is_overridden, rename_key("diff", "ZH2-")],
    ],
    labels=["Only difference ZH1", "Only difference ZH2"],
)
plt.title('Box Plot of grades')
plt.ylabel('Values')
plt.show()
def _diffs_by_grader(prefix):
    """Return one Series of absolute grade changes per grader, in ``graders`` order."""
    grader_col = rename_key("grader", prefix)
    diff_col = rename_key("diff", prefix)
    return [df.loc[df[grader_col] == g, diff_col] for g in graders]


diff_by_graders_zh1 = _diffs_by_grader("ZH1-")
diff_by_graders_zh2 = _diffs_by_grader("ZH2-")
diff_by_graders_pzh1 = _diffs_by_grader("PZH1-")
diff_by_graders_pzh2 = _diffs_by_grader("PZH2-")

# Graders are shown as consecutive letters starting at 'A' instead of by name.
graders_anonym = [chr(ord('A') + k) for k in range(len(graders))]

# One figure per assessment: bars give the number of rows per grader (left
# axis), box plots give the distribution of the grade change (right axis).
for _grader_diffs, _svg_name in (
    (diff_by_graders_zh1, "graders_ZH1.svg"),
    (diff_by_graders_zh2, "graders_ZH2.svg"),
):
    fig, ax1 = plt.subplots()
    ax1.set_ylabel("Corrected piece (bar)")
    ax1.bar(range(1, len(graders_anonym) + 1), [len(s) for s in _grader_diffs], alpha=0.3)
    ax1.set_xlabel("Grader")
    color = 'tab:red'
    ax2 = ax1.twinx()
    ax2.set_ylabel("The points improved (boxplot)")
    ax2.boxplot(_grader_diffs, labels=graders_anonym)
    fig.tight_layout()
    plt.savefig(_svg_name, format = 'svg', dpi=300)
    plt.show()

# Grade changes of all four assessments pooled per grader.
diff_summary = [
    pd.concat(list(per_assessment))
    for per_assessment in zip(
        diff_by_graders_zh1,
        diff_by_graders_zh2,
        diff_by_graders_pzh1,
        diff_by_graders_pzh2,
    )
]
plt.boxplot(diff_summary, labels=graders_anonym)
plt.title('Box Plot of difference by graders')
plt.ylabel('Values')
plt.show()
def _original_grades_by_grader(prefix):
    """Return one Series of original grades per grader, in ``graders`` order."""
    grader_col = rename_key("grader", prefix)
    original_col = rename_key("originalgrade", prefix)
    return [df.loc[df[grader_col] == g, original_col] for g in graders]


# The diff_by_graders_* names are reused; from here on they hold the original
# grades of the rows attributed to each grader, not grade differences.
diff_by_graders_zh1 = _original_grades_by_grader("ZH1-")
diff_by_graders_zh2 = _original_grades_by_grader("ZH2-")
diff_by_graders_pzh1 = _original_grades_by_grader("PZH1-")
diff_by_graders_pzh2 = _original_grades_by_grader("PZH2-")
graders_anonym = [chr(ord('A') + k) for k in range(len(graders))]

for _grades_per_grader, _plot_title in (
    (diff_by_graders_zh1, 'Box Plot of teachers ZH1'),
    (diff_by_graders_zh2, 'Box Plot of teachers ZH2'),
):
    plt.boxplot(_grades_per_grader, labels=graders_anonym)
    plt.title(_plot_title)
    plt.ylabel('Original grade')
    plt.show()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment