dbosk · dbosk · Apr 12, 2021 · Apr 13, 2021 · Apr 13, 2021 · Apr 13, 2021
diff --git a/examples/example_Student.py b/examples/example_Student.py
@@ -16,7 +16,7 @@
 for course in me.courses():
     print(f"{course.code} {course.name}")
 
-course = me.courses(code="DD2395")[0]
+course = me.courses(code="DD2395")[-1]
 
 print(f"{course.code} results:")
 for result in course.results():
@@ -30,7 +30,13 @@
 print()
 
 student = ladok.get_student("1234561234")
-prgi = student.courses(code="DD1315")[0]
+prgis = student.courses(code="DD1315")
+print(f"{student} registered {len(prgis)} times")
+
+for reg in prgis:
+    print(reg)
+
+# Continue with the last entry of the list; reuse the list fetched above
+# instead of calling student.courses() a second time for the same course.
+prgi = prgis[-1]
 
 print(f"{student.personnummer} {student.first_name} {student.last_name}")
 

diff --git a/src/ladok3/api.nw b/src/ladok3/api.nw
@@ -2,6 +2,9 @@
 
 We will now document some possible API calls to LADOK.
 
+
+\section{HTTP queries to LADOK}
+
 To make things easier, we will add three methods: [[get_query]], [[put_query]] 
 and [[post_query]], which are shortcuts to make GET, PUT and POST queries to 
 LADOK.
@@ -42,6 +45,9 @@ def post_query(self, path, post_data,
     headers=headers)
 @
 
+
+\section{Cleaning data for printing}
+
 We sometimes want to print the data, for instance, example output in this 
 document.
 For this reason we introduce some cleaning functions.
@@ -87,6 +93,9 @@ def pseudonymize(json_obj):
       pseudonymize(item)
 @
 
+
+\section{Test code for the API}
+
 We will use the following to test the following API methods.
 \begin{pyblock}[apitest][numbers=left]
 import json
@@ -128,17 +137,49 @@ The output looks like this.
 
 \section{[[registrations_JSON]]}
 
+This method returns \emph{all} registrations for a student, \ie registrations 
+on courses and programmes.
+<<LadokSession data methods>>=
+def registrations_JSON(self, student_id):
+  """Return all registrations for the student with ID student_id.
+
+  Gives the "Tillfallesdeltaganden" entry of the JSON reply when the query
+  answers with status code 200, otherwise None."""
+  path = ("/studiedeltagande/tillfallesdeltagande"
+    "/kurstillfallesdeltagande/student/" + student_id)
+  reply = self.get_query(path,
+    "application/vnd.ladok-studiedeltagande+json")
+
+  if reply.status_code != 200:
+    return None
+  return reply.json()["Tillfallesdeltaganden"]
+@
+
+This method is used as follows.
+\begin{pyblock}[apitest][numbers=left,firstnumber=last]
+me = ladok.get_student("de709f81-a867-11e7-8dbf-78e86dc2470c")
+
+results = ladok.registrations_JSON(me.ladok_id)
+
+ladok3.clean_data(results)
+print(json.dumps(results, indent=2))
+\end{pyblock}
+The output looks like this.
+\stdoutpythontex[verbatim]
+
+
+
+\section{[[registrations_on_course_JSON]]}
+
 This method returns all registrations on a particular course for a particular 
 student.
 This way we can check if a student has been registered several times on a 
 course.
 <<LadokSession data methods>>=
-def registrations_JSON(self, course_education_id, student_id):
+def registrations_on_course_JSON(self,
+    course_education_id, student_id):
   """Return a list of registrations on course with education_id for student 
   with student_id. JSON format."""
   response = self.get_query(
-    "/studiedeltagande/tillfallesdeltagande/"
-      f"utbildning/{course_education_id}/student/{student_id}",
+    "/studiedeltagande/tillfallesdeltagande"
+      f"/utbildning/{course_education_id}/student/{student_id}",
     "application/vnd.ladok-studiedeltagande+json")
 
   if response.status_code == 200:
@@ -151,7 +192,8 @@ This method is used as follows.
 me = ladok.get_student("de709f81-a867-11e7-8dbf-78e86dc2470c")
 dasak = me.courses(code="DD2395")[0]
 
-results = ladok.registrations_JSON(dasak.education_id, me.ladok_id)
+results = ladok.registrations_on_course_JSON(dasak.education_id,
+  me.ladok_id)
 
 ladok3.clean_data(results)
 print(json.dumps(results, indent=2))
@@ -498,7 +540,7 @@ response from the [[create_result_JSON]] method.
 This method is used as follows.
 \begin{pyblock}[apitest][numbers=left,firstnumber=last]
 attestants = ladok.result_attestants_JSON(
-  "a1ff1fda-881e-11eb-b9f5-10126f8746d1")
+  "d05c1e97-4c1e-11eb-8e41-bc743cd4482b")
 
 print(json.dumps(attestants[0], indent=2))
 \end{pyblock}

diff --git a/src/ladok3/data.nw b/src/ladok3/data.nw
@@ -98,6 +98,7 @@ This yields \cref{GradeStatsAvg}.
 
 \stdoutpythontex
 
+
 \section{The [[data]] subcommand}\label{DataCommand}
 
 This is a subcommand run as part of the [[ladok3.cli]] module.
@@ -128,7 +129,10 @@ We add a subparser.
 We set it up to use the function [[command]].
 <<add data parser to parser>>=
 data_parser = parser.add_parser("data",
-  help="Returns course results data in CSV form")
+  help="Returns course results data in CSV form",
+  description="""
+Returns the results in CSV form for all first-time registered students.
+""".strip())
 data_parser.set_defaults(func=command)
 @
 
@@ -153,7 +157,6 @@ data_writer.writerow([
 ])
 for course_round in course_rounds:
   data = extract_data_for_round(ladok, course_round)
-  data = clean_data(data)
 
   for student, component, grade, time in data:
     data_writer.writerow(
@@ -175,18 +178,27 @@ def extract_data_for_round(ladok, course_round):
   <<compute start and length of the course>>
   <<get the results for the course round>>
 
-  for result in results:
-    student = result["Student"]["Uid"]
+  for student in course_round.participants():
+    student_results = filter_student_results(student, results)
 
-    for component_result in result["ResultatPaUtbildningar"]:
-      if component_result["HarTillgodoraknande"]:
-        continue
+    <<determine if student should be included>>
 
-      <<get the component result data>>
-      <<extract component code from result data>>
-      <<extract grade and normalized date from result data>>
+    if len(student_results) < 1:
+      for component in course_round.components():
+        yield student, component, "-", None
+      continue
 
-      yield (student, component_code, grade, normalized_date)
+    for component in course_round.components():
+      result_data = filter_component_result(
+        component, student_results[0]["ResultatPaUtbildningar"])
+
+      if result_data:
+        <<extract grade and normalized date from result data>>
+      else:
+        grade = "-"
+        normalized_date = None
+
+      yield student, component, grade, normalized_date
 @
 
 We need the start of the course and the length to be able to normalize the 
@@ -206,6 +218,31 @@ results = ladok.search_reported_results_JSON(
   course_round.round_id, component.instance_id)
 @
 
+Now, we don't iterate over these results.
+We iterate over the students and the components of a course round.
+LADOK doesn't report \enquote{none results}.
+But we want a result showing that a student hasn't done anything, since that 
+should affect the statistics.
+Then we must search for a student's result in the batch of results we received 
+from LADOK.
+<<functions>>=
+def filter_student_results(student, results):
+  """Return a list of the entries in results that belong to student.
+
+  An entry belongs to student when its ["Student"]["Uid"] equals
+  student.ladok_id; the order of results is kept."""
+  return [entry for entry in results
+    if entry["Student"]["Uid"] == student.ladok_id]
+@
+
+Similarly, we want to find the result for a particular component.
+<<functions>>=
+def filter_component_result(component, results):
+  """Return the result data for component found in results, or None.
+
+  Each entry of results is examined in turn; the first one that passes the
+  component check is returned. None is returned when no entry passes."""
+  for component_result in results:
+    # NOTE(review): this chunk is expected to set result_data from
+    # component_result; its definition is outside this view.
+    <<get the component result data>>
+    # Skips (continue) entries whose "UtbildningsinstansUID" differs from
+    # component.instance_id.
+    <<check component code in result data>>
+    return result_data
+
+  return None
+@
+
 Depending on whether the data is attested or not, we can get the actual grade 
 and date from two different substructures:
 \enquote{Arbetsunderlag} are results in LADOK that have been entered, but not 
@@ -227,12 +264,9 @@ The [[course_round]] object allows us to do exactly that with the
 [[components]] method.
 We note that we can ignore the grade on the whole course, since that one is 
 determined by the other components.
-<<extract component code from result data>>=
-matching_component = course_round.components(
-  instance_id=result_data["UtbildningsinstansUID"])
-if len(matching_component) < 1:
+<<check component code in result data>>=
+if component.instance_id != result_data["UtbildningsinstansUID"]:
   continue
-component_code = matching_component[0].code
 @
 
 Finally, if there is a grade, we can extract the grade and compute the 
@@ -249,34 +283,57 @@ else:
   normalized_date = None
 @
 
+However, we don't want to include all students.
+We check whether a student should be included; the criteria are discussed in 
+\cref{WhoToInclude}.
+<<determine if student should be included>>=
+if not should_include(ladok, student, course_round, student_results):
+  continue
+@
+
 
-\section{Clean the data}
+\section{Which students to exclude}\label{WhoToInclude}
 
 We want to filter out some values from the data.
-We only want to keep students who are registered on the course the first time.
+We only want to keep students who are registered on the course the first time 
+and who don't have any credit transfer on the course.
+<<functions>>=
+def should_include(ladok, student, course_round, result):
+  """Decide whether student should be included for course_round.
+
+  Returns False when the student is reregistered on course_round or when
+  result (the student's results) contains a credit transfer; True otherwise.
+  The credit-transfer check is only made for students who are not
+  reregistered."""
+  excluded = (is_reregistered(ladok, student.ladok_id, course_round)
+    or has_credit_transfer(result))
+  return not excluded
+@
+
+A student should be counted on the first round they were registered on.
+We check if a student is reregistered by checking if the course round is the 
+first round the student was registered on.
 <<functions>>=
-def clean_data(data):
-  data = list(data)
-  students_to_remove = reregistered_students(data)
-  return remove_students(students_to_remove, data)
+def is_reregistered(ladok, student_id, course):
+  """Check if the student is reregistered on the course round course.
+
+  Fetches the student's registrations on the course (looked up by
+  course.education_id) and returns True when the registration with the
+  earliest start date belongs to a round other than course.round_code.
+  Returns False when there are no registrations to compare against."""
+  registrations = ladok.registrations_on_course_JSON(
+    course.education_id, student_id)
+  # Guard against None or an empty list; indexing would otherwise raise.
+  # NOTE(review): assumes "no known registration" means "not reregistered".
+  if not registrations:
+    return False
+
+  # min() picks the earliest registration without reordering the list that
+  # was returned, unlike an in-place sort; ties resolve to the same entry.
+  first_reg = min(registrations,
+    key=lambda x: x["Utbildningsinformation"]["Studieperiod"]["Startdatum"])
+  return first_reg["Utbildningsinformation"]["Utbildningstillfalleskod"] != \
+    course.round_code
 @
 
-We approximate first time registrations with grades reported before the course 
-started.
-Thus we remove any student who has a result before the course.
-It would be more exact to remove students who are in fact reregistered in 
-LADOK, but we leave that for a future version.
+If the student has a credit transfer for any part of the course, we should 
+exclude the student.
 <<functions>>=
-def reregistered_students(data):
-  students = set()
-  for student, _, _, time in data:
-    if time and time < 0:
-      students.add(student)
-  return students
-
-def remove_students(students, data):
-  for row in data:
-    if row[0] not in students:
-      yield row
+def has_credit_transfer(results):
+  """Tell whether any component result in results is a credit transfer.
+
+  Looks at every entry of each result's "ResultatPaUtbildningar" list and
+  returns True as soon as one has a true "HarTillgodoraknande" value;
+  returns False when none has."""
+  return any(
+    component_result["HarTillgodoraknande"]
+    for result in results
+    for component_result in result["ResultatPaUtbildningar"])
 @