From 758d2d8e367ee6f655b650c59604cd69c8cf1b93 Mon Sep 17 00:00:00 2001 From: Abby Wheelis Date: Wed, 13 Mar 2024 11:57:33 -0600 Subject: [PATCH 01/70] first draft of survey_responses notebook notebook currently uses the survey_info part of the config uses config to get the surveys, and use the xlsx files to translate between data and readable labels generates a plot for every question present in the data hopefully will work with multiple surveys, but only tested with 1 so far --- viz_scripts/survey_responses.ipynb | 269 +++++++++++++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 viz_scripts/survey_responses.ipynb diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb new file mode 100644 index 0000000..7c8f5ec --- /dev/null +++ b/viz_scripts/survey_responses.ipynb @@ -0,0 +1,269 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "a7fa9a20", + "metadata": {}, + "outputs": [], + "source": [ + "# %conda install openpyxl" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c7fbf97", + "metadata": {}, + "outputs": [], + "source": [ + "year = None\n", + "month = None\n", + "program = \"washingtoncommons\"\n", + "study_type = \"study\"\n", + "mode_of_interest = None\n", + "include_test_users = False\n", + "dynamic_labels = { }\n", + "use_imperial = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "585410e0", + "metadata": {}, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "\n", + "import urllib.request\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from plots import *\n", + "import scaffolding\n", + "\n", + "sns.set_style(\"whitegrid\")\n", + "sns.set()\n", + "%matplotlib inline\n", + "\n", + "# get metric vs imperial vars\n", + "label_units, short_label, label_units_lower, distance_col, weight_unit = scaffolding.get_units(use_imperial)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a85ca35", + "metadata": {}, + "outputs": [], + "source": [ + "#probably going to end up passing this in\n", + "survey_info = {\n", + " \"surveys\": {\n", + " \"UserProfileSurvey\": {\n", + " \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/washingtoncommons/washingtoncommons-onboarding-survey-v4.xml\",\n", + " \"version\": 1.3,\n", + " \"compatibleWith\": 1,\n", + " \"dataKey\": \"manual/demographic_survey\",\n", + " \"labelTemplate\": {\n", + " \"en\": \"Answered\",\n", + " \"es\": \"Contestada\"\n", + " }\n", + " },\n", + " \"TripConfirmSurvey\": {\n", + " \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/washingtoncommons/washingtoncommons-trip-survey-v2.json\",\n", + " \"version\": 1.2,\n", + " \"compatibleWith\": 1,\n", + " \"dataKey\": \"manual/trip_user_input\",\n", + " \"labelVars\": {\n", + " \"modes\": {\n", + " \"key\": \"What_was_the_main_pu_f_this_trip_trip_leg\",\n", + " \"type\": \"length\"\n", + " },\n", + " \"purposes\": {\n", + " \"key\": \"_2_What_was_the_mode_of_transp\",\n", + " \"type\": \"length\"\n", + " }\n", + " },\n", + " \"labelTemplate\": {\n", + " \"en\": \"{ purposes, plural, =0 {No purposes} one {1 purpose} other {# purposes} }, { modes, plural, =0 {No modes} one {1 mode} other {# modes} }\",\n", + " \"es\": \"{ purposes, plural, =0 {No propósitos} one {1 propósito} other {# propósitos} }, { modes, plural, =0 {No modos} one {1 modo} other {# modos} }\"\n", + " }\n", + " }\n", + " },\n", + " \"trip-labels\": \"ENKETO\"\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf0b2f08", + "metadata": {}, + "outputs": [], + "source": [ + "#input: list of survey names from the config\n", + "#output: list of links to the sheets where questions/answers are\n", + "#will run n surveys times\n", + "def get_sheet_links(survey_list):\n", + " sheet_list = []\n", + " for name in survey_list:\n", + " form_path = survey_info['surveys'][name]['formPath']\n", + " #THIS ASSUMES THE FILENAME IS THE SAME AS THE FORM PATH BUT WITH XLSX FILE TYPE\n", + " l_path = form_path.split('.')\n", + " l_path[-1] = 'xlsx'\n", + " s = '.'\n", + " sheet_path = s.join(l_path)\n", + " sheet_list.append(sheet_path)\n", + " return sheet_list\n", + "\n", + "#input: list of urls for the survey xlsx files\n", + "#output: two dictionaries to translate the ?s/ans\n", + "#will run n surveys times\n", + "def build_dictionaries(url_list):\n", + " opt_dicts = {}\n", + " quest_dicts = {}\n", + " \n", + " for url in sheet_list:\n", + " result = urllib.request.urlopen(url).read()\n", + " xls = pd.ExcelFile(result)\n", + " questions = pd.read_excel(xls, 'survey')\n", + " options = pd.read_excel(xls, 'choices')\n", + "\n", + " opt_dict.update(dict(zip(options.name, options.label)))\n", + " quest_dict.update(dict(zip(questions.name, questions.label)))\n", + " \n", + " return opt_dict, quest_dict\n", + "\n", + "#input: dataframe containing all trips that have non-blank user_input\n", + "#output: dataframe with questions in the columns and answers in the rows\n", + "#for loop will run n survey responses times (this could get big!)\n", + "def create_dataframe(df_trips_w_surveys):\n", + " df = df_trips_w_surveys.reset_index()\n", + " rows = []\n", + " for i in range(len(df)):\n", + " row = pd.json_normalize(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse']['data'])\n", + " rows.append(row)\n", + "\n", + " df = pd.concat(rows)\n", + " #drop the non-question columns, should leave behind all the questions\n", + " df = df.drop(columns = ['end', 'start', 'attrid', 'attrxmlns:orx', 'attrxmlns:orx', 'attrxmlns:jr', 'meta.instanceID', 'meta.deprecatedID'])\n", + "\n", + " return df\n", + "\n", + "#input: list of labels that will end up on the chart\n", + "#output: translated to readable list, with multiples handled\n", + "#the for loop will run n times, where num_options <= n < all possible combinations of options\n", + "#if people are selecting many different combinations, could be large\n", + "def traslate_options(labels):\n", + " for i in range(len(labels)):\n", + " l_labels = labels[i].split(\" \")\n", + " for k in range(len(l_labels)):\n", + " print()\n", + " l_labels[k] = opt_dict[l_labels[k]]\n", + " sep = \"\\n\"\n", + " labels[i] = sep.join(l_labels)\n", + " \n", + " return labels" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b9db890", + "metadata": {}, + "outputs": [], + "source": [ + "#list of all surveys that are not a \"UserProfileSurvey\"\n", + "survey_list = list(survey_info['surveys'].keys())\n", + "survey_list.remove('UserProfileSurvey')\n", + "sheet_list = get_sheet_links(survey_list)\n", + "\n", + "print('survey sheets: ', sheet_list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "caeb880b", + "metadata": {}, + "outputs": [], + "source": [ + "#load all of the confirmed trips\n", + "tq = scaffolding.get_time_query(year, month)\n", + "all_confirmed_trips = scaffolding.load_all_confirmed_trips(tq)\n", + "\n", + "#remove blank inputs\n", + "survey_trips = all_confirmed_trips[all_confirmed_trips['user_input'] != {}]\n", + "print(len(survey_trips))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c00da0a7", + "metadata": {}, + "outputs": [], + "source": [ + "#create translation dictionaries\n", + "opt_dicts, quest_dicts = build_dictionaries(sheet_list)\n", + "\n", + "#format survey trips into responses dataframe\n", + "df_responses = create_dataframe(survey_trips)\n", + "\n", + "#replace questions\n", + "df_responses = df_responses.rename(columns = quest_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08d04b39", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "#create one plot per question\n", + "for col in df.columns:\n", + " quest_frame = df.copy()\n", + " quest_frame = quest_frame[quest_frame[col] != \"\"] #could have blank responses for non-mandatory ?s\n", + " labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist())\n", + " values = quest_frame[col].value_counts(dropna=True).tolist()\n", + " \n", + " pie_chart_purpose(col, labels, values, \"howdy\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "132e00a1", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From a5262573d73df89ea5c4495a334942f437f84e9a Mon Sep 17 00:00:00 2001 From: Abby Wheelis Date: Thu, 14 Mar 2024 18:39:56 -0600 Subject: [PATCH 02/70] add quality text to pie charts --- viz_scripts/survey_responses.ipynb | 42 ++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb index 7c8f5ec..4a5888a 100644 --- a/viz_scripts/survey_responses.ipynb +++ b/viz_scripts/survey_responses.ipynb @@ -123,8 +123,8 @@ "#output: two dictionaries to translate the ?s/ans\n", "#will run n surveys times\n", "def build_dictionaries(url_list):\n", - " opt_dicts = {}\n", - " quest_dicts = {}\n", + " opt_dict = {}\n", + " quest_dict = {}\n", " \n", " for url in sheet_list:\n", " result = urllib.request.urlopen(url).read()\n", @@ -157,7 +157,7 @@ "#output: translated to readable list, with multiples handled\n", "#the for loop will run n times, where num_options <= n < all possible combinations of options\n", "#if people are selecting many different combinations, could be large\n", - "def traslate_options(labels):\n", + "def traslate_options(labels, opt_dict):\n", " for i in range(len(labels)):\n", " l_labels = labels[i].split(\" \")\n", " for k in range(len(l_labels)):\n", @@ -166,7 +166,17 @@ " sep = \"\\n\"\n", " labels[i] = sep.join(l_labels)\n", " \n", - " return labels" + " return labels\n", + "\n", + "#input: all of the responses to a single survey\n", + "#output: text with num responses and users\n", + "#THIS SHOULD GO IN SCAFFOLDING AND INCLUDE WHOLE POOL AT SOME POINT\n", + "def get_text(responses):\n", + " num_resp = len(responses)\n", + " num_users = responses.user_id.nunique()\n", + " quality_text = f\"Based on {num_resp} responses from {num_users} users\"\n", + " print(quality_text)\n", + " return quality_text" ] }, { @@ -194,10 +204,20 @@ "#load all of the confirmed trips\n", "tq = scaffolding.get_time_query(year, month)\n", "all_confirmed_trips = scaffolding.load_all_confirmed_trips(tq)\n", - "\n", "#remove blank inputs\n", "survey_trips = all_confirmed_trips[all_confirmed_trips['user_input'] != {}]\n", - "print(len(survey_trips))" + "print(len(survey_trips))\n", + "\n", + "#survey counts df\n", + "survey_trips = survey_trips.reset_index()\n", + "survey_trips['survey_name'] = survey_trips.user_input.apply(lambda sr: sr['trip_user_input']['data']['name'])\n", + "\n", + "#gather the cols needed for charts and text\n", + "survey_trips = survey_trips[['survey_name', 'user_id', 'user_input']]\n", + "survey_trips.head()\n", + "\n", + "#get quality text\n", + "qual_text = get_text(survey_trips)" ] }, { @@ -208,7 +228,7 @@ "outputs": [], "source": [ "#create translation dictionaries\n", - "opt_dicts, quest_dicts = build_dictionaries(sheet_list)\n", + "opt_dict, quest_dict = build_dictionaries(sheet_list)\n", "\n", "#format survey trips into responses dataframe\n", "df_responses = create_dataframe(survey_trips)\n", @@ -227,13 +247,13 @@ "outputs": [], "source": [ "#create one plot per question\n", - "for col in df.columns:\n", - " quest_frame = df.copy()\n", + "for col in df_responses.columns:\n", + " quest_frame = df_responses.copy()\n", " quest_frame = quest_frame[quest_frame[col] != \"\"] #could have blank responses for non-mandatory ?s\n", - " labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist())\n", + " labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n", " values = quest_frame[col].value_counts(dropna=True).tolist()\n", " \n", - " pie_chart_purpose(col, labels, values, \"howdy\")" + " pie_chart_purpose(col+'\\n'+qual_text, labels, values, \"howdy\")" ] }, { From c4cfdfaa0266a88532f0dcbbbf7426b65d660f26 Mon Sep 17 00:00:00 2001 From: Abby Wheelis Date: Wed, 20 Mar 2024 16:21:33 -0600 Subject: [PATCH 03/70] revise the way surveys are read starting to read via xml instead of spreadsheet, more support across languages --- viz_scripts/survey_responses.ipynb | 187 +++++++++++++++-------------- 1 file changed, 98 insertions(+), 89 deletions(-) diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb index 4a5888a..5c8100e 100644 --- a/viz_scripts/survey_responses.ipynb +++ b/viz_scripts/survey_responses.ipynb @@ -1,15 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "a7fa9a20", - "metadata": {}, - "outputs": [], - "source": [ - "# %conda install openpyxl" - ] - }, { "cell_type": "code", "execution_count": null, @@ -19,7 +9,7 @@ "source": [ "year = None\n", "month = None\n", - "program = \"washingtoncommons\"\n", + "program = \"dfc-fermata\"\n", "study_type = \"study\"\n", "mode_of_interest = None\n", "include_test_users = False\n", @@ -27,6 +17,35 @@ "use_imperial = False" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce0dcc9f", + "metadata": {}, + "outputs": [], + "source": [ + "#probably going to end up passing this in\n", + "survey_info = {\n", + " \"surveys\": {\n", + " \"UserProfileSurvey\": {\n", + " \"formPath\": \"https://raw.githubusercontent.com/JGreenlee/nrel-openpath-deploy-configs/fermata-demo/survey_resources/dfc-fermata/fermata-onboarding-v0.xml\",\n", + " \"version\": 1,\n", + " \"compatibleWith\": 1,\n", + " \"dataKey\": \"manual/demographic_survey\",\n", + " \"labelTemplate\": { \"en\": \"Answered\" }\n", + " },\n", + " \"TripConfirmSurvey\": {\n", + " \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/fermata-ev-return-trip-v0.xml\",\n", + " \"version\": 1,\n", + " \"compatibleWith\": 1,\n", + " \"dataKey\": \"manual/trip_user_input\",\n", + " \"labelTemplate\": { \"en\": \"Answered\" }\n", + " }\n", + " },\n", + " \"trip-labels\": \"ENKETO\"\n", + " }" + ] + }, { "cell_type": "code", "execution_count": null, @@ -35,11 +54,10 @@ "outputs": [], "source": [ "from collections import defaultdict\n", - "\n", "import urllib.request\n", - "\n", "import numpy as np\n", "import pandas as pd\n", + "from xml.dom import minidom\n", "\n", "from plots import *\n", "import scaffolding\n", @@ -55,52 +73,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9a85ca35", - "metadata": {}, - "outputs": [], - "source": [ - "#probably going to end up passing this in\n", - "survey_info = {\n", - " \"surveys\": {\n", - " \"UserProfileSurvey\": {\n", - " \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/washingtoncommons/washingtoncommons-onboarding-survey-v4.xml\",\n", - " \"version\": 1.3,\n", - " \"compatibleWith\": 1,\n", - " \"dataKey\": \"manual/demographic_survey\",\n", - " \"labelTemplate\": {\n", - " \"en\": \"Answered\",\n", - " \"es\": \"Contestada\"\n", - " }\n", - " },\n", - " \"TripConfirmSurvey\": {\n", - " \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/washingtoncommons/washingtoncommons-trip-survey-v2.json\",\n", - " \"version\": 1.2,\n", - " \"compatibleWith\": 1,\n", - " \"dataKey\": \"manual/trip_user_input\",\n", - " \"labelVars\": {\n", - " \"modes\": {\n", - " \"key\": \"What_was_the_main_pu_f_this_trip_trip_leg\",\n", - " \"type\": \"length\"\n", - " },\n", - " \"purposes\": {\n", - " \"key\": \"_2_What_was_the_mode_of_transp\",\n", - " \"type\": \"length\"\n", - " }\n", - " },\n", - " \"labelTemplate\": {\n", - " \"en\": \"{ purposes, plural, =0 {No purposes} one {1 purpose} other {# purposes} }, { modes, plural, =0 {No modes} one {1 mode} other {# modes} }\",\n", - " \"es\": \"{ purposes, plural, =0 {No propósitos} one {1 propósito} other {# propósitos} }, { modes, plural, =0 {No modos} one {1 modo} other {# modos} }\"\n", - " }\n", - " }\n", - " },\n", - " \"trip-labels\": \"ENKETO\"\n", - " }" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bf0b2f08", + "id": "b18bc854", "metadata": {}, "outputs": [], "source": [ @@ -111,9 +84,9 @@ " sheet_list = []\n", " for name in survey_list:\n", " form_path = survey_info['surveys'][name]['formPath']\n", - " #THIS ASSUMES THE FILENAME IS THE SAME AS THE FORM PATH BUT WITH XLSX FILE TYPE\n", + " #THIS ASSUMES THE FILENAME IS THE SAME AS THE FORM PATH BUT WITH xml FILE TYPE\n", " l_path = form_path.split('.')\n", - " l_path[-1] = 'xlsx'\n", + " l_path[-1] = 'xml'\n", " s = '.'\n", " sheet_path = s.join(l_path)\n", " sheet_list.append(sheet_path)\n", @@ -127,16 +100,38 @@ " quest_dict = {}\n", " \n", " for url in sheet_list:\n", - " result = urllib.request.urlopen(url).read()\n", - " xls = pd.ExcelFile(result)\n", - " questions = pd.read_excel(xls, 'survey')\n", - " options = pd.read_excel(xls, 'choices')\n", + " result = urllib.request.urlopen(url)\n", + " doc = minidom.parse(result) \n", "\n", - " opt_dict.update(dict(zip(options.name, options.label)))\n", - " quest_dict.update(dict(zip(questions.name, questions.label)))\n", + " labels = doc.getElementsByTagName(\"label\") \n", + " for label in labels:\n", + " if(bool(label.parentNode.getAttribute(\"ref\"))):\n", + " print(label.parentNode.getAttribute(\"ref\").split('/')[-1])\n", + " print(label.firstChild.data)\n", + " \n", + " quest_dict[str(label.parentNode.getAttribute(\"ref\").split('/')[-1])] = label.firstChild.data\n", + " \n", + " return opt_dict, quest_dict\n", + "\n", + "def build_dictionaries(url_list):\n", + " opt_dict = {}\n", + " quest_dict = {}\n", + " \n", + " for url in sheet_list:\n", + " result = urllib.request.urlopen(url)\n", + " tree = ET.parse(result)\n", + " root = tree.getroot()\n", + " \n", + " print(root.findall(\".\"))\n", + " \n", + " for child in root:\n", + " print(child.tag, child.attrib)\n", + " \n", + " print(root.findall(\".//label\"))\n", " \n", " return opt_dict, quest_dict\n", "\n", + "\n", "#input: dataframe containing all trips that have non-blank user_input\n", "#output: dataframe with questions in the columns and answers in the rows\n", "#for loop will run n survey responses times (this could get big!)\n", @@ -144,12 +139,22 @@ " df = df_trips_w_surveys.reset_index()\n", " rows = []\n", " for i in range(len(df)):\n", - " row = pd.json_normalize(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse']['data'])\n", + " data_key = list(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse'].keys())[0]\n", + " row = pd.json_normalize(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse'][data_key])\n", " rows.append(row)\n", - "\n", " df = pd.concat(rows)\n", + "# print(df.head())\n", + " \n", + " rename_nests = {}\n", + " for col in df.columns:\n", + " rename_nests[col] = col.split('.')[-1]\n", + " \n", + " print(rename_nests)\n", + " df = df.rename(columns=rename_nests)\n", + " \n", " #drop the non-question columns, should leave behind all the questions\n", - " df = df.drop(columns = ['end', 'start', 'attrid', 'attrxmlns:orx', 'attrxmlns:orx', 'attrxmlns:jr', 'meta.instanceID', 'meta.deprecatedID'])\n", + " #need to do this better, won't always be the same\n", + " df = df.drop(columns = ['end', 'start', 'attrid', 'attrxmlns:orx', 'attrxmlns:orx', 'attrxmlns:jr', 'instanceID'])\n", "\n", " return df\n", "\n", @@ -182,7 +187,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4b9db890", + "id": "3b355efd", "metadata": {}, "outputs": [], "source": [ @@ -197,7 +202,7 @@ { "cell_type": "code", "execution_count": null, - "id": "caeb880b", + "id": "854b3070", "metadata": {}, "outputs": [], "source": [ @@ -206,7 +211,6 @@ "all_confirmed_trips = scaffolding.load_all_confirmed_trips(tq)\n", "#remove blank inputs\n", "survey_trips = all_confirmed_trips[all_confirmed_trips['user_input'] != {}]\n", - "print(len(survey_trips))\n", "\n", "#survey counts df\n", "survey_trips = survey_trips.reset_index()\n", @@ -223,46 +227,51 @@ { "cell_type": "code", "execution_count": null, - "id": "c00da0a7", + "id": "7fe65f88", "metadata": {}, "outputs": [], "source": [ + "\n", + "\n", "#create translation dictionaries\n", "opt_dict, quest_dict = build_dictionaries(sheet_list)\n", + "print(opt_dict)\n", + "print(quest_dict)\n", "\n", "#format survey trips into responses dataframe\n", - "df_responses = create_dataframe(survey_trips)\n", - "\n", - "#replace questions\n", - "df_responses = df_responses.rename(columns = quest_dict)" + "df_responses = create_dataframe(survey_trips)" ] }, { "cell_type": "code", "execution_count": null, - "id": "08d04b39", + "id": "7bcebeef", "metadata": { "scrolled": false }, "outputs": [], "source": [ + "file_suffix = scaffolding.get_file_suffix(year, month, program)\n", + "\n", "#create one plot per question\n", "for col in df_responses.columns:\n", + " filename = col + file_suffix\n", + " print(filename)\n", + " \n", " quest_frame = df_responses.copy()\n", " quest_frame = quest_frame[quest_frame[col] != \"\"] #could have blank responses for non-mandatory ?s\n", - " labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n", + " labels = quest_frame[col].value_counts(dropna=True).keys().tolist()\n", + "# labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n", " values = quest_frame[col].value_counts(dropna=True).tolist()\n", + " \n", + " try:\n", + " label = quest_dict[col]\n", + " except:\n", + " label = col\n", " \n", - " pie_chart_purpose(col+'\\n'+qual_text, labels, values, \"howdy\")" + " #if other is 0 don't display it :)\n", + " pie_chart_purpose(label+'\\n'+qual_text, labels, values, filename)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "132e00a1", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 427f44b5d997b244cb3f73dcd1a6e445a1337b0d Mon Sep 17 00:00:00 2001 From: Abby Wheelis Date: Wed, 20 Mar 2024 16:33:06 -0600 Subject: [PATCH 04/70] update dictionary building there was a bug (duplicate code) in how I was creating the dictionaries --- viz_scripts/survey_responses.ipynb | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb index 5c8100e..759d046 100644 --- a/viz_scripts/survey_responses.ipynb +++ b/viz_scripts/survey_responses.ipynb @@ -113,24 +113,6 @@ " \n", " return opt_dict, quest_dict\n", "\n", - "def build_dictionaries(url_list):\n", - " opt_dict = {}\n", - " quest_dict = {}\n", - " \n", - " for url in sheet_list:\n", - " result = urllib.request.urlopen(url)\n", - " tree = ET.parse(result)\n", - " root = tree.getroot()\n", - " \n", - " print(root.findall(\".\"))\n", - " \n", - " for child in root:\n", - " print(child.tag, child.attrib)\n", - " \n", - " print(root.findall(\".//label\"))\n", - " \n", - " return opt_dict, quest_dict\n", - "\n", "\n", "#input: dataframe containing all trips that have non-blank user_input\n", "#output: dataframe with questions in the columns and answers in the rows\n", @@ -231,10 +213,9 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "\n", "#create translation dictionaries\n", "opt_dict, quest_dict = build_dictionaries(sheet_list)\n", + "\n", "print(opt_dict)\n", "print(quest_dict)\n", "\n", From 94f48c5af9945c17b0bb5a5952ce5d330045dd13 Mon Sep 17 00:00:00 2001 From: Abby Wheelis Date: Wed, 20 Mar 2024 16:33:46 -0600 Subject: [PATCH 05/70] only display questions currently in the survey We only want to display question that are still in the survey, as those are the questions we will actually have a response for --- viz_scripts/survey_responses.ipynb | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb index 759d046..128f3b4 100644 --- a/viz_scripts/survey_responses.ipynb +++ b/viz_scripts/survey_responses.ipynb @@ -244,14 +244,11 @@ " labels = quest_frame[col].value_counts(dropna=True).keys().tolist()\n", "# labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n", " values = quest_frame[col].value_counts(dropna=True).tolist()\n", - " \n", - " try:\n", - " label = quest_dict[col]\n", - " except:\n", - " label = col\n", " \n", " #if other is 0 don't display it :)\n", - " pie_chart_purpose(label+'\\n'+qual_text, labels, values, filename)" + " #will only show questions in the current survey (not older versions) and that have at least 1 response\n", + " if col in quest_dict and len(quest_frame[col]) != 0:\n", + " pie_chart_purpose(quest_dict[col]+'\\n'+qual_text, labels, values, filename)" ] } ], From 7f3d4779446f91bb9428408018fdb37de28db942 Mon Sep 17 00:00:00 2001 From: Abby Wheelis Date: Fri, 22 Mar 2024 12:16:49 -0600 Subject: [PATCH 06/70] connect charts to frontend added code to the index.html file to fetch surveys, add each of the options for the survey questions, and display the charts by default on the dashboard --- frontend/index.html | 60 ++++++++++++++++++++++++++++- frontend/metrics_study_surveys.html | 4 ++ viz_scripts/survey_responses.ipynb | 7 ++-- 3 files changed, 67 insertions(+), 4 deletions(-) create mode 100644 frontend/metrics_study_surveys.html diff --git a/frontend/index.html b/frontend/index.html index a7e3ac1..94e116f 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -344,6 +344,12 @@ const end_year = date.getFullYear(); var current_month = start_month; var current_year = start_year; + + //testing dfc fermata .. doesn't start until April... + // if ((current_month >= end_month) && (current_year >= end_year)) { + // current_month = current_month - 2; //dfc has not started yet... + // } + dates.push([current_month, current_year]); while (!(current_month == end_month && current_year == end_year)) { current_month += 1; @@ -356,6 +362,25 @@ }; return dates; }; + + function getDictionaryList(form_list) { + var quest_dict = {}; + return new Promise(async (resolve) => { + for (i in form_list) { + response = await fetch(form_list[i]); + text = await response.text(); + const parser = new DOMParser(); + const doc = parser.parseFromString(text, "text/xml"); + labels = doc.getElementsByTagName("label"); + for (i in labels) { + try { + quest_dict[labels[i].parentNode.getAttribute("ref").split('/').slice(-1)] = labels[i].firstChild.data; + } catch (e) { } + } + } + resolve(quest_dict); + }) + };