Tickets/dm 45892 (#13)

* WIP * WIP * WIP * WIP * WIP * WIP * WIPO * WIP * WIP * ignore as list * WIP * WIP * WIP * WIP * WIP * WIP * WIP
lsst-ts · Aug 30, 2024 · 91df4e1 · 91df4e1
1 parent b9a1fd9
commit 91df4e1
Showing 7 changed files with 729 additions and 26 deletions.
diff --git a/notebooks_tsqr/TEMPLATE_logrep.yaml b/notebooks_tsqr/TEMPLATE_logrep.yaml
@@ -1,6 +1,9 @@
 # For use with a Times Square notebook
 title: TEMPLATE for LR
-description: Prototype 1
+description: >
+  Copy and rename this ipynb and yaml sidecar into a new
+  pair of files (<log_source>.ipynb, <log_source>.yaml).
+  The TEMPLATE_* files will eventually be hidden in Times Square.
 authors:
   - name: Steve Pothier
     slack: Steve Pothier

diff --git a/notebooks_tsqr/efd.ipynb b/notebooks_tsqr/efd.ipynb
@@ -0,0 +1,356 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Parameters. Set defaults here.\n",
+    "# Times Square replaces this cell with the user's parameters.\n",
+    "# record_limit is kept as a string here; it is converted with int() before use.\n",
+    "# NOTE(review): the efd.yaml sidecar declares default 99 for record_limit,\n",
+    "# not 999 - confirm which default is intended.\n",
+    "record_limit = '999'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1",
+   "metadata": {},
+   "source": [
+    "<a class=\"anchor\" id=\"imports\"></a>\n",
+    "## Imports and General Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Only use packages available in the Rubin Science Platform\n",
+    "import requests\n",
+    "from collections import defaultdict\n",
+    "import pandas as pd\n",
+    "from pprint import pp, pformat\n",
+    "from urllib.parse import urlencode\n",
+    "from IPython.display import FileLink, display_markdown\n",
+    "from matplotlib import pyplot as plt\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): env is only used as a label in the retrieval error message;\n",
+    "# it does not select the server. Confirm it matches the deployment queried.\n",
+    "env = 'usdf_dev'  # usdf-dev, tucson, slac, summit\n",
+    "log_name = 'narrativelog'\n",
+    "log = log_name\n",
+    "limit = int(record_limit)\n",
+    "response_timeout = 3.05  # seconds, how long to wait for connection\n",
+    "read_timeout = 20  # seconds\n",
+    "\n",
+    "# (connect, read) timeout pair handed to requests.get\n",
+    "timeout = (float(response_timeout), float(read_timeout))\n",
+    "\n",
+    "# Use the deployment URL from EXTERNAL_INSTANCE_URL when that variable is set;\n",
+    "# otherwise fall back to the Tucson teststand.\n",
+    "# A trailing slash is stripped so the service URL never contains '//'.\n",
+    "server = os.environ.get('EXTERNAL_INSTANCE_URL',\n",
+    "                         'https://tucson-teststand.lsst.codes').rstrip('/')\n",
+    "service = f'{server}/{log}'\n",
+    "service"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4",
+   "metadata": {},
+   "source": [
+    "<a class=\"anchor\" id=\"setup_source\"></a>\n",
+    "## Setup Source"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Tell the reader which service endpoint is about to be queried.\n",
+    "md = '### Will retrieve from {}'.format(service)\n",
+    "display_markdown(md, raw=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Retrieval state: recs receives the records, ok is cleared if retrieval fails.\n",
+    "recs = None\n",
+    "ok = True\n",
+    "\n",
+    "# Other query parameters (examples):\n",
+    "#   is_human=either&is_valid=either&offset=0&limit=50\n",
+    "#   site_ids=tucson&message_text=wubba&min_level=0&max_level=999&user_ids=spothier&user_agents=LOVE\n",
+    "#   tags=love&exclude_tags=ignore_message\n",
+    "qparams = {\n",
+    "    'is_human': 'either',\n",
+    "    'is_valid': 'either',\n",
+    "    'limit': limit,\n",
+    "}\n",
+    "qstr = urlencode(qparams)\n",
+    "url = f'{service}/messages?{qstr}'\n",
+    "\n",
+    "# Fields left out when facets are calculated.\n",
+    "ignore_fields = {\n",
+    "    'tags', 'urls', 'message_text', 'id', 'date_added',\n",
+    "    'obs_id', 'day_obs', 'seq_num', 'parent_id', 'user_id',\n",
+    "    'date_invalidated', 'date_begin', 'date_end',\n",
+    "    'time_lost',  # float\n",
+    "    # 'systems', 'subsystems', 'cscs',  # values are lists, special handling\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The section heading is rendered from code so it can show the configured limit.\n",
+    "display_markdown(f'## Get (up to {limit}) Records', raw=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Defaults so the cells below still run when retrieval fails.\n",
+    "flds = set()\n",
+    "facets = dict()\n",
+    "\n",
+    "# TODO Often fails on first request.  Find out why!\n",
+    "# Until that is understood, send one throw-away request first and ignore\n",
+    "# request-level failures only (not KeyboardInterrupt or programming errors).\n",
+    "try:\n",
+    "    requests.get(url, timeout=timeout)\n",
+    "except requests.exceptions.RequestException as err:\n",
+    "    print(f'Warm-up request failed (ignored): {err=}')\n",
+    "\n",
+    "try:\n",
+    "    print(f'Attempt to get logs from {url=}')\n",
+    "    response = requests.get(url, timeout=timeout)\n",
+    "    response.raise_for_status()\n",
+    "    recs = response.json()\n",
+    "    # An empty result is not an error: there are simply no fields to report.\n",
+    "    flds = set(recs[0].keys()) if recs else set()\n",
+    "    facflds = flds - ignore_fields\n",
+    "    # facets(field) = set(value-1, value-2, ...)\n",
+    "    # List-valued fields are skipped; everything else is compared as a string.\n",
+    "    facets = {fld: {str(r[fld]) for r in recs\n",
+    "                    if not isinstance(r[fld], list)}\n",
+    "              for fld in facflds}\n",
+    "except Exception as err:\n",
+    "    ok = False\n",
+    "    print(f'ERROR getting {log} from {env=} using {url=}: {err=}')\n",
+    "numf = len(flds) if ok else 0\n",
+    "numr = len(recs) if ok else 0\n",
+    "print(f'Retrieved {numr} records, each with {numf} fields.')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9",
+   "metadata": {},
+   "source": [
+    "<a class=\"anchor\" id=\"table\"></a>\n",
+    "## Tables of (mostly raw) results"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "10",
+   "metadata": {},
+   "source": [
+    "### Field names provided in records from log."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "11",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# flds is a set, so sort it to keep the table order stable between runs.\n",
+    "# When retrieval failed (ok is False) show an empty table instead of raising.\n",
+    "pd.DataFrame(sorted(flds) if ok else [], columns=['Field Name'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "12",
+   "metadata": {},
+   "source": [
+    "### Facets from log records.\n",
+    "A *facet* is the set of all values found for a field in the retrieved records. Facets are only calculated for some fields."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "13",
+   "metadata": {
+    "jupyter": {
+     "source_hidden": true
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Show the facets twice: as a table with one row per field, then as the raw dict.\n",
+    "facet_table = pd.DataFrame.from_dict(facets, orient='index')\n",
+    "display(facet_table)\n",
+    "display(facets)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "14",
+   "metadata": {},
+   "source": [
+    "### Table of selected log record fields.\n",
+    "Table can be retrieved as CSV file for local use."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "15",
+   "metadata": {
+    "jupyter": {
+     "source_hidden": true
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Narrow the records to the two columns shown in this table.\n",
+    "cols = ['date_added', 'time_lost']\n",
+    "df = pd.DataFrame(recs)[cols]\n",
+    "\n",
+    "# Allow download of CSV version of DataFrame\n",
+    "# (csvfile is a relative path, so it is written to the current working directory).\n",
+    "csvfile = 'tl.csv'\n",
+    "df.to_csv(csvfile)\n",
+    "myfile = FileLink(csvfile)\n",
+    "print('Table available as CSV file: ')\n",
+    "display(myfile)\n",
+    "# Last expression: render the table itself below the link.\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "16",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Rebind df to a frame holding every field of every retrieved record\n",
+    "# (this replaces the two-column frame built in the previous cell).\n",
+    "df = pd.DataFrame(recs)\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "17",
+   "metadata": {},
+   "source": [
+    "<a class=\"anchor\" id=\"plot\"></a>\n",
+    "## Plots from log"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "18",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot time_lost against date_added, in the order the records were returned.\n",
+    "x = [r['date_added'] for r in recs]\n",
+    "y = [r['time_lost'] for r in recs]\n",
+    "\n",
+    "# Explicit figure/axes so the labels and title attach to this plot only.\n",
+    "fig, ax = plt.subplots()\n",
+    "ax.plot(x, y)\n",
+    "ax.set_xlabel('date_added')\n",
+    "ax.set_ylabel('time_lost')\n",
+    "ax.set_title('time_lost by date_added')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "19",
+   "metadata": {},
+   "source": [
+    "<a class=\"anchor\" id=\"raw_analysis\"></a>\n",
+    "## Raw Content Analysis"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "20",
+   "metadata": {},
+   "source": [
+    "### Example of one record"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "21",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Use the last retrieved record as the example.\n",
+    "rec = recs[-1]\n",
+    "\n",
+    "# Render its message text as a markdown block quote.\n",
+    "msg = rec[\"message_text\"]\n",
+    "md = f'Message text from log:\\n> {msg}'\n",
+    "display_markdown(md, raw=True)\n",
+    "\n",
+    "# Then show the complete record with all of its fields.\n",
+    "display(rec)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "22",
+   "metadata": {},
+   "source": [
+    "<a class=\"anchor\" id=\"elicitation\"></a>\n",
+    "## Stakeholder Elicitation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "23",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Show the environment the notebook runs in (e.g. EXTERNAL_INSTANCE_URL).\n",
+    "# Values of variables whose names look like credentials are masked so the\n",
+    "# rendered notebook does not publish secrets.\n",
+    "secret_markers = ('TOKEN', 'SECRET', 'PASSWORD', 'PASSWD', 'KEY', 'CREDENTIAL')\n",
+    "ed = {name: ('<redacted>' if any(m in name.upper() for m in secret_markers) else value)\n",
+    "      for name, value in os.environ.items()}\n",
+    "# Lift the row limit so every variable is listed.\n",
+    "with pd.option_context('display.max_rows', None,):\n",
+    "    print(pd.DataFrame(ed.values(), index=ed.keys()))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "24",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks_tsqr/efd.yaml b/notebooks_tsqr/efd.yaml
@@ -0,0 +1,19 @@
+# For use with a Times Square notebook
+title: TEMPLATE for LR
+description: >
+  Copy and rename this ipynb and yaml sidecar into a new
+  pair of files (<log_source>.ipynb, <log_source>.yaml).
+  The TEMPLATE_* files will eventually be hidden in Times Square.
+authors:
+  - name: Steve Pothier
+    slack: Steve Pothier
+tags:
+  - reporting
+  - prototype
+parameters:
+  record_limit:
+    type: integer
+    description: Max number of records to output
+    default: 99
+    minimum: 1
+    maximum: 9999