diff --git a/nbviewer/app.py b/nbviewer/app.py
index 851cf5f4..97ce6173 100644
--- a/nbviewer/app.py
+++ b/nbviewer/app.py
@@ -209,6 +209,10 @@ class NBViewer(Application):
         default_value="nbviewer.providers.gist.handlers.UserGistsHandler",
         help="The Tornado handler to use for viewing directory containing all of a user's Gists",
     ).tag(config=True)
+    gitlab_handler = Unicode(
+        default_value="nbviewer.providers.gitlab.handlers.GitlabHandler",
+        help="The Tornado handler to use for viewing notebooks in a GitLab instance",
+    ).tag(config=True)
 
     answer_yes = Bool(
         default_value=False,
@@ -627,6 +631,7 @@ def init_tornado_application(self):
             local_handler=self.local_handler,
             url_handler=self.url_handler,
             user_gists_handler=self.user_gists_handler,
+            gitlab_handler=self.gitlab_handler,
         )
         handler_kwargs = {
             "handler_names": handler_names,
diff --git a/nbviewer/providers/__init__.py b/nbviewer/providers/__init__.py
index 30170dfd..fe24c0d6 100644
--- a/nbviewer/providers/__init__.py
+++ b/nbviewer/providers/__init__.py
@@ -5,16 +5,16 @@
 # the file COPYING, distributed as part of this software.
 # -----------------------------------------------------------------------------
 
+
 default_providers = [
-    "nbviewer.providers.{}".format(prov) for prov in ["url", "github", "gist"]
+    "nbviewer.providers.{}".format(prov) for prov in ["url", "github", "gist", "gitlab"]
 ]
 
 default_rewrites = [
     "nbviewer.providers.{}".format(prov)
-    for prov in ["gist", "github", "dropbox", "url"]
+    for prov in ["gitlab", "gist", "github", "dropbox", "url"]
 ]
 
-
 def provider_handlers(providers, **handler_kwargs):
     """Load tornado URL handlers from an ordered list of dotted-notation modules
     which contain a `default_handlers` function
diff --git a/nbviewer/providers/gitlab/__init__.py b/nbviewer/providers/gitlab/__init__.py
new file mode 100644
index 00000000..9c6b9483
--- /dev/null
+++ b/nbviewer/providers/gitlab/__init__.py
@@ -0,0 +1 @@
+from .handlers import default_handlers, uri_rewrites
diff --git a/nbviewer/providers/gitlab/client.py b/nbviewer/providers/gitlab/client.py
new file mode 100644
index 00000000..cf2759be
--- /dev/null
+++ b/nbviewer/providers/gitlab/client.py
@@ -0,0 +1,140 @@
+#-----------------------------------------------------------------------------
+# Copyright (C) 2020 The IPython Development Team
+#
+# Distributed under the terms of the BSD License. The full license is in
+# the file COPYING, distributed as part of this software.
+#-----------------------------------------------------------------------------
+
+import json
+import os
+from urllib.parse import quote, quote_plus, urlencode
+
+from tornado.httpclient import AsyncHTTPClient, HTTPClientError
+from tornado.log import app_log
+
+from ...utils import response_text
+
+
+def _quote_path_segment(value):
+    """Percent-encode ``value`` for use as a single URL path segment.
+
+    Non-string values (numeric project IDs) are returned unchanged. Strings
+    are encoded with no "safe" characters, so "/" becomes "%2F" as the GitLab
+    API requires for namespaced project paths and file paths. ``quote`` is
+    used instead of ``quote_plus`` because "+" does not mean a space inside a
+    URL path.
+    """
+    if isinstance(value, str):
+        return quote(value, safe="")
+    return value
+
+
+class GitlabClient(object):
+    """Asynchronous client for a private GitLab instance using V4 REST API.
+
+    Please see https://docs.gitlab.com/ee/api/ for details."""
+
+    def __init__(self, host, token=None, client=None):
+        """Init a GitlabClient.
+
+        host: str
+        token: optional str
+            This needs a private access token - if not provided, uses
+            environment variable GITLAB_TOKEN
+        client: AsyncHTTPClient
+        """
+        self.client = client or AsyncHTTPClient()
+        self.host = host
+        self.token = token or os.environ.get("GITLAB_TOKEN")
+
+    @property
+    def api_url(self):
+        """The base URL of the REST API."""
+        return "https://{host}/api/v4".format(host=self.host)
+
+    def _build_url(self, path, **params):
+        """Return the API URL for ``path`` with ``params`` as its query string.
+
+        path: str, already percent-encoded, relative to ``api_url``
+        params: query parameters; those whose value is None are omitted
+
+        The private token is appended only when one is configured, so that
+        a literal "private_token=None" is never sent.
+        """
+        query = {key: value for key, value in params.items() if value is not None}
+        if self.token:
+            query["private_token"] = self.token
+        url = "{base}/{path}".format(base=self.api_url, path=path)
+        if query:
+            url = "{url}?{query}".format(
+                url=url, query=urlencode(query, quote_via=quote_plus)
+            )
+        return url
+
+    async def _fetch_json(self, url):
+        """Fetch URL and return its body parsed as JSON."""
+        try:
+            response = await self.client.fetch(url)
+        except HTTPClientError as ex:
+            # log and re-raise because this can get lost in async
+            app_log.error(ex)
+            raise
+        return json.loads(response_text(response))
+
+    async def projects(self, search=None):
+        """List projects accessible on this GitLab instance.
+
+        search: optional str, only return projects matching this term
+        """
+        projects_url = self._build_url("projects", simple="true", search=search)
+        return await self._fetch_json(projects_url)
+
+    async def tree(self, project_id, branch="master", path=None, recursive=False):
+        """List all files in the given branch and project.
+
+        project_id: int or str
+        branch: optional str
+        path: optional str (defaults to root)
+        recursive: optional bool
+        """
+        # NOTE(review): GitLab documents a maximum page size (100 at the time
+        # of writing), so large trees may be truncated -- confirm and paginate.
+        tree_url = self._build_url(
+            "projects/{project_id}/repository/tree".format(
+                project_id=_quote_path_segment(project_id)
+            ),
+            recursive="true" if recursive else "false",
+            ref=branch,
+            per_page=1000,
+            path=path,
+        )
+        return await self._fetch_json(tree_url)
+
+    async def fileinfo(self, project_id, filepath, branch="master"):
+        """Information for file in given branch and project.
+
+        project_id: int or str
+        branch: str
+        filepath: str
+        """
+        file_url = self._build_url(
+            "projects/{project_id}/repository/files/{filepath}".format(
+                project_id=_quote_path_segment(project_id),
+                filepath=_quote_path_segment(filepath),
+            ),
+            ref=branch,
+        )
+        return await self._fetch_json(file_url)
+
+    def raw_file_url(self, project_id, blob_sha):
+        """URL of the raw file matching given blob SHA in project.
+
+        project_id: int or str
+        blob_sha: str
+        """
+        return self._build_url(
+            "projects/{project_id}/repository/blobs/{blob_sha}/raw".format(
+                project_id=_quote_path_segment(project_id),
+                blob_sha=blob_sha,
+            )
+        )
diff --git a/nbviewer/providers/gitlab/handlers.py b/nbviewer/providers/gitlab/handlers.py
new file mode 100644
index 00000000..5200db30
--- /dev/null
+++ b/nbviewer/providers/gitlab/handlers.py
@@ -0,0 +1,196 @@
+#-----------------------------------------------------------------------------
+# Copyright (C) 2020 The IPython Development Team
+#
+# Distributed under the terms of the BSD License. The full license is in
+# the file COPYING, distributed as part of this software.
+#-----------------------------------------------------------------------------
+
+import json
+import os
+
+from tornado import web
+from tornado.httpclient import HTTPClientError
+from tornado.log import app_log
+
+from ..base import RenderingHandler, cached
+from ...utils import response_text
+from .. import _load_handler_from_location
+from .client import GitlabClient
+
+
+class GitlabHandler(RenderingHandler):
+    """Render notebooks and directory listings hosted on a GitLab instance."""
+
+    async def lookup_notebook(self, client, group, repo, branch, filepath):
+        """Attempt to find the notebook by searching project trees.
+
+        Used when an instance is misconfigured and paths are getting sanitised.
+
+        Returns the raw file URL of the matching blob. Raises LookupError when
+        the project or the file cannot be found.
+        """
+        path_with_namespace = "{0}/{1}".format(group, repo)
+
+        projects = await client.projects(search=repo)
+        project = next(
+            (p for p in projects if p["path_with_namespace"] == path_with_namespace),
+            None,
+        )
+        if project is None:
+            raise LookupError("Project path not found: " + path_with_namespace)
+
+        tree = await client.tree(project["id"], branch, recursive=True)
+        blob = next((item for item in tree if item["path"] == filepath), None)
+        if blob is None:
+            raise LookupError("Blob not found: " + filepath)
+
+        return client.raw_file_url(project["id"], blob["id"])
+
+    async def get_notebook_data(self, client, group, repo, branch, filepath):
+        """Resolve the raw download URL of a notebook.
+
+        The file is first requested directly through the files API; if that
+        returns a 404 the project tree is searched instead. Failures are
+        logged and None is returned so the caller can decide how to respond.
+        """
+        path_with_namespace = "{group}/{repo}".format(group=group, repo=repo)
+
+        try:
+            fileinfo = await client.fileinfo(path_with_namespace, filepath, branch)
+            return client.raw_file_url(path_with_namespace, fileinfo["blob_id"])
+        except HTTPClientError as http_error:
+            if http_error.code != 404:
+                app_log.error(http_error)
+                return None
+        except Exception as e:
+            app_log.error(e)
+            return None
+
+        # Sometimes the url-encoded paths get sanitized, so give this a try
+        app_log.warning(
+            "Unable to access %s in %s directly, attempting lookup",
+            filepath,
+            path_with_namespace,
+        )
+        try:
+            return await self.lookup_notebook(client, group, repo, branch, filepath)
+        except Exception as e:
+            app_log.error(e)
+            return None
+
+    async def deliver_notebook(self, host, group, repo, branch, path, remote_url):
+        """Fetch the notebook at remote_url and render it."""
+        response = await self.fetch(remote_url)
+
+        base_url = "/gitlab/{host}/{group}/{repo}/tree/{branch}/".format(
+            host=host, group=group, repo=repo, branch=branch
+        )
+
+        breadcrumbs = [{"url": base_url, "name": repo}]
+        dirpath = path.rsplit("/", 1)[0]
+        breadcrumbs.extend(self.breadcrumbs(dirpath, base_url))
+
+        try:
+            nbjson = response_text(response, encoding="utf-8")
+        except UnicodeDecodeError:
+            app_log.error("Notebook is not utf8: %s", remote_url, exc_info=True)
+            raise web.HTTPError(400)
+
+        # NOTE(review): remote_url may carry the private access token in its
+        # query string and is handed to the rendered page below -- confirm
+        # that exposing it is acceptable, or strip the token first.
+        await self.finish_notebook(
+            nbjson,
+            download_url=remote_url,
+            msg="file from url: " + remote_url,
+            public=False,
+            breadcrumbs=breadcrumbs,
+            request=self.request,
+        )
+
+    def render_dirview_template(self, entries, title, breadcrumbs):
+        """Render the directory listing template."""
+        return self.render_template(
+            "dirview.html", entries=entries, breadcrumbs=breadcrumbs, title=title
+        )
+
+    async def show_dir(self, client, group, repo, branch, dirpath):
+        """Render a listing of dirpath in the given project and branch."""
+        path_with_namespace = "{group}/{repo}".format(group=group, repo=repo)
+        tree = await client.tree(path_with_namespace, branch, dirpath)
+
+        full_url = "/gitlab/{host}/{group}/{repo}/{path_type}/{branch}/{path}"
+        external_url = "https://{host}/{group}/{repo}/{path_type}/{branch}/{path}"
+        url_parts = dict(host=client.host, group=group, repo=repo, branch=branch)
+
+        base_url = full_url.format(path_type="tree", path="", **url_parts)
+
+        breadcrumbs = [{"url": base_url, "name": repo}]
+        breadcrumbs.extend(self.breadcrumbs(dirpath, base_url))
+
+        entries = []
+        for item in tree:
+            if item["type"] == "tree":
+                entry_class = "fa fa-folder-open"
+                # Link with an absolute URL: a bare item["path"] is resolved
+                # by the browser relative to the current page, which
+                # duplicates path segments once the listing is nested.
+                template, path_type = full_url, "tree"
+            elif item["type"] == "blob" and item["path"].endswith(".ipynb"):
+                entry_class = "fa fa-book"
+                template, path_type = full_url, "blob"
+            else:
+                # Anything that is not a notebook is viewed on GitLab itself.
+                entry_class = "fa fa-share"
+                template, path_type = external_url, "blob"
+
+            url = template.format(path_type=path_type, path=item["path"], **url_parts)
+            entries.append({"name": item["name"], "url": url, "class": entry_class})
+
+        html = self.render_dirview_template(
+            entries=entries, title=dirpath, breadcrumbs=breadcrumbs
+        )
+        await self.cache_and_finish(html)
+
+    @cached
+    async def get(self, host, group, repo, path_type, branch, path):
+        """Serve a notebook ("blob") or a directory listing ("tree")."""
+        client = GitlabClient(host)
+        if path_type == "blob":
+            raw_url = await self.get_notebook_data(client, group, repo, branch, path)
+            if raw_url is None:
+                # get_notebook_data has already logged the cause.
+                raise web.HTTPError(404, "Notebook not found: %s", path)
+            await self.deliver_notebook(host, group, repo, branch, path, raw_url)
+        else:
+            await self.show_dir(client, group, repo, branch, path)
+
+
+def uri_rewrites(rewrites=None):
+    """Return rewrites extended with the GitLab URL rewrite rules."""
+    gitlab_rewrites = [
+        (r"^https?://(gitlab\..*)$", r"/gitlab/{0}"),
+        (r"^/url[s]?/(gitlab\..*)$", r"/gitlab/{0}"),
+        (r"^/url[s]?/https?://(gitlab\..*)$", r"/gitlab/{0}"),
+    ]
+    return (rewrites or []) + gitlab_rewrites
+
+
+def default_handlers(handlers=None, **handler_names):
+    """Return handlers extended with the GitLab route.
+
+    The handler class is loaded from handler_names["gitlab_handler"].
+    """
+    gitlab_handler = _load_handler_from_location(handler_names["gitlab_handler"])
+    return (handlers or []) + [
+        (
+            r"/gitlab/(?P<host>[\w_\-.]+)"
+            r"/(?P<group>[\w_\-.]+)"
+            r"/(?P<repo>[\w_\-]+)"
+            r"/(?P<path_type>blob|tree)"
+            r"/(?P<branch>[\w_\-()]+)"
+            r"/(?P<path>.*)",
+            gitlab_handler,
+            {},
+        ),
+    ]