diff --git a/api/common.py b/api/common.py
index fb8eaad..e11dd23 100644
--- a/api/common.py
+++ b/api/common.py
@@ -97,12 +97,18 @@ def get_owner_repo_provider(repo_url,provider_full_name=False):
     repo = repo_url.split("/")[-1]
     owner = repo_url.split("/")[-2]
     provider = repo_url.split("/")[-3]
-    if provider not in ["github.com","gitlab.com"]:
+    if provider not in ["github.com","gitlab.com","www.github.com","www.gitlab.com"]:
        abort(400, "Unrecognized repository provider.")
+
+    if provider == "www.github.com":
+        provider = "github.com"
+    if provider == "www.gitlab.com":
+        provider = "gitlab.com"
+
     if not provider_full_name:
         if provider == "github.com":
             provider = "gh"
         elif provider == "gitlab.com":
             provider = "gl"
     return [owner,repo,provider]
diff --git a/api/github_client.py b/api/github_client.py
index 84dcbed..65f81ed 100644
--- a/api/github_client.py
+++ b/api/github_client.py
@@ -44,7 +44,7 @@ def gh_filter(input_str):
     """
     Returns repository name in owner/repository_name format
     """
-    github_url_pattern = r'^https?://github\.com/([^/]+)/([^/]+)'
+    github_url_pattern = r'^https?://(?:www\.)?github\.com/([^/]+)/([^/]+)'
     match = re.match(github_url_pattern, input_str)
     if match:
         owner = match.group(1)
@@ -148,6 +148,7 @@ def gh_get_project_name(github_client,target_repo):
     folder as required by neurolibre.
     """
     repo = github_client.get_repo(gh_filter(target_repo))
+    print(target_repo)
     # This is a requirement
     contents = repo.get_contents("binder/data_requirement.json")
     data = json.loads(contents.decoded_content)
diff --git a/api/neurolibre_celery_tasks.py b/api/neurolibre_celery_tasks.py
index 8b0dce9..335b53d 100644
--- a/api/neurolibre_celery_tasks.py
+++ b/api/neurolibre_celery_tasks.py
@@ -1,6 +1,6 @@
 from celery import Celery
 import time
-import os 
+import os
 import json
 import subprocess
 from celery import states
@@ -17,6 +17,7 @@
 import shutil
 import base64
 from celery.exceptions import Ignore
+from repo2data.repo2data import Repo2Data
 
 DOI_PREFIX = "10.55458"
 DOI_SUFFIX = "neurolibre"
@@ -46,7 +47,7 @@
 # Set timezone US/Eastern (Montreal)
 def get_time():
     """
-    To be printed on issue comment updates for 
+    To be printed on issue comment updates for
     background tasks.
     """
     tz = pytz.timezone('US/Eastern')
@@ -64,10 +65,68 @@ def sleep_task(self, seconds):
         self.update_state(state='PROGRESS', meta={'remaining': seconds - i - 1})
     return 'done sleeping for {} seconds'.format(seconds)
 
+ """ + task_title = "DATA DOWNLOAD (REPO2DATA)" + GH_BOT=os.getenv('GH_BOT') + github_client = Github(GH_BOT) + task_id = self.request.id + + [owner,repo,provider] = get_owner_repo_provider(payload['repo_url']) + #commit_hash = format_commit_hash(payload['repo_url'],commit_hash) + logging.info(f"{owner}{provider}{repo}") + + repo = github_client.get_repo(gh_filter(payload['repo_url'])) + + try: + contents = repo.get_contents("binder/data_requirement.json") + data_manifest = json.loads(contents.decoded_content) + json_path = os.path.join("/DATA","tmp_repo2data",owner,repo,"data_requirement.json") + with open(json_path,"w") as f: + json.dump(data_manifest,f) + if not data_manifest: + raise + project_name = data_manifest['projectName'] + except Exception as e: + message = f"Data download has failed: {str(e)}" + if payload['email']: + send_email(payload['email'], "NeuroLibre: Data download request", message) + else: + gh_template_respond(github_client,"failure",task_title,payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'], + f"Data exists for {project_name}; not overwriting by default! Please set overwrite=True." + ) + + data_path = os.path.join("/DATA", project_name) + if os.path.exists(data_path) and not payload['overwrite']: + gh_template_respond(github_client,"failure",task_title,payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'], + f"Data exists for {project_name}; not overwriting by default! Please set overwrite=True." + ) + self.update_state(state=states.IGNORED, meta={'message': f"Data already downloaded downloaded to {data_path}."}) + return + + # download data with repo2data + repo2data = Repo2Data(json_path, server=True) + downloaded_data_path = repo2data.install()[0] + message = f"Downloaded data in {downloaded_data_path}." + + # update status + if payload['email']: + send_email(payload['email'], "NeuroLibre: Data download request", message) + self.update_state(state=states.SUCCESS, meta={'message': message}) + else: + gh_template_respond(github_client,"received",task_title,payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'], + message + ) + self.update_state(state=states.SUCCESS, meta={'message': message}) + + @celery_app.task(bind=True) def rsync_data_task(self, comment_id, issue_id, project_name, reviewRepository): """ - Uploading data to the production server + Uploading data to the production server from the test server. """ task_title = "DATA TRANSFER (Preview --> Preprint)" @@ -110,10 +169,10 @@ def rsync_book_task(self, repo_url, commit_hash, comment_id, issue_id, reviewRep """ Moving the book from the test to the production server. This book is expected to be built from - a roboneurolibre repository. + a roboneurolibre repository. Once the book is available on the production server, - content is symlinked to a DOI formatted directory (Nginx configured) + content is symlinked to a DOI formatted directory (Nginx configured) to enable DOI formatted links. 
""" task_title = "REPRODUCIBLE PREPRINT TRANSFER (Preview --> Preprint)" @@ -121,7 +180,7 @@ def rsync_book_task(self, repo_url, commit_hash, comment_id, issue_id, reviewRep github_client = Github(GH_BOT) task_id = self.request.id [owner,repo,provider] = get_owner_repo_provider(repo_url,provider_full_name=True) - if owner != "roboneurolibre": + if owner != "roboneurolibre": gh_template_respond(github_client,"failure",task_title,reviewRepository,issue_id,task_id,comment_id, f"Repository is not under roboneurolibre organization!") self.update_state(state=states.FAILURE, meta={'exc_type':"NeuroLibre celery exception",'exc_message': "Custom",'message': f"FAILURE: Repository {owner}/{repo} has no roboneurolibre fork."}) return @@ -137,7 +196,7 @@ def rsync_book_task(self, repo_url, commit_hash, comment_id, issue_id, reviewRep self.update_state(state=states.STARTED, meta={'message': f"Transfer started {now}"}) gh_template_respond(github_client,"started",task_title,reviewRepository,issue_id,task_id,comment_id, "") #logging.info("Calling subprocess") - process = subprocess.Popen(["/usr/bin/rsync", "-avR", remote_path, "/"], stdout=subprocess.PIPE,stderr=subprocess.STDOUT) + process = subprocess.Popen(["/usr/bin/rsync", "-avR", remote_path, "/"], stdout=subprocess.PIPE,stderr=subprocess.STDOUT) output = process.communicate()[0] ret = process.wait() logging.info(output) @@ -184,15 +243,15 @@ def rsync_book_task(self, repo_url, commit_hash, comment_id, issue_id, reviewRep @celery_app.task(bind=True) def fork_configure_repository_task(self, payload): task_title = "INITIATE PRODUCTION (Fork and Configure)" - + GH_BOT=os.getenv('GH_BOT') github_client = Github(GH_BOT) task_id = self.request.id - + now = get_time() self.update_state(state=states.STARTED, meta={'message': f"Transfer started {now}"}) gh_template_respond(github_client,"started",task_title,payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'], "") - + book_tested_check = get_test_book_build(PREVIEW_SERVER,True,payload['commit_hash']) # Production cannot be started if there's a book at the latest commit hash at which # the production is asked for. @@ -214,7 +273,7 @@ def fork_configure_repository_task(self, payload): forked_name = gh_forkify_name(payload['repository_url']) # First check if a fork already exists. fork_exists = False - try: + try: github_client.get_repo(forked_name) fork_exists = True except UnknownObjectException as e: @@ -248,7 +307,7 @@ def fork_configure_repository_task(self, payload): return else: logging.info(f"Fork already exists {payload['repository_url']}, moving on with configurations.") - + gh_template_respond(github_client,"started",task_title,payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'], "Forked repo has become available. Proceeding with configuration updates.") jb_config = gh_get_jb_config(github_client,forked_name) @@ -291,7 +350,7 @@ def fork_configure_repository_task(self, payload): "title": "Citable PDF and archives" }] }) - + if 'chapters' in jb_toc: jb_toc_new['chapters'].append({ "url": f"{PAPERS_PATH}/{DOI_PREFIX}/{DOI_SUFFIX}.{payload['issue_id']:05d}", @@ -303,7 +362,7 @@ def fork_configure_repository_task(self, payload): "url": f"{PAPERS_PATH}/{DOI_PREFIX}/{DOI_SUFFIX}.{payload['issue_id']:05d}", "title": "Citable PDF and archives" }) - + # Update TOC file in the forked repo only if the new toc is different # otherwise github api will complain. 
     # Update TOC file in the forked repo only if the new toc is different
     # otherwise github api will complain.
     if jb_toc_new == jb_toc:
@@ -382,7 +441,7 @@ def generate():
             # Fetch all the yielded messages
         binder_logs = binder_response.get_data(as_text=True)
         binder_logs = "".join(binder_logs)
-        # After the upstream closes, check the server if there's 
+        # After the upstream closes, check the server if there's
         # a book built successfully.
         book_status = book_get_by_params(commit_hash=payload['commit_hash'])
         # For now, remove the block either way.
@@ -393,7 +452,7 @@ def generate():
             os.remove(lock_filename)
         # Append book-related response downstream
         if not book_status:
-            # These flags will determine how the response will be 
+            # These flags will determine how the response will be
             # interpreted and returned outside the generator
             gh_template_respond(github_client,"failure","Binder build has failed 🥀",payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'], "The next comment will forward the logs")
             issue_comment = []
@@ -424,7 +483,7 @@
 
 @celery_app.task(bind=True)
 def zenodo_create_buckets_task(self, payload):
-    
+
     GH_BOT=os.getenv('GH_BOT')
     github_client = Github(GH_BOT)
     task_id = self.request.id
@@ -456,25 +515,25 @@ def zenodo_create_buckets_task(self, payload):
 
     for ii in range(len(data['authors'])):
         data['authors'][ii]['affiliation'] = first_affiliations[ii]
-    
-    # To deal with some typos, also with orchid :) 
+
+    # To deal with some typos, also with orchid :)
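+    # e.g. an author entry {"orchid": "0000-0002-0000-0000"} is renamed to
+    # {"orcid": "0000-0002-0000-0000"} by the loop below (illustrative value).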
     valid_field_names = {'name', 'orcid', 'affiliation'}
     for author in data['authors']:
         invalid_fields = []
         for field in author:
             if field not in valid_field_names:
                 invalid_fields.append(field)
-        
+
         for invalid_field in invalid_fields:
             valid_field = None
             for valid_name in valid_field_names:
                 if valid_name.lower() in invalid_field.lower() or (valid_name == 'orcid' and invalid_field.lower() == 'orchid'):
                     valid_field = valid_name
                     break
-            
+
             if valid_field:
                 author[valid_field] = author.pop(invalid_field)
-        
+
         if 'equal-contrib' in author:
             author.pop('equal-contrib')
 
@@ -495,7 +554,7 @@ def zenodo_create_buckets_task(self, payload):
             collect[archive_type] = r
         # Rate limit
         time.sleep(2)
-    
+
     if {k: v for k, v in collect.items() if 'reason' in v}:
         # This means at least one of the deposits has failed.
         logging.info(f"Caught an issue with the deposit. A record (JSON) will not be created.")
@@ -518,7 +577,7 @@ def zenodo_create_buckets_task(self, payload):
 
 @celery_app.task(bind=True)
 def zenodo_flush_task(self,payload):
-    
+
     GH_BOT=os.getenv('GH_BOT')
     github_client = Github(GH_BOT)
     task_id = self.request.id
@@ -558,7 +617,7 @@ def zenodo_flush_task(self,payload):
                 prog[item] = False
                 msg.append(f"\n The {item} deposit does not exist.")
                 gh_template_respond(github_client,"failure",payload['task_title'], payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'],"".join(msg))
-    
+
     # Update the issue comment
     gh_template_respond(github_client,"started",payload['task_title'], payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'],"".join(msg))
@@ -579,11 +638,11 @@ def zenodo_upload_book_task(self, payload):
     GH_BOT=os.getenv('GH_BOT')
     github_client = Github(GH_BOT)
     task_id = self.request.id
-    
+
     gh_template_respond(github_client,"started",payload['task_title'], payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'])
     owner,repo,provider = get_owner_repo_provider(payload['repository_url'],provider_full_name=True)
-    
+
     fork_url = f"https://{provider}/roboneurolibre/{repo}"
     commit_fork = format_commit_hash(fork_url,"HEAD")
     record_name = item_to_record_name("book")
@@ -619,7 +678,7 @@ def zenodo_upload_book_task(self, payload):
 
 @celery_app.task(bind=True)
 def zenodo_upload_data_task(self,payload):
-    
+
     GH_BOT=os.getenv('GH_BOT')
     github_client = Github(GH_BOT)
     task_id = self.request.id
@@ -641,7 +700,7 @@ def zenodo_upload_data_task(self,payload):
         logging.info(f"Compressed data already exists {record_name}_10.55458_NeuroLibre_{payload['issue_id']:05d}_{commit_fork[0:6]}.zip")
         tar_file = expect
     else:
-        # We will archive the data synced from the test server. (item_arg is the project_name, indicating that the 
+        # We will archive the data synced from the test server. (item_arg is the project_name, indicating that the
         # data is stored at the /DATA/project_name folder)
         local_path = os.path.join("/DATA", project_name)
         # Descriptive file name
@@ -675,14 +734,14 @@ def zenodo_upload_repository_task(self, payload):
     GH_BOT=os.getenv('GH_BOT')
     github_client = Github(GH_BOT)
     task_id = self.request.id
-    
+
     gh_template_respond(github_client,"started",payload['task_title'], payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'])
     owner,repo,provider = get_owner_repo_provider(payload['repository_url'],provider_full_name=True)
-    
+
     fork_url = f"https://{provider}/roboneurolibre/{repo}"
     commit_fork = format_commit_hash(fork_url,"HEAD")
-    
+
     default_branch = get_default_branch(github_client,fork_url)
 
     download_url = f"{fork_url}/archive/refs/heads/{default_branch}.zip"
@@ -722,11 +781,11 @@ def zenodo_upload_docker_task(self, payload):
     GH_BOT=os.getenv('GH_BOT')
     github_client = Github(GH_BOT)
     task_id = self.request.id
-    
+
     gh_template_respond(github_client,"started",payload['task_title'], payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'])
     owner,repo,provider = get_owner_repo_provider(payload['repository_url'],provider_full_name=True)
-    
+
     fork_url = f"https://{provider}/roboneurolibre/{repo}"
     commit_fork = format_commit_hash(fork_url,"HEAD")
@@ -773,7 +832,7 @@ def zenodo_upload_docker_task(self, payload):
 
     # Login to the private registry to pull images
     r = docker_login()
-    
+
     if not r['status']:
         msg = f"Cannot login to NeuroLibre private docker registry. \n {r['message']}"
         gh_template_respond(github_client,"failure",payload['task_title'], payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'], msg)
@@ -790,7 +849,7 @@ def zenodo_upload_docker_task(self, payload):
         gh_template_respond(github_client,"failure",payload['task_title'], payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'], msg)
         self.update_state(state=states.FAILURE, meta={'exc_type':"NeuroLibre celery exception",'exc_message': "Custom",'message': msg})
         return
-    
+
     msg = f"Exporting docker image: \n {lut['docker_image']}"
     gh_template_respond(github_client,"started",payload['task_title'] + " `exporting (2/3)`", payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'],msg)
@@ -831,16 +890,16 @@
 
 @celery_app.task(bind=True)
 def zenodo_publish_task(self, payload):
-    
+
     GH_BOT=os.getenv('GH_BOT')
     github_client = Github(GH_BOT)
     task_id = self.request.id
-    
+
     gh_template_respond(github_client,"started",payload['task_title'], payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'])
     prompt = "First state that you will issue commands to set DOIs for the reproducibility assets, then you'll talk to yourself a bit. But reassure in a funny way that there's nothing to worry about because you are not an artificial general intelligence (yet). Keep it to a few sentences."
     # Check if already published
     publish_status_init = zenodo_confirm_status(payload['issue_id'],"published")
-    
+
     if publish_status_init[0]:
         # Means already published. In this case just set the DOIs.
         gh_template_respond(github_client,"started",payload['task_title'], payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'],"As the reproducibility assets have already been published, I will just set the DOIs.")
@@ -866,7 +925,7 @@ def zenodo_publish_task(self, payload):
         return
     else:
         # Confirm that all items are published.
-        # TODO: Check this 
+        # TODO: Check this
         publish_status = zenodo_confirm_status(payload['issue_id'],"published")
         # If all items are published, success. Add DOIs.
         if publish_status[0]:
@@ -878,7 +937,7 @@ def zenodo_publish_task(self, payload):
                 gh_create_comment(github_client,payload['review_repository'],payload['issue_id'],command)
                 time.sleep(1)
         else:
-            # Some one None
+            # Some items are None, i.e., not all assets are published.
             response.append(f"\n Looks like there's a problem. {publish_status[1]} reproducibility assets are archived.")
             msg = "\n".join(response)
             gh_template_respond(github_client,"failure",payload['task_title'], payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'], msg, False)
@@ -945,7 +1004,7 @@ def generate():
             # Fetch all the yielded messages
         binder_logs = binder_response.get_data(as_text=True)
         binder_logs = "".join(binder_logs)
-        # After the upstream closes, check the server if there's 
+        # After the upstream closes, check the server if there's
         # a book built successfully.
         book_status = book_get_by_params(commit_hash=payload['commit_hash'])
         exec_error = book_execution_errored(owner,repo,provider,payload['commit_hash'])
@@ -957,7 +1016,7 @@ def generate():
             os.remove(lock_filename)
         # Append book-related response downstream
         if not book_status or exec_error:
-            # These flags will determine how the response will be 
+            # These flags will determine how the response will be
             # interpreted and returned outside the generator
             #gh_template_respond(github_client,"failure","Binder build has failed 🥀",payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'], "The next comment will forward the logs")
             issue_comment = []
@@ -1050,7 +1109,7 @@ def write_html_to_temp_directory(commit_sha, logs):
         f.write("\n")
         f.write(f"{logs}")
         f.write("\n")
-    
+
     return file_path
 
 @celery_app.task(bind=True)
@@ -1066,7 +1125,7 @@ def preprint_build_pdf_draft(self, payload):
         shutil.rmtree(target_path)
     try:
         gh_clone_repository(payload['repository_url'], target_path, depth=1)
-    except Exception as e: 
+    except Exception as e:
         gh_template_respond(github_client,"failure",payload['task_title'], payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'], str(e))
         self.update_state(state=states.FAILURE, meta={'exc_type':"NeuroLibre celery exception",'exc_message': "Custom",'message': str(e)})
         return
@@ -1074,7 +1133,7 @@ def preprint_build_pdf_draft(self, payload):
     res = create_extended_pdf_sources(target_path, payload['issue_id'],payload['repository_url'])
     if res['status']:
         try:
-            process = subprocess.Popen(["docker", "run","--rm", "-v", f"{target_path}:/data", "-u", "ubuntu:www-data", "neurolibre/inara:latest","-o", "neurolibre", "./paper.md"], stdout=subprocess.PIPE,stderr=subprocess.STDOUT) 
+            process = subprocess.Popen(["docker", "run","--rm", "-v", f"{target_path}:/data", "-u", "ubuntu:www-data", "neurolibre/inara:latest","-o", "neurolibre", "./paper.md"], stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
             output = process.communicate()[0]
             ret = process.wait()
             logging.info(output)
@@ -1100,7 +1159,7 @@ def preprint_build_pdf_draft(self, payload):
         except subprocess.CalledProcessError as e:
             gh_template_respond(github_client,"failure",payload['task_title'], payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'], f"{e.output}")
             self.update_state(state=states.FAILURE, meta={'exc_type':"NeuroLibre celery exception",'exc_message': "Custom",'message': e.output})
-    else: 
+    else:
         gh_template_respond(github_client,"failure",payload['task_title'], payload['review_repository'],payload['issue_id'],task_id,payload['comment_id'], f"{res['message']}")
         self.update_state(state=states.FAILURE, meta={'exc_type':"NeuroLibre celery exception",'exc_message': "Custom",'message': res['message']})
 
diff --git a/api/neurolibre_common_api.py b/api/neurolibre_common_api.py
index 8b5d0f3..e227054 100644
--- a/api/neurolibre_common_api.py
+++ b/api/neurolibre_common_api.py
@@ -104,4 +104,13 @@ def api_unlock_build(user, repo_url):
         response = make_response(f"No build lock found for {repo_url}",404)
         response.mimetype = "text/plain"
-    return response
\ No newline at end of file
+    return response
+
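+# Illustrative use of the endpoint added below; /public/ is served with
+# auth_basic off (see the nginx change at the end of this diff):
+#
+#   curl https://<preview-server>/public/data
+#   => ["project_a", "project_b"]   # folder names under /DATA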
+ """ + files = os.listdir('/DATA') + return make_response(jsonify(files),200) \ No newline at end of file diff --git a/api/neurolibre_preview_api.py b/api/neurolibre_preview_api.py index 66ee231..5c90789 100644 --- a/api/neurolibre_preview_api.py +++ b/api/neurolibre_preview_api.py @@ -8,7 +8,7 @@ import neurolibre_common_api from flask import jsonify, make_response from common import * -from schema import BuildSchema, BuildTestSchema +from schema import BuildSchema, BuildTestSchema, DownloadSchema from flask_htpasswd import HtPasswdAuth from dotenv import load_dotenv from werkzeug.middleware.proxy_fix import ProxyFix @@ -16,7 +16,7 @@ from apispec import APISpec from apispec.ext.marshmallow import MarshmallowPlugin from github_client import * -from neurolibre_celery_tasks import celery_app, sleep_task, preview_build_book_task, preview_build_book_test_task +from neurolibre_celery_tasks import celery_app, sleep_task, preview_build_book_task, preview_build_book_test_task, preview_download_data from celery.events.state import State from github import Github, UnknownObjectException @@ -52,13 +52,13 @@ app.logger.info(f"Using {binderName}.{domainName} as BinderHub.") -serverContact = app.config["SERVER_CONTACT"] +serverContact = app.config["SERVER_CONTACT"] serverName = app.config["SERVER_SLUG"] serverDescription = app.config["SERVER_DESC"] serverTOS = app.config["SERVER_TOS"] serverAbout = app.config["SERVER_ABOUT"] + app.config["SERVER_LOGO"] -# API specifications displayed on the swagger UI +# API specifications displayed on the swagger UI spec = APISpec( title="Neurolibre preview & screening API", version='v1', @@ -84,6 +84,7 @@ docs.register(neurolibre_common_api.api_get_books,blueprint="common_api") docs.register(neurolibre_common_api.api_heartbeat,blueprint="common_api") docs.register(neurolibre_common_api.api_unlock_build,blueprint="common_api") +docs.register(neurolibre_common_api.api_preview_list,blueprint="common_api") """ Configuration END @@ -97,6 +98,45 @@ API Endpoints START """ +@app.route('/api/data/cache', methods=['POST']) +@htpasswd.required +@marshal_with(None,code=422,description="Cannot validate the payload, missing or invalid entries.") +@use_kwargs(DownloadSchema()) +@doc(description='Endpoint for downloading data through repo2data.', tags=['Data']) +def api_download_data(user, id, repo_url, email, is_overwrite): + """ + This endpoint is to download data from GitHub (technical screening) requests. + """ + GH_BOT=os.getenv('GH_BOT') + github_client = Github(GH_BOT) + issue_id = id + + task_title = "Download data for preview." 
+    comment_id = gh_template_respond(github_client,"pending",task_title,reviewRepository,issue_id)
+
+    celery_payload = dict(repo_url=repository_url,
+                          rate_limit=build_rate_limit,
+                          binder_name=binderName,
+                          domain_name=domainName,
+                          comment_id=comment_id,
+                          issue_id=issue_id,
+                          review_repository=reviewRepository,
+                          task_title=task_title,
+                          overwrite=overwrite,
+                          email=email)
+
+    task_result = preview_download_data.apply_async(args=[celery_payload])
+
+    if task_result.task_id is not None:
+        gh_template_respond(github_client,"received",task_title,reviewRepository,issue_id,task_result.task_id,comment_id, "")
+        response = make_response(jsonify("Celery task assigned successfully."),200)
+    else:
+        # If not successfully assigned, fail the status immediately and return 500
+        gh_template_respond(github_client,"failure",task_title,reviewRepository,issue_id,task_result.task_id,comment_id, "Internal server error: NeuroLibre background task manager could not receive the request.")
+        response = make_response(jsonify("Celery could not start the task."),500)
+    return response
+
+docs.register(api_download_data)
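+
+# Illustrative request (basic auth required; field names follow DownloadSchema):
+#
+#   curl -u <user>:<pass> -X POST https://<preview-server>/api/data/cache \
+#        -H "Content-Type: application/json" \
+#        -d '{"id": 42, "repository_url": "https://github.com/owner/repo", "overwrite": false}'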
 
 @app.route('/api/book/build', methods=['POST'])
 @htpasswd.required
@@ -115,16 +155,16 @@ def api_book_build(user, id, repo_url, commit_hash):
     task_title = "Book Build (Preview)"
     comment_id = gh_template_respond(github_client,"pending",task_title,reviewRepository,issue_id)
 
-    celery_payload = dict(repo_url=repo_url, 
-                          commit_hash=commit_hash, 
+    celery_payload = dict(repo_url=repo_url,
+                          commit_hash=commit_hash,
                           rate_limit=build_rate_limit,
-                          binder_name=binderName, 
+                          binder_name=binderName,
                           domain_name = domainName,
                           comment_id=comment_id,
                           issue_id=issue_id,
                           review_repository=reviewRepository,
                           task_title=task_title)
-    
+
     task_result = preview_build_book_task.apply_async(args=[celery_payload])
 
     if task_result.task_id is not None:
@@ -153,18 +193,18 @@ def api_book_build_test(user, repo_url, commit_hash, email):
     [owner, repo, provider] = get_owner_repo_provider(repo_url)
     mail_subject = f"NRP test build for {owner}/{repo}"
     mail_body = f"We have received your request to build a NeuroLibre reproducible preprint from {repo_url} at {commit_hash}. \n Your request has been queued, we will inform you when the process starts."
-    
+
     send_email(email, mail_subject, mail_body)
 
-    celery_payload = dict(repo_url=repo_url, 
-                          commit_hash=commit_hash, 
+    celery_payload = dict(repo_url=repo_url,
+                          commit_hash=commit_hash,
                           rate_limit=build_rate_limit,
-                          binder_name=binderName, 
+                          binder_name=binderName,
                           domain_name = domainName,
                           email = email,
                           review_repository=reviewRepository,
                           mail_subject=mail_subject)
-    
+
     task_result = preview_build_book_test_task.apply_async(args=[celery_payload])
 
     if task_result.task_id is not None:
@@ -174,7 +214,7 @@ def api_book_build_test(user, repo_url, commit_hash, email):
         # If not successfully assigned, fail the status immediately and return 500
         mail_body = f"We could not start processing your NRP test request due to a technical issue on the server side. Please contact info@neurolibre.org."
         response = make_response(jsonify("Celery could not start the task."),500)
-    
+
     send_email(email, mail_subject, mail_body)
     return response
 
diff --git a/api/preprint.py b/api/preprint.py
index c508740..d03ae5d 100644
--- a/api/preprint.py
+++ b/api/preprint.py
@@ -6,7 +6,7 @@
 from dotenv import load_dotenv
 import re
 from github import Github
-from github_client import gh_read_from_issue_body 
+from github_client import gh_read_from_issue_body
 import csv
 import subprocess
 import nbformat
@@ -24,15 +24,15 @@
 load_dotenv()
 
 """
-Helper functions for the tasks 
+Helper functions for the tasks
 performed by the preprint (production server).
 """
 
 def zenodo_create_bucket(title, archive_type, creators, repository_url, issue_id):
-    
+
     [owner,repo,provider] = get_owner_repo_provider(repository_url,provider_full_name=True)
 
-    # ASSUMPTION 
+    # ASSUMPTION
     # Fork exists and has the same name.
     fork_url = f"https://{provider}/roboneurolibre/{repo}"
 
@@ -40,10 +40,10 @@ def zenodo_create_bucket(title, archive_type, creators, repository_url, issue_id
     params = {'access_token': ZENODO_TOKEN}
     # headers = {"Content-Type": "application/json",
     #            "Authorization": "Bearer {}".format(ZENODO_TOKEN)}
-    
-    # WANING: 
+
+    # WARNING:
     # FOR NOW assuming that HEAD corresponds to the latest successful
-    # book build. That may not be the case. Requires better 
+    # book build. That may not be the case. Requires better
     # data handling or extra functionality to retrieve the latest successful
     # book commit.
     commit_user = format_commit_hash(repository_url,"HEAD")
@@ -77,11 +77,11 @@ def zenodo_create_bucket(title, archive_type, creators, repository_url, issue_id
         data["metadata"]["upload_type"] = "software"
         data["metadata"]["description"] = f"Docker image built from the {libre_text}, based on the {user_text}, using repo2docker (through BinderHub). <br> To run locally: <br> 1. docker load < DockerImage_10.55458_NeuroLibre_{issue_id:05d}_{commit_fork[0:6]}.tar.gz <br> 2. docker run -it --rm -p 8888:8888 DOCKER_IMAGE_ID jupyter lab --ip 0.0.0.0 <br><br> by replacing DOCKER_IMAGE_ID above with the respective ID of the Docker image loaded from the zip file. <br><br> {review_text} {sign_text}"
 
-    # Make an empty deposit to create the bucket 
+    # Make an empty deposit to create the bucket
     r = requests.post("https://zenodo.org/api/deposit/depositions", params=params, json=data)
-    
+
     print(f"Error: {r.status_code} - {r.text}")
 
     # response_dict = json.loads(r.text)
@@ -104,7 +104,7 @@ def execute_subprocess(command):
     To asynchronously execute system-levels using celery
     simple calls such as os.system will not work.
 
-    This helper function is to issue system-level command executions 
+    This helper function is to issue system-level command executions
     using celery.
     """
     # This will be called by Celery, subprocess must be handled properly
@@ -266,7 +266,7 @@ def item_to_record_name(item):
                 "book":"JupyterBook"}
     if item in dict_map.keys():
         return dict_map[item]
-    else: 
+    else:
         return None
 
 def zenodo_upload_item(upload_file,bucket_url,issue_id,commit_fork,item_name):
@@ -316,7 +316,7 @@ def parse_tsv_content(content):
     # Iterate over each row and add it to the parsed_data list
     for row in reader:
         parsed_data.append(row)
-    
+
     return parsed_data
 
 def get_test_book_build(preview_server,verify_ssl,commit_hash):
@@ -340,7 +340,7 @@ def get_test_book_build(preview_server,verify_ssl,commit_hash):
 
 def get_resource_lookup(preview_server,verify_ssl,repository_address):
     """
-    For a given repository address, returns a dictionary 
+    For a given repository address, returns a dictionary
     that contains the following fields:
     - "date","repository_url","docker_image","project_name","data_url","data_doi"
     IF a successful book build exists for the respective inquiry.
@@ -351,7 +351,7 @@ def get_resource_lookup(preview_server,verify_ssl,repository_address):
     Ideally, this should be dealt with using a proper database instead of a tsv file.
     """
-    
+
     url = f"{preview_server}/book-artifacts/lookup_table.tsv"
     headers = {'Content-Type': 'application/json'}
     API_USER = os.getenv('TEST_API_USER')
@@ -360,7 +360,7 @@ def get_resource_lookup(preview_server,verify_ssl,repository_address):
 
     # Send GET request
     response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
-    
+
     # Process response
     if response.ok:
         # Get content body
@@ -373,17 +373,17 @@ def get_resource_lookup(preview_server,verify_ssl,repository_address):
         if idx:
             # Convert to list
             values = parsed_data[idx][0].split(",")
-            # Convert to dict 
+            # Convert to dict
             # The last two keys are not reliable (they may contain commas that are not tsv column separators)
             # also due to a subpar documentation issue with repo2data.
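+            # An illustrative row, after splitting on commas:
+            #   ["2023-06-01","https://github.com/owner/repo","registry/owner-repo:tag",
+            #    "myproject","https://example.org/data","10.5281/zenodo.0000000"]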
             keys = ["date","repository_url","docker_image","project_name","data_url","data_doi"]
             lut = dict(zip(keys, values))
-        else: 
+        else:
             lut = None
     else:
-        
+
         lut = None
-    
+
     return lut
 
 def zenodo_publish(issue_id):
@@ -405,7 +405,7 @@ def zenodo_publish(issue_id):
         message.append(f"\n :ice_cube: {item_to_record_name(item)} publish status:")
         r = requests.post(publish_link,params=params)
         response = r.json()
-        if r.status_code==202: 
+        if r.status_code==202:
             message.append(f"\n :confetti_ball: ")
             tmp = f"zenodo_published_{item}_NeuroLibre_{issue_id:05d}.json"
             log_file = os.path.join(get_deposit_dir(issue_id), tmp)
@@ -536,11 +536,11 @@ def parse_section_and_body(notebook):
     if current_paragraph:
         parsed_content.append({'section': current_section, 'paragraph': current_paragraph})
     return parsed_content
-    
+
 def myst_to_joss_tex_cite(input,match_format, subs_format):
     """
-    In a given string, find (MyST) citation directives that matches 
-    match_format, then replace them with JOSS-text template citation 
+    In a given string, find (MyST) citation directives that match
+    match_format, then replace them with JOSS-text template citation
     directives based on the subs_format.
 
     This function is used by substitute_cite_directives to handle
     multiple formats.
@@ -554,40 +554,40 @@ def myst_to_joss_tex_cite(input,match_format, subs_format):
         try:
             citations = match.split(',')
             formatted_citations = '; '.join([f'@{citation.strip()}' for citation in citations if citation])
-            if subs_format == "p": 
-                input = re.sub(match_format, f'[{formatted_citations}]', input, count=1) 
+            if subs_format == "p":
+                input = re.sub(match_format, f'[{formatted_citations}]', input, count=1)
             if subs_format == "t":
-                input = re.sub(rf'\{{cite:t\}}`{match}`', f'{formatted_citations}', input, count=1) 
+                input = re.sub(rf'\{{cite:t\}}`{match}`', f'{formatted_citations}', input, count=1)
         except:
-            pass 
-    
+            pass
+
     return input
 
 def substitute_cite_directives(input):
     """
-    Calls md_to_tex_cite for multiple citation formats, each case has to be 
+    Calls myst_to_joss_tex_cite for multiple citation formats; each case has to be
     handled individually as substitute patterns vary.
     """
     tmp = myst_to_joss_tex_cite(input, r'\{cite:p\}`([^`]*)`', "p")
     input = tmp if tmp else input
     tmp = myst_to_joss_tex_cite(input, r'\{cite:t\}`([^`]*)`', "t")
     input = tmp if tmp else input
-    return input 
+    return input
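+# e.g. substitute_cite_directives("see {cite:p}`smith2020,doe2021`")
+# returns "see [@smith2020; @doe2021]" (illustrative citation keys).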
 
 def remove_html_tags(markdown):
     """
     Jupyter Book is intended to write documents without using html tags.
-    When wrapped between tags, MyST/JB parsers will skip the content. 
-    Use b4s to get rid of html tags. 
+    When wrapped between tags, MyST/JB parsers will skip the content.
+    Use bs4 to get rid of html tags.
     """
     soup = BeautifulSoup(markdown, "html.parser")
     return soup.get_text()
 
-def myst_rm_admonition_render_html(input): 
+def myst_rm_admonition_render_html(input):
     md_parser = create_md_parser(MdParserConfig(),RendererHTML)
     parsed = md_parser.parse(input)
     #print(parsed)
-    # Apply desired filters here. 
+    # Apply desired filters here.
     filtered_tokens = [token for token in parsed if token.type != 'fence']
     #print(filtered_tokens)
     filtered_html = ""
@@ -623,7 +623,7 @@ def myst_md_to_joss_md(file_name):
         markdown_content = file.read()
 
     markdown_content = substitute_cite_directives(markdown_content)
-    markdown_content = to_md(myst_rm_admonition_render_html(markdown_content)) 
+    markdown_content = to_md(myst_rm_admonition_render_html(markdown_content))
     return markdown_content
 
 def hyperlink_figure_references(match, issue_id):
@@ -643,7 +643,7 @@ def jbook_to_joss_md(input_files,issue_id):
         elif file_name.endswith('.md'):
             markdown_output = myst_md_to_joss_md(file_name)
             output = output + markdown_output
-    # Hyerlink to reproducible preprint at Figure refs
+    # Hyperlink to reproducible preprint at Figure refs
     pattern = r'(Figure|Fig\.)\s+(\d+[-\w]*)'
     output = re.sub(pattern, lambda match: hyperlink_figure_references(match, issue_id), output)
     return output
@@ -663,7 +663,7 @@ def append_bib_files(file1_path, file2_path, output_path):
 
 def merge_and_check_bib(target_path):
     """
-    For now simply appending one bib to another 
+    For now simply appending one bib to another;
     later on, add duplication check.
     """
     orig_bib = os.path.join(target_path,"paper.bib")
@@ -671,7 +671,7 @@ def merge_and_check_bib(target_path):
     # Create a backup for the original markdown.
     shutil.copyfile(orig_bib, backup_bib)
     # Simply merge two bib files.
-    # TODO: GET THE DIRECTORY FROM FLASK 
+    # TODO: GET THE DIRECTORY FROM FLASK
     partial_bib = "/home/ubuntu/full-stack-server/assets/partial.bib"
     append_bib_files(orig_bib, partial_bib, orig_bib)
 
@@ -680,9 +680,9 @@ def create_extended_pdf_sources(target_path, issue_id, repository_url):
     target_path is where repository_url is cloned by the celery worker.
     """
     # This will crawl all the Jupyter Notebooks to collect text that cites
-    # articles, then will substitute MyST cite commands with Pandoc directives 
+    # articles, then will substitute MyST cite commands with Pandoc directives
     # recognized by OpenJournals PDF compilers.
-    try: 
+    try:
         toc = get_local_yaml(os.path.join(target_path,"content","_toc.yml"))
         nl_local_file = os.path.join(target_path,"content","_neurolibre.yml")
         if os.path.isfile(nl_local_file):
@@ -741,9 +741,9 @@ def get_local_yaml(file):
 def nb_to_lab(file_path):
     with open(file_path, 'r') as f:
         content = f.read()
-    
+
     updated_content = re.sub(r'\?urlpath=tree/content/', '?urlpath=lab/tree/content/', content)
-    
+
     with open(file_path, 'w') as f:
         f.write(updated_content)
 
diff --git a/api/requirements.txt b/api/requirements.txt
index ff1aa50..4e37d05 100644
--- a/api/requirements.txt
+++ b/api/requirements.txt
@@ -22,4 +22,5 @@ markdown
 markdownify==0.11.6
 bs4
 myst-parser==0.18.1
-markdown-it-py==2.0.1
\ No newline at end of file
+markdown-it-py==2.0.1
+repo2data==2.9.1
diff --git a/api/schema.py b/api/schema.py
index 50e6de5..14cd94f 100644
--- a/api/schema.py
+++ b/api/schema.py
@@ -21,6 +21,15 @@ class BookSchema(Schema):
 
 # Preview server
 
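+# An example payload validated by DownloadSchema below (illustrative values):
+#   {"id": 42,
+#    "repository_url": "https://github.com/owner/repo",
+#    "email": "author@example.org",
+#    "overwrite": false}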
+class DownloadSchema(Schema):
+    """
+    Defines schema to be used for repo2data download.
+    """
+    id = fields.Integer(required=False,description="Issue number of the technical screening of this preprint. If this is used, the response will be returned to the respective GitHub issue.")
+    repository_url = fields.Str(required=True,description="Full URL of a NeuroLibre compatible repository to be used for building the book.")
+    email = fields.Str(required=False,description="Email address to which the result will be returned.")
+    overwrite = fields.Boolean(required=False,description="Whether to overwrite the downloaded data if it already exists.")
+
 class BuildSchema(Schema):
     """
     Defines payload types and requirements for book build request.
diff --git a/nginx/neurolibre-preview.conf b/nginx/neurolibre-preview.conf
index 3a6996f..003d6bb 100644
--- a/nginx/neurolibre-preview.conf
+++ b/nginx/neurolibre-preview.conf
@@ -20,11 +20,15 @@ server{
 
     client_max_body_size 4G;
     keepalive_timeout 5;
-
-    auth_basic "Administrator’s Area";
-    auth_basic_user_file /home/ubuntu/.htpasswd;
 
     location /api/ {
+        auth_basic "Administrator’s Area";
+        auth_basic_user_file /home/ubuntu/.htpasswd;
+        include /etc/nginx/neurolibre_params;
+        proxy_pass http://app_server;
+    }
+
+    location /public/ {
+        auth_basic off;
         include /etc/nginx/neurolibre_params;
         proxy_pass http://app_server;
     }