Documentation tweaks
stucka committed Aug 16, 2024
1 parent 30dbd04 commit da91b41
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions clean/ca/ventura_county_sheriff.py
@@ -25,21 +25,19 @@ class Site:
     agency_slug = "ca_ventura_county_sheriff"

     def __init__(self, data_dir=utils.CLEAN_DATA_DIR, cache_dir=utils.CLEAN_CACHE_DIR):
-        # Start page contains list of "detail"/child pages with links to the SB16/SB1421/AB748 videos and files
-        # along with additional index pages
+        # Start page contains list of "detail"/child pages with links to the SB16/SB1421/AB748
+        # videos and files along with additional index pages
         self.base_url = "https://www.venturasheriff.org"
         self.index_urls = {
             f"{self.base_url}/sb1421/officer-involved-shooting-ois/": "ois.html",
             f"{self.base_url}/sb1421/use-of-force-great-bodily-injury-cases-gbi/": "gbi.html",
             f"{self.base_url}/ab748/": "ab748.html",
         }

-        # HEY! Need to build out dirs with appropriate slug structure
-        # Need to add detail dir
-
         self.cache = Cache(cache_dir)  # ~/.clean-scraper/cache/
         self.data_dir = data_dir
         self.cache_dir = cache_dir

         # Use module path to construct agency slug, which we'll use downstream
         # to create a subdir inside the main cache directory to stash files for this agency
         mod = Path(__file__)
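Editor's note: the slug construction that the retained comment describes falls just past the end of this hunk, so the sketch below is an assumption-based illustration of the idea (deriving ca_ventura_county_sheriff from the module path), not the file's verbatim code.

    # Illustrative sketch only; the real implementation is outside this hunk.
    # Assumes the slug is built from the module path clean/ca/ventura_county_sheriff.py
    from pathlib import Path

    mod = Path(__file__)                        # .../clean/ca/ventura_county_sheriff.py
    state_postal = mod.parent.stem              # "ca"
    agency_slug = f"{state_postal}_{mod.stem}"  # "ca_ventura_county_sheriff"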
@@ -51,12 +49,14 @@ def __init__(self, data_dir=utils.CLEAN_DATA_DIR, cache_dir=utils.CLEAN_CACHE_DIR):
     def scrape_meta(self, throttle: int = 0) -> Path:
         metadata: List[MetadataDict] = []
         page_urls = []
+        # Scrape index pages for both assets and links to case directories/subpages
         for index_url in self.index_urls:
             detail_page_links, local_metadata = self._process_index_page(index_url)
             page_urls.extend(detail_page_links)
             metadata.extend(local_metadata)
             time.sleep(throttle)

+        # Now, process the links of case directories/subpages
         for page_url in page_urls:
             local_metadata = self._process_detail_page(page_url)
             metadata.extend(local_metadata)
@@ -67,6 +67,7 @@ def scrape_meta(self, throttle: int = 0) -> Path:
         full_filename = self.cache.write_json(outfile, metadata)
         return full_filename

+    # Helper/Private Methods
     def _process_detail_page(self, target_url) -> List[MetadataDict]:
         """Extract links to files such as videos from a detail page and write to JSON file."""
         local_metadata: List[MetadataDict] = []
@@ -139,7 +140,6 @@ def _process_detail_page(self, target_url) -> List[MetadataDict]:
            local_metadata.append(line)
        return local_metadata

-    # Helper/Private Methods
    def _process_index_page(self, target_url):
        local_metadata: List[MetadataDict] = []
        subpages = []
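Editor's note: for context, this is roughly how the two-phase flow that the new comments document (index pages first, then detail pages) would be driven from calling code. A minimal hypothetical runner: Site and scrape_meta come from this file, but the script itself and its throttle value are illustrative.

    # Hypothetical driver script for the scraper changed above.
    from clean.ca.ventura_county_sheriff import Site

    site = Site()  # defaults to the ~/.clean-scraper data and cache dirs
    metadata_path = site.scrape_meta(throttle=2)  # pause 2s between index-page requests
    print(f"Metadata JSON written to {metadata_path}")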
