datasets: fix dataset clobbering, and file naming - v1 #341

Merged
merged 2 commits
Mar 11, 2024
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,13 @@
instead of 4.0.0.
- Handle URLs of bare files that don't end in .rules:
https://redmine.openinfosecfoundation.org/issues/3664
- Don't base dataset filenames on the contents of the file, but
instead the filename path:
https://redmine.openinfosecfoundation.org/issues/6763
- Give each file in a source a unique filename by prefixing the files
with a hash of the URL to prevent duplicate filenames from
clobbering each other, in particular dataset files:
https://redmine.openinfosecfoundation.org/issues/6833

## 1.3.0 - 2023-07-07

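The first CHANGELOG entry above covers the naming half of the fix: in handle_dataset_files(), the copied dataset is now named after an md5 of the source file's path rather than of its contents, so the on-disk name no longer changes whenever the dataset contents do. Below is a minimal sketch of that rewrite, using a hypothetical rule and hypothetical file paths; the real logic is in the main.py diff that follows.

```python
# Sketch only: rule text and paths are made up for illustration.
import hashlib
import re

rule = ("alert dns any any -> any any "
        "(dataset:set,bad-domains,load bad-domains.lst; sid:1;)")
dataset_filename = "bad-domains.lst"       # filename as referenced by the rule
source_filename = "rules/bad-domains.lst"  # path of the file within the source

# Name the copy after a hash of the path, not of the contents.
name_hash = hashlib.md5(source_filename.encode()).hexdigest()

# Point the rule's "load" argument at the hashed copy under datasets/.
new_rule = re.sub(
    r"(dataset.*?load\s+){}".format(dataset_filename),
    r"\g<1>datasets/{}".format(name_hash),
    rule)
print(new_rule)
```

In the diff below, the same hash is also used as the destination filename when the dataset file is copied into the output directory, so the rewritten rule and the copied file stay in sync.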
13 changes: 9 additions & 4 deletions suricata/update/main.py
@@ -465,9 +465,9 @@ def handle_dataset_files(rule, dep_files):
return
dataset_contents = dep_files[source_filename]

-content_hash = hashlib.md5(dataset_contents).hexdigest()
-new_rule = re.sub(r"(dataset.*?load\s+){}".format(dataset_filename), r"\g<1>datasets/{}".format(content_hash), rule.format())
-dest_filename = os.path.join(config.get_output_dir(), "datasets", content_hash)
+source_filename_hash = hashlib.md5(source_filename.encode()).hexdigest()
+new_rule = re.sub(r"(dataset.*?load\s+){}".format(dataset_filename), r"\g<1>datasets/{}".format(source_filename_hash), rule.format())
+dest_filename = os.path.join(config.get_output_dir(), "datasets", source_filename_hash)
dest_dir = os.path.dirname(dest_filename)
logger.debug("Copying dataset file {} to {}".format(dataset_filename, dest_filename))
try:
@@ -985,9 +985,14 @@ def load_sources(suricata_version):
# Now download each URL.
files = []
for url in urls:

+# To de-duplicate filenames, add a prefix that is a hash of the URL.
+prefix = hashlib.md5(url[0].encode()).hexdigest()
source_files = Fetch().run(url)
for key in source_files:
-files.append(SourceFile(key, source_files[key]))
+content = source_files[key]
+key = format("{}/{}".format(prefix, key))
+files.append(SourceFile(key, content))

# Now load local rules.
if config.get("local") is not None:
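The second hunk covers the clobbering half of the fix: each file fetched from a source is stored under a key prefixed with an md5 of the source URL (url[0] in the diff above), so two sources that both ship a file with the same name, in particular dataset files, can no longer overwrite each other. A small sketch of that keying scheme, with made-up URLs and filenames:

```python
# Sketch only: URLs and filenames are made up for illustration.
import hashlib

def prefixed_key(url, filename):
    # Prefix the filename with a hash of the URL it was fetched from, so
    # identical filenames from different sources map to distinct keys.
    return "{}/{}".format(hashlib.md5(url.encode()).hexdigest(), filename)

a = prefixed_key("https://example.com/feed-a.tar.gz", "bad-domains.lst")
b = prefixed_key("https://example.org/feed-b.tar.gz", "bad-domains.lst")
assert a != b  # same filename, different sources, no clobbering
```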