144 - Add fetch records for collect when crawling. #154

Open · wants to merge 1 commit into main
17 changes: 14 additions & 3 deletions cmd/collect/work.go
@@ -67,15 +67,23 @@ func ensureSchemasInitialized() error {
func deserializeJSON(jsonString string) (map[string]interface{}, error) {
var jsonData map[string]interface{}

// Unmarshal the JSON data
if err := json.Unmarshal([]byte(jsonString), &jsonData); err != nil {
zap.L().Error("failed to unmarshal JSON", zap.Error(err))

return nil, fmt.Errorf("deserializeJSON: failed to unmarshal input JSON: %w", err)
}

// Pull in IsFull and hallpass
isFull, _ := jsonData["IsFull"].(bool)
hallPass, _ := jsonData["hallpass"].(bool)
Contributor:

Why do we need to pull these out? Also, are they always present? I don't think they are. I would say that we need to have a case for why these are being singled out as opposed to all the other fields.

// Safely check and retrieve optional fields
isFull := false
if v, ok := jsonData["IsFull"].(bool); ok {
isFull = v
}

hallPass := false
if v, ok := jsonData["hallpass"].(bool); ok {
hallPass = v
}

zap.L().Debug("deserialized JSON attributes",
zap.Bool("isFull", isFull),
@@ -86,6 +94,9 @@ func deserializeJSON(jsonString string) (map[string]interface{}, error) {
}

func selectSchema(jsonData map[string]interface{}) (*gojsonschema.Schema, error) {
// Debug log to inspect the incoming JSON
zap.L().Debug("selectSchema received JSON", zap.Any("jsonData", jsonData))

// Extract the "data" object
data, ok := jsonData["data"].(map[string]interface{})
if !ok {
39 changes: 39 additions & 0 deletions cmd/fetch/work.go
@@ -4,6 +4,7 @@ package main
import (
"context"
_ "embed"
"encoding/json"
"fmt"
"math"
"net/url"
@@ -21,6 +22,7 @@ import (
"github.com/GSA-TTS/jemison/internal/postgres/work_db"
"github.com/GSA-TTS/jemison/internal/queueing"
"github.com/GSA-TTS/jemison/internal/util"
"github.com/google/uuid"
"github.com/jackc/pgx/v5/pgtype"
"github.com/riverqueue/river"
"go.uber.org/zap"
@@ -313,5 +315,42 @@ func (w *FetchWorker) Work(_ context.Context, job *river.Job[common.FetchArgs])
Path: job.Args.Path,
}

// Generate UUID
id := uuid.New().String()
Contributor:

We're going to want IDs to be unique globally, but constant. That is, we need to know that the id for this is the fetch_collect_count, not that it is a unique ID. In the schemas folder, I'd consider adding a constants.go that has the names of the ids, so we can keep them consistent. E.g.

var FetchCountSchemaId = "fetch_count"

or similar, one for each schema. This way, we can also refer to these in our conditionals when we are trying to figure out what schema to apply to the data payload.
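
A minimal sketch of that suggestion, assuming a new internal/common/schemas/constants.go (the file location and any constant beyond the one the comment names are illustrative, not part of this PR):

// constants.go — a central home for schema IDs, so the sender and the
// schema-selection conditionals always use the same strings.
package schemas

const (
	// FetchCountSchemaId identifies the fetch count schema; one constant
	// per schema file in this folder would live alongside it.
	FetchCountSchemaId = "fetch_count"
)

Using const rather than var would also make it explicit that these IDs are fixed, not mutable state.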


// Create data to send to the `collect` service
collectData := map[string]interface{}{
Contributor:

Do you think it is possible to wrap lines 322 through 341 into a common helper function? That is, these all take a map[string]any and convert it into a JSON structure that we send to ChQSHP. However, another question: should we just pass the map[string]any over the channel, and let the process at the other end do this conversion? That is, we should be able to declare RawData to be of type map[string]any, and then pass these hash tables/maps/dictionaries over the channel directly. That way, when it gets to the other end, we can do this work in one place.

Contributor:

I had two ideas in there; I suspect the better idea is to pass the map over the channel, so that RawData is a map[string]any as opposed to type string. This saves us from doing work at every place we want to send data.
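
A rough sketch of that preferred direction, assuming the QSHP struct in internal/queueing can be changed (the field set shown here mirrors this diff's usage and is hypothetical, not the current definition):

// Hypothetical QSHP with RawData as a map instead of a string. Callers
// hand over the map directly; the receiving end marshals it exactly once.
type QSHP struct {
	Queue   string
	Scheme  string
	Host    string
	Path    string
	RawData map[string]any
}

The send site in fetch would then shrink to:

ChQSHP <- queueing.QSHP{
	Queue:   "collect",
	Scheme:  job.Args.Scheme,
	Host:    job.Args.Host,
	Path:    job.Args.Path,
	RawData: collectData, // no json.Marshal here; conversion happens at the consumer
}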

"data": map[string]interface{}{
"id": id,
"source": "fetch",
"payload": "default-payload",
"url": hostAndPath(job), // Full URL being fetched
"count": fetchCount.Load(), // Total count of fetched URLs
},
}

// Marshal the data to JSON format
collectJSON, err := json.Marshal(collectData)
if err != nil {
// Wrap the error with additional context
wrappedErr := fmt.Errorf("failed to marshal collect data to JSON: %w", err)
zap.L().Error(wrappedErr.Error(), zap.Error(err))

return wrappedErr
}

// Enqueue the data to the `collect` queue
ChQSHP <- queueing.QSHP{
Queue: "collect",
Scheme: job.Args.Scheme,
Host: job.Args.Host,
Path: job.Args.Path,
RawData: string(collectJSON), // Include data for S3 logging
}

zap.L().Info("Logged URL to collect service",
zap.String("url", hostAndPath(job)),
zap.Int64("total_count", fetchCount.Load()))

return nil
}
4 changes: 3 additions & 1 deletion internal/common/schemas/fetch_schema.json
@@ -7,7 +7,9 @@
"properties": {
Contributor:

Another question I have... which may be because I'm missing something. (These comments did not happen linearly...)

Why do we have an object with a "data" member, and under it is everything? Is there a reason we ended up with this nested design? Should id, source, payload all be at the top level, and payload contains the interesting data? Otherwise, we've just nested everything one level deep unnecessarily?

"id": { "type": "string" },
"source": { "type": "string" },
"payload": { "type": "string" }
"payload": { "type": "string" },
Contributor:

More a question: what is the payload in this schema? If the payload is just a string that says "default-payload"... shouldn't that actually be the data?

"properties": {
  "id" ...,
  "payload": {
    "url": ...,
    "count": ...,
}

the payload is what is being carried by the data packet, no? If it isn't, then what is it for? Happy to pair if that doesn't make sense.
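
For concreteness, a possible shape combining both schema comments — fields at the top level, with payload as an object carrying the interesting data (illustrative only; the exact field set is not settled in this PR):

{
  "type": "object",
  "properties": {
    "id": { "type": "string" },
    "source": { "type": "string" },
    "payload": {
      "type": "object",
      "properties": {
        "url": { "type": "string" },
        "count": { "type": "integer" }
      }
    }
  },
  "required": ["id", "source", "payload"]
}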

"url": { "type": "string" },
"count": { "type": "integer" }
},
"required": ["id", "source", "payload"]
}