Skip to content

Commit

Permalink
Clean more null characters and unicode escapes from strings and JSON (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
radazen authored Apr 25, 2024
1 parent 3acb01d commit 44bd45f
Showing 1 changed file with 29 additions and 29 deletions.
58 changes: 29 additions & 29 deletions kafka-streamer/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,8 @@ func runStreamer(ctx context.Context, pgx *pgxpool.Pool) {
// Creating multiple configs for each topic allows them to process separate partitions in parallel
configs := []*streamerConfig{
newEthereumOwnerConfig(deserializer, queries),
newEthereumOwnerConfig(deserializer, queries),
//newEthereumTokenConfig(deserializer, queries),
//newEthereumTokenConfig(deserializer, queries),
newEthereumTokenConfig(deserializer, queries),
newEthereumTokenConfig(deserializer, queries),
newBaseOwnerConfig(deserializer, queries),
newZoraOwnerConfig(deserializer, queries),
}
Expand Down Expand Up @@ -691,26 +690,26 @@ func parseTokenMessage(ctx context.Context, deserializer *avro.GenericDeserializ
SimplehashNftID: &nft.Nft_id,
ContractAddress: &contractAddress,
TokenID: tokenID,
Name: removeNullChars(nft.Name),
Description: removeNullChars(nft.Description),
Name: cleanString(nft.Name),
Description: cleanString(nft.Description),
Previews: previews,
ImageUrl: removeNullChars(nft.Image_url),
VideoUrl: removeNullChars(nft.Video_url),
AudioUrl: removeNullChars(nft.Audio_url),
ModelUrl: removeNullChars(nft.Model_url),
OtherUrl: removeNullChars(nft.Other_url),
BackgroundColor: removeNullChars(nft.Background_color),
ExternalUrl: removeNullChars(nft.External_url),
ImageUrl: cleanString(nft.Image_url),
VideoUrl: cleanString(nft.Video_url),
AudioUrl: cleanString(nft.Audio_url),
ModelUrl: cleanString(nft.Model_url),
OtherUrl: cleanString(nft.Other_url),
BackgroundColor: cleanString(nft.Background_color),
ExternalUrl: cleanString(nft.External_url),
OnChainCreatedDate: onChainCreatedDate,
Status: removeNullChars(nft.Status),
Status: cleanString(nft.Status),
TokenCount: tokenCount,
OwnerCount: ownerCount,
Contract: contract,
CollectionID: collectionID,
LastSale: lastSale,
FirstCreated: firstCreated,
Rarity: rarity,
ExtraMetadata: removeNullChars(nft.Extra_metadata),
ExtraMetadata: cleanString(nft.Extra_metadata),
ImageProperties: imageProperties,
VideoProperties: videoProperties,
AudioProperties: audioProperties,
Expand Down Expand Up @@ -769,35 +768,36 @@ func toJSONB[T any](data *T) (pgtype.JSONB, error) {
return pgtype.JSONB{}, err
}

var jsonb pgtype.JSONB
err = jsonb.Set(jsonData)
if err != nil {
return pgtype.JSONB{}, err
}
// Convert jsonData to a string
jsonStr := string(jsonData)

// Remove null bytes from the underlying string
jsonStr := string(jsonb.Bytes)
// Strip out any literal null bytes
jsonStr = strings.ReplaceAll(jsonStr, "\x00", "")

// Remove null characters
cleanedStr := strings.ReplaceAll(jsonStr, "\x00", "")
// Strip out any escaped null characters in JSON
cleanedStr := strings.ReplaceAll(jsonStr, "\\u0000", "")

// Convert the cleaned string back to bytes
jsonb.Bytes = []byte(cleanedStr)

err = jsonb.Set(jsonb.Bytes)
var jsonb pgtype.JSONB
// Convert the cleaned string back to bytes and set it to jsonb
err = jsonb.Set([]byte(cleanedStr))
if err != nil {
return pgtype.JSONB{}, err
}

jsonb.Status = pgtype.Present // Explicitly mark the JSONB data as present
jsonb.Status = pgtype.Present
return jsonb, nil
}

func removeNullChars(s *string) *string {
// cleanString returns a pointer to a sanitized copy of the string s points to.
// A nil input yields nil. Otherwise every NUL byte ("\x00") is dropped first,
// and any byte sequences that are still not valid UTF-8 afterwards are removed.
// The string referenced by s is never modified; the result is a new value.
func cleanString(s *string) *string {
	if s == nil {
		return nil
	}

	// NUL bytes are stripped before UTF-8 validation, so the validity check
	// runs on the text as it looks once those bytes are gone.
	sanitized := strings.ToValidUTF8(strings.ReplaceAll(*s, "\x00", ""), "")
	return &sanitized
}

0 comments on commit 44bd45f

Please sign in to comment.