diff --git a/deployment/docker/.env.example b/deployment/docker/.env.example index 785272bd..8ec64802 100644 --- a/deployment/docker/.env.example +++ b/deployment/docker/.env.example @@ -48,6 +48,8 @@ UDP_TRACKERS_ENABLED=true CONSUMER_REPLICAS=3 ## Fix for #66 - toggle on for development AUTO_CREATE_AND_APPLY_MIGRATIONS=false +## Controls the fuzzy-match threshold used when matching titles against the IMDB dataset. Ranges from 0 (exact match only) to 1 (match anything); the closer to 0, the stricter the matching. +TITLE_MATCH_THRESHOLD=0.25 # Producer GITHUB_PAT= diff --git a/src/node/consumer/package-lock.json b/src/node/consumer/package-lock.json index c9e57706..fc3f7b16 100644 --- a/src/node/consumer/package-lock.json +++ b/src/node/consumer/package-lock.json @@ -14,6 +14,7 @@ "axios": "^1.6.1", "bottleneck": "^2.19.5", "cache-manager": "^5.4.0", + "fuse.js": "^7.0.0", "google-sr": "^3.2.1", "inversify": "^6.0.2", "magnet-uri": "^6.2.0", @@ -4782,6 +4783,14 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/fuse.js": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.0.0.tgz", + "integrity": "sha512-14F4hBIxqKvD4Zz/XjDc3y94mNZN6pRv3U13Udo0lNLCWRBUsrMv2xwcF/y/Z5sV6+FQW+/ow68cHpm4sunt8Q==", + "engines": { + "node": ">=10" + } + }, "node_modules/gensync": { "version": "1.0.0-beta.2", "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", diff --git a/src/node/consumer/package.json b/src/node/consumer/package.json index 33e4c553..86369aeb 100644 --- a/src/node/consumer/package.json +++ b/src/node/consumer/package.json @@ -19,6 +19,7 @@ "axios": "^1.6.1", "bottleneck": "^2.19.5", "cache-manager": "^5.4.0", + "fuse.js": "^7.0.0", "google-sr": "^3.2.1", "inversify": "^6.0.2", "magnet-uri": "^6.2.0", diff --git a/src/node/consumer/src/lib/models/configuration/metadata_config.ts b/src/node/consumer/src/lib/models/configuration/metadata_config.ts index 32b4e336..d728c102 100644 --- a/src/node/consumer/src/lib/models/configuration/metadata_config.ts +++ 
b/src/node/consumer/src/lib/models/configuration/metadata_config.ts @@ -1,4 +1,5 @@ export const metadataConfig = { IMDB_CONCURRENT: parseInt(process.env.IMDB_CONCURRENT || "1", 10), - IMDB_INTERVAL_MS: parseInt(process.env.IMDB_INTERVAL_MS || "1000", 10) + IMDB_INTERVAL_MS: parseInt(process.env.IMDB_INTERVAL_MS || "1000", 10), + TITLE_MATCH_THRESHOLD: Number(process.env.TITLE_MATCH_THRESHOLD || 0.25), }; \ No newline at end of file diff --git a/src/node/consumer/src/lib/mongo/interfaces/mongo_metadata_query.ts b/src/node/consumer/src/lib/mongo/interfaces/mongo_metadata_query.ts index 6c894cec..4b1db7f2 100644 --- a/src/node/consumer/src/lib/mongo/interfaces/mongo_metadata_query.ts +++ b/src/node/consumer/src/lib/mongo/interfaces/mongo_metadata_query.ts @@ -1,5 +1,5 @@ export interface IMongoMetadataQuery { $text: { $search: string }, - TitleType: string; + TitleType: string, StartYear?: string; } \ No newline at end of file diff --git a/src/node/consumer/src/lib/mongo/mongo_repository.ts b/src/node/consumer/src/lib/mongo/mongo_repository.ts index 92c87f0a..82e7db10 100644 --- a/src/node/consumer/src/lib/mongo/mongo_repository.ts +++ b/src/node/consumer/src/lib/mongo/mongo_repository.ts @@ -1,13 +1,21 @@ import {TorrentType} from "@enums/torrent_types"; import {ILoggingService} from "@interfaces/logging_service"; +import {IImdbEntry} from "@mongo/interfaces/imdb_entry_attributes"; import {IMongoMetadataQuery} from "@mongo/interfaces/mongo_metadata_query"; import {IMongoRepository} from "@mongo/interfaces/mongo_repository"; import {ImdbEntryModel} from "@mongo/models/imdb_entries_model"; import {configurationService} from '@services/configuration_service'; import {IocTypes} from "@setup/ioc_types"; +import Fuse, {FuseResult, IFuseOptions} from 'fuse.js'; import {inject, injectable} from "inversify"; import mongoose from 'mongoose'; +const fuseOptions : IFuseOptions = { + includeScore: true, + keys: ['PrimaryTitle', 'OriginalTitle'], + threshold: 
configurationService.metadataConfig.TITLE_MATCH_THRESHOLD, +}; + @injectable() export class MongoRepository implements IMongoRepository { @inject(IocTypes.ILoggingService) private logger: ILoggingService; @@ -19,27 +27,34 @@ export class MongoRepository implements IMongoRepository { this.logger.info('Successfully connected to mongo db'); } catch (error) { - this.logger.debug('Failed to connect to mongo db', error); - this.logger.error('Failed to connect to mongo db'); + const FAILED_TO_CONNECT = 'Failed to connect to mongo db'; + this.logger.debug(FAILED_TO_CONNECT, error); + this.logger.error(FAILED_TO_CONNECT); process.exit(1); } } async getImdbId(title: string, category: string, year?: string | number) : Promise { const titleType: string = category === TorrentType.Series ? 'tvSeries' : 'movie'; - const query: IMongoMetadataQuery = { $text: { $search: title }, - TitleType: titleType, + TitleType: titleType }; - if (year) { query.StartYear = year.toString(); } - try { - const result = await ImdbEntryModel.findOne(query, '_id', {score: {$meta: "textScore" }}).sort({score: {$meta: "textScore"}}).limit(10).maxTimeMS(30000); - return result ? 
result._id : null; + const results = await ImdbEntryModel.find(query).limit(100).maxTimeMS(30000); + if (!results.length) { + return null; + } + const fuse: Fuse = new Fuse(results, fuseOptions); + const searchResults: FuseResult[] = fuse.search(title); + if (!searchResults.length) { + return null; + } + const [bestMatch] = searchResults; + return bestMatch.item._id; } catch (error) { this.logger.error('Query exceeded the 30 seconds time limit', error); return null; diff --git a/src/node/consumer/test/services/mongo_repository.test.ts b/src/node/consumer/test/services/mongo_repository.test.ts index 7e45fc0a..09c3d895 100644 --- a/src/node/consumer/test/services/mongo_repository.test.ts +++ b/src/node/consumer/test/services/mongo_repository.test.ts @@ -66,7 +66,7 @@ xdescribe('MongoRepository Tests - Manual Tests against real cluster. Skipped by expect(result).toBe('tt0084726'); }, 30000); - it('should get Wrath of Khan imdbId correctly', async () => { + it('should get Wrath of Khan simple imdbId correctly', async () => { await mongoRepository.connect(); const result = await mongoRepository.getImdbId('Wrath of Khan', TorrentType.Movie, 1982); expect(result).toBe('tt0084726');