Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Further enhance title matching by adding Fuse with a configurable threshold #104

Merged
merged 2 commits into from
Mar 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions deployment/docker/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ UDP_TRACKERS_ENABLED=true
CONSUMER_REPLICAS=3
## Fix for #66 - toggle on for development
AUTO_CREATE_AND_APPLY_MIGRATIONS=false
## Controls the threshold for fuzzy-matching titles against the IMDB dataset. Ranges from 0 to 1; the closer to 0, the stricter the matching.
TITLE_MATCH_THRESHOLD=0.25

# Producer
GITHUB_PAT=
9 changes: 9 additions & 0 deletions src/node/consumer/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/node/consumer/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"axios": "^1.6.1",
"bottleneck": "^2.19.5",
"cache-manager": "^5.4.0",
"fuse.js": "^7.0.0",
"google-sr": "^3.2.1",
"inversify": "^6.0.2",
"magnet-uri": "^6.2.0",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
export const metadataConfig = {
IMDB_CONCURRENT: parseInt(process.env.IMDB_CONCURRENT || "1", 10),
IMDB_INTERVAL_MS: parseInt(process.env.IMDB_INTERVAL_MS || "1000", 10)
IMDB_INTERVAL_MS: parseInt(process.env.IMDB_INTERVAL_MS || "1000", 10),
TITLE_MATCH_THRESHOLD: Number(process.env.TITLE_MATCH_THRESHOLD || 0.25),
};
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
export interface IMongoMetadataQuery {
$text: { $search: string },
TitleType: string;
TitleType: string,
StartYear?: string;
}
31 changes: 23 additions & 8 deletions src/node/consumer/src/lib/mongo/mongo_repository.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
import {TorrentType} from "@enums/torrent_types";
import {ILoggingService} from "@interfaces/logging_service";
import {IImdbEntry} from "@mongo/interfaces/imdb_entry_attributes";
import {IMongoMetadataQuery} from "@mongo/interfaces/mongo_metadata_query";
import {IMongoRepository} from "@mongo/interfaces/mongo_repository";
import {ImdbEntryModel} from "@mongo/models/imdb_entries_model";
import {configurationService} from '@services/configuration_service';
import {IocTypes} from "@setup/ioc_types";
import Fuse, {FuseResult, IFuseOptions} from 'fuse.js';
import {inject, injectable} from "inversify";
import mongoose from 'mongoose';

/**
 * Shared Fuse.js options for fuzzy title matching over IMDB entries.
 * Built once when this module is loaded, so the threshold is read from
 * configuration a single time.
 */
const fuseOptions : IFuseOptions<IImdbEntry> = {
// Ask Fuse to attach a score to every search result.
includeScore: true,
// Entry fields that the search string is compared against.
keys: ['PrimaryTitle', 'OriginalTitle'],
// Match cut-off taken from TITLE_MATCH_THRESHOLD; the closer to 0, the stricter the matching.
threshold: configurationService.metadataConfig.TITLE_MATCH_THRESHOLD,
};

@injectable()
export class MongoRepository implements IMongoRepository {
@inject(IocTypes.ILoggingService) private logger: ILoggingService;
Expand All @@ -19,27 +27,34 @@ export class MongoRepository implements IMongoRepository {
this.logger.info('Successfully connected to mongo db');
}
catch (error) {
this.logger.debug('Failed to connect to mongo db', error);
this.logger.error('Failed to connect to mongo db');
const FAILED_TO_CONNECT = 'Failed to connect to mongo db';
this.logger.debug(FAILED_TO_CONNECT, error);
this.logger.error(FAILED_TO_CONNECT);
process.exit(1);
}
}

async getImdbId(title: string, category: string, year?: string | number) : Promise<string | null> {
const titleType: string = category === TorrentType.Series ? 'tvSeries' : 'movie';

const query: IMongoMetadataQuery = {
$text: { $search: title },
TitleType: titleType,
TitleType: titleType
};

if (year) {
query.StartYear = year.toString();
}

try {
const result = await ImdbEntryModel.findOne(query, '_id', {score: {$meta: "textScore" }}).sort({score: {$meta: "textScore"}}).limit(10).maxTimeMS(30000);
return result ? result._id : null;
const results = await ImdbEntryModel.find(query).limit(100).maxTimeMS(30000);
if (!results.length) {
return null;
}
const fuse: Fuse<IImdbEntry> = new Fuse(results, fuseOptions);
const searchResults: FuseResult<IImdbEntry>[] = fuse.search(title);
if (!searchResults.length) {
return null;
}
const [bestMatch] = searchResults;
return bestMatch.item._id;
} catch (error) {
this.logger.error('Query exceeded the 30 seconds time limit', error);
return null;
Expand Down
2 changes: 1 addition & 1 deletion src/node/consumer/test/services/mongo_repository.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ xdescribe('MongoRepository Tests - Manual Tests against real cluster. Skipped by
expect(result).toBe('tt0084726');
}, 30000);

it('should get Wrath of Khan imdbId correctly', async () => {
it('should get Wrath of Khan simple imdbId correctly', async () => {
await mongoRepository.connect();
const result = await mongoRepository.getImdbId('Wrath of Khan', TorrentType.Movie, 1982);
expect(result).toBe('tt0084726');
Expand Down