From 9d9c01a3286326a459f40f0d4294b437a834145b Mon Sep 17 00:00:00 2001 From: Kai Moschcau Date: Sun, 12 Nov 2023 14:38:18 +0100 Subject: [PATCH] feat(scraper): handle text instructions This expands the recipe instructions parser with the ability to handle recipe instructions which consist of a simple string. --- api/package.json | 5 +- api/src/recipes/scraper/recipeScraper.spec.ts | 38 ++++++++++---- api/src/recipes/scraper/recipeScraper.ts | 7 ++- .../scraper/recipeScraperTestConstants.ts | 52 +++++++++++++++++++ api/yarn.lock | 21 ++++++++ 5 files changed, 110 insertions(+), 13 deletions(-) diff --git a/api/package.json b/api/package.json index 5108ae29..539acc76 100644 --- a/api/package.json +++ b/api/package.json @@ -47,6 +47,8 @@ "@nestjs/testing": "^8.0.0", "@types/express": "^4.17.14", "@types/jest": "27.4.1", + "@types/jsdom": "^21.1.5", + "@types/multer": "^1.4.7", "@types/node": "^18.11.10", "@types/passport-jwt": "^3.0.7", "@types/passport-local": "^1.0.34", @@ -64,8 +66,7 @@ "ts-loader": "^9.4.2", "ts-node": "^10.9.1", "tsconfig-paths": "^4.1.1", - "typescript": "^4.9.3", - "@types/multer": "^1.4.7" + "typescript": "^4.9.3" }, "jest": { "moduleFileExtensions": [ diff --git a/api/src/recipes/scraper/recipeScraper.spec.ts b/api/src/recipes/scraper/recipeScraper.spec.ts index 8f68cf09..caebd2d2 100644 --- a/api/src/recipes/scraper/recipeScraper.spec.ts +++ b/api/src/recipes/scraper/recipeScraper.spec.ts @@ -1,5 +1,5 @@ -import { RecipeScraper } from './recipeScraper'; import { Test, TestingModule } from '@nestjs/testing'; +import { RecipeScraper } from './recipeScraper'; import { allRecipesDomString, allRecipesMetadataObject, @@ -9,6 +9,7 @@ import { belliniUrl, mockNodeList, mockRecipe, + stringInstructionsRecipe, yoastDomString, yoastMetadataObject, yoastRecipe, @@ -63,14 +64,6 @@ describe('RecipeScraper', () => { ); }); - it('should correctly parse recipe steps', function () { - expect( - scraper.parseRecipeSteps(belliniMetadataObject.recipeInstructions), 
- ).toEqual([ - 'Put the peach puree in a Champagne flute up to about 1/3 full and slowly top up with Prosecco.', - ]); - }); - it('should correctly extract the recipe image url', function () { expect(scraper.parseImageUrl(belliniMetadataObject.image)).toEqual( belliniMetadataObject.image.url, @@ -120,3 +113,30 @@ describe('RecipeScraper', () => { jest.clearAllMocks(); }); }); + +describe('RecipeScraper#parseRecipeSteps', () => { + let scraper: RecipeScraper; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + providers: [RecipeScraper], + }).compile(); + scraper = module.get(RecipeScraper); + }); + + it('should correctly parse recipe steps from an array', function () { + expect( + scraper.parseRecipeSteps(belliniMetadataObject.recipeInstructions), + ).toEqual([ + 'Put the peach puree in a Champagne flute up to about 1/3 full and slowly top up with Prosecco.', + ]); + }); + + it('should correctly parse recipe steps from a string', function () { + expect( + scraper.parseRecipeSteps(stringInstructionsRecipe.recipeInstructions), + ).toEqual([ + 'Put the peach puree in a Champagne flute up to about 1/3 full and slowly top up with Prosecco.', + ]); + }); +}); diff --git a/api/src/recipes/scraper/recipeScraper.ts b/api/src/recipes/scraper/recipeScraper.ts index 09936fe4..ee738f19 100644 --- a/api/src/recipes/scraper/recipeScraper.ts +++ b/api/src/recipes/scraper/recipeScraper.ts @@ -4,10 +4,13 @@ import { Recipe } from '../schemas/recipe.schema'; @Injectable() export class RecipeScraper { - parseRecipeSteps(steps) { + parseRecipeSteps(steps: string | Array): string[] { + if (typeof steps === 'string') return [steps.trim()]; + return steps.flat().map((step) => { if (typeof step === 'string') return step.trim(); - if (step.hasOwnProperty('text')) return step.text.trim(); + if ('text' in step && typeof step.text === 'string') + return step.text.trim(); throw new Error('Unable to parse recipe steps'); }); } diff --git 
a/api/src/recipes/scraper/recipeScraperTestConstants.ts b/api/src/recipes/scraper/recipeScraperTestConstants.ts index 6a691019..e68c3df0 100644 --- a/api/src/recipes/scraper/recipeScraperTestConstants.ts +++ b/api/src/recipes/scraper/recipeScraperTestConstants.ts @@ -67,6 +67,58 @@ export const belliniMetadataObject = { totalTime: 'PT5M', }; +export const stringInstructionsRecipe = { + '@context': 'https://schema.org', + '@id': 'https://www.bbcgoodfood.com/recipes/bellini#Recipe', + '@type': 'Recipe', + description: + 'A classy cocktail served in an elegant flute - this simple combination of peach purée and Prosecco makes a great start to any celebration', + image: { + '@type': 'ImageObject', + height: 400, + url: 'https://images.immediate.co.uk/production/volatile/sites/30/2020/08/bellini-b049342.jpg', + width: 440, + }, + mainEntityOfPage: { + '@type': 'WebPage', + '@id': 'https://www.bbcgoodfood.com/recipes/bellini', + }, + name: 'Bellini', + url: 'https://www.bbcgoodfood.com/recipes/bellini', + author: { '@type': 'Person', name: 'Good Food team' }, + dateModified: '2020-08-08T02:26:26+00:00', + datePublished: '2013-11-18T16:37:44+00:00', + headline: 'Bellini', + keywords: + 'Christmas, Christmas morning, cocktails canapes, Good Food, Party, sparkling cocktail', + publisher: { + '@type': 'Organization', + name: 'BBC Good Food', + url: 'https://www.bbcgoodfood.com', + logo: { + '@type': 'ImageObject', + url: 'https://images.immediate.co.uk/production/volatile/sites/30/2019/07/GoodFood-dark-516d417.png', + width: 221, + height: 58, + }, + }, + nutrition: { + '@type': 'NutritionInformation', + calories: '143 calories', + carbohydrateContent: '18 grams carbohydrates', + sugarContent: '18 grams sugar', + fiberContent: '0.7 grams fiber', + proteinContent: '0.7 grams protein', + }, + prepTime: 'PT5M', + recipeCategory: 'Cocktails', + recipeIngredient: ['500ml peach purée or peach nectar', '1 bottle prosecco'], + recipeInstructions: + 'Put the peach puree in a Champagne 
flute up to about 1/3 full and slowly top up with Prosecco.', + recipeYield: 6, + totalTime: 'PT5M', +}; + export const mockRecipe = ( url = 'https://www.bbcgoodfood.com/recipes/bellini', name = 'Bellini', diff --git a/api/yarn.lock b/api/yarn.lock index d7666490..a55dcb61 100644 --- a/api/yarn.lock +++ b/api/yarn.lock @@ -1629,6 +1629,15 @@ jest-matcher-utils "^27.0.0" pretty-format "^27.0.0" +"@types/jsdom@^21.1.5": + version "21.1.5" + resolved "https://registry.yarnpkg.com/@types/jsdom/-/jsdom-21.1.5.tgz#b5d0bccd2436a2bc166dbe235f1dc43a1f922d40" + integrity sha512-sBK/3YjS3uuPj+HzZyhB4GGTnFmk0mdyQfhzZ/sqs9ciyG41QJdZZdwcPa6OfW97OTNTwl5tBAsfEOm/dui9pQ== + dependencies: + "@types/node" "*" + "@types/tough-cookie" "*" + parse5 "^7.0.0" + "@types/json-schema@*", "@types/json-schema@^7.0.8", "@types/json-schema@^7.0.9": version "7.0.11" resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.11.tgz#d421b6c527a3037f7c84433fd2c4229e016863d3" @@ -1744,6 +1753,11 @@ dependencies: "@types/superagent" "*" +"@types/tough-cookie@*": + version "4.0.5" + resolved "https://registry.yarnpkg.com/@types/tough-cookie/-/tough-cookie-4.0.5.tgz#cb6e2a691b70cb177c6e3ae9c1d2e8b2ea8cd304" + integrity sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA== + "@types/webidl-conversions@*": version "6.1.1" resolved "https://registry.yarnpkg.com/@types/webidl-conversions/-/webidl-conversions-6.1.1.tgz#e33bc8ea812a01f63f90481c666334844b12a09e" @@ -4902,6 +4916,13 @@ parse5@6.0.1: resolved "https://registry.yarnpkg.com/parse5/-/parse5-6.0.1.tgz#e1a1c085c569b3dc08321184f19a39cc27f7c30b" integrity sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw== +parse5@^7.0.0: + version "7.1.2" + resolved "https://registry.yarnpkg.com/parse5/-/parse5-7.1.2.tgz#0736bebbfd77793823240a23b7fc5e010b7f8e32" + integrity 
sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw== + dependencies: + entities "^4.4.0" + parse5@^7.1.1: version "7.1.1" resolved "https://registry.yarnpkg.com/parse5/-/parse5-7.1.1.tgz#4649f940ccfb95d8754f37f73078ea20afe0c746"