Skip to content

Commit

Permalink
adds methods to set the IContentParser
Browse files Browse the repository at this point in the history
  • Loading branch information
justynhunter committed Nov 7, 2023
1 parent c5d4b7c commit 810db61
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 9 deletions.
21 changes: 14 additions & 7 deletions WebReaper/Builders/ScraperEngineBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using WebReaper.Core.CookieStorage.Abstract;
using WebReaper.Core.LinkTracker.Abstract;
using WebReaper.Core.LinkTracker.Concrete;
using WebReaper.Core.Parser.Abstract;
using WebReaper.Core.Scheduler.Abstract;
using WebReaper.Core.Scheduler.Concrete;
using WebReaper.Domain;
Expand Down Expand Up @@ -35,9 +36,15 @@ public class ScraperEngineBuilder

private IScheduler Scheduler { get; set; } = new InMemoryScheduler();
private IScraperConfigStorage? ConfigStorage { get; set; } = new InMemoryScraperConfigStorage();

protected IProxyProvider? ProxyProvider { get; set; }

/// <summary>
/// Sets the content parser by forwarding it to the underlying <c>SpiderBuilder</c>.
/// </summary>
/// <param name="contentParser">Parser instance handed to <c>SpiderBuilder.WithContentParser</c>.</param>
/// <returns>This builder, so further calls can be chained.</returns>
public ScraperEngineBuilder WithContentParser(IContentParser contentParser)
{
    // The spider builder's own fluent return value is not needed here.
    _ = SpiderBuilder.WithContentParser(contentParser);

    return this;
}

public ScraperEngineBuilder AddSink(IScraperSink sink)
{
SpiderBuilder.AddSink(sink);
Expand Down Expand Up @@ -186,7 +193,7 @@ public ScraperEngineBuilder GetWithBrowser(
ConfigBuilder.GetWithBrowser(startUrls, actionBuilder?.Invoke(new PageActionBuilder()));
return this;
}

public ScraperEngineBuilder GetWithBrowser(params string[] startUrls)
{
ConfigBuilder.GetWithBrowser(startUrls);
Expand All @@ -201,7 +208,7 @@ public ScraperEngineBuilder Follow(string linkSelector)

public ScraperEngineBuilder FollowWithBrowser(
string linkSelector,
Func<PageActionBuilder,
Func<PageActionBuilder,
List<PageAction>>? actionBuilder = null)
{
ConfigBuilder.FollowWithBrowser(linkSelector, actionBuilder?.Invoke(new PageActionBuilder()));
Expand Down Expand Up @@ -278,7 +285,7 @@ public ScraperEngineBuilder WithMongoDbCookieStorage(string connectionString, st
logger);
return this;
}

public ScraperEngineBuilder WithFileCookieStorage(string fileName)
{
SpiderBuilder.WithFileCookieStorage(fileName);
Expand Down Expand Up @@ -335,12 +342,12 @@ public ScraperEngineBuilder WithParallelismDegree(int parallelismDegree)
/// <summary>
/// Assembles the configured pieces into a <c>ScraperEngine</c>.
/// </summary>
/// <remarks>
/// Steps, in order: the config storage is passed to the spider builder, the
/// scraper config and the spider are built, the config is written through
/// <c>ConfigStorage.CreateConfigAsync</c>, and the engine is constructed.
/// </remarks>
/// <returns>
/// A task whose result is a new <c>ScraperEngine</c> created from the parallelism degree,
/// config storage, scheduler, built spider and logger held by this builder.
/// </returns>
public async Task<ScraperEngine> BuildAsync()
{
    // Hand the config storage to the spider builder before the spider is built.
    SpiderBuilder.WithConfigStorage(ConfigStorage);

    var scraperConfig = ConfigBuilder.Build();
    var builtSpider = SpiderBuilder.Build();

    // NOTE(review): ConfigStorage is declared nullable; this call assumes it is non-null here — confirm.
    await ConfigStorage.CreateConfigAsync(scraperConfig);

    var engine = new ScraperEngine(_parallelismDegree, ConfigStorage, Scheduler, builtSpider, Logger);
    return engine;
}
}
}
10 changes: 8 additions & 2 deletions WebReaper/Builders/SpiderBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ public class SpiderBuilder

protected event Action<ParsedData> ScrapedData;

/// <summary>
/// Stores the given content parser on this builder.
/// </summary>
/// <param name="contentParser">Parser instance assigned to <c>ContentParser</c>.</param>
/// <returns>This builder, so further calls can be chained.</returns>
public SpiderBuilder WithContentParser(IContentParser contentParser)
{
    this.ContentParser = contentParser;

    return this;
}

public SpiderBuilder WithLogger(ILogger logger)
{
Logger = logger;
Expand Down Expand Up @@ -166,7 +172,7 @@ public SpiderBuilder WithRedisCookieStorage(string connectionString, string redi
CookieStorage = new RedisCookieStorage(connectionString, redisKey, Logger);
return this;
}

public SpiderBuilder WithFileCookieStorage(string fileName)
{
CookieStorage = new FileCookieStorage(fileName, Logger);
Expand Down Expand Up @@ -235,4 +241,4 @@ public ISpider Build()

return spider;
}
}
}

0 comments on commit 810db61

Please sign in to comment.