-
Notifications
You must be signed in to change notification settings - Fork 8
Using LinqToAnything to index a large collection using a hashtable
Harry McIntyre edited this page Nov 17, 2016
·
8 revisions
Ever needed to speed up an in-memory Linq-To-Objects query, but don't want to lose the Linq syntax?
I used this extension on a batch process which reads in several large CSVs and joins them together using Where() clauses, reducing the processing time from 1hr to 5m.
Here is an example of indexing a large collection so that a .Where clause is answered by a hashtable lookup instead of iterating over every item.
/// <summary>
/// Example entry point: loads users and friendships from CSV, indexes both
/// collections on <c>UserId</c> via <c>IndexOn</c>, and joins each user to the
/// users they are friends with using ordinary LINQ <c>Where</c> clauses.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
public static void Program(string[] args)
{
    // Materialise each CSV once, then wrap it so that equality filters on
    // UserId can be served from the lookup built by IndexOn.
    var users = CsvHelper.ReadCsv<User>("users.csv").ToArray()
        .AsQueryable()
        .IndexOn(u => u.UserId);
    var friendships = CsvHelper.ReadCsv<Friendship>("friendships.csv").ToArray()
        .AsQueryable()
        .IndexOn(fr => fr.UserId);

    // For every user: find their friendship rows, then resolve each row's
    // FriendUserId back to the matching user record(s).
    var joined =
        from user in users
        let friends = friendships.Where(fr => fr.UserId == user.UserId)
            .Select(fr => users.Where(u => u.UserId == fr.FriendUserId).ToArray())
            .ToArray()
        select new { user, friends };

    // The query is only executed by ToArray(); the two timestamps bracket it
    // so the elapsed time can be read off the console output.
    Console.WriteLine(DateTime.Now);
    var agg = joined.ToArray();
    Console.WriteLine(DateTime.Now);
}
/// <summary>
/// Adds <c>IndexOn</c>, which wraps a queryable so that an equality
/// <c>Where</c> clause on one chosen member is answered from a precomputed
/// lookup instead of by scanning every item.
/// </summary>
public static class IndexOnColumnExtension
{
    /// <summary>
    /// Builds a lookup of <paramref name="items"/> keyed by the selected member and
    /// returns a queryable that uses that lookup for equality filters on the member.
    /// </summary>
    /// <typeparam name="T">Element type of the collection.</typeparam>
    /// <typeparam name="TKey">Type of the indexed key.</typeparam>
    /// <param name="items">The collection to index. It is enumerated once, eagerly, to build the lookup.</param>
    /// <param name="propertySelectorExp">A simple member access such as <c>u =&gt; u.UserId</c>.</param>
    /// <returns>A queryable over the same items; queries without a matching clause run against <paramref name="items"/> unchanged.</returns>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="propertySelectorExp"/> is not a member access.</exception>
    public static IQueryable<T> IndexOn<T, TKey>(this IQueryable<T> items, Expression<Func<T, TKey>> propertySelectorExp)
    {
        if (items == null)
        {
            throw new ArgumentNullException("items");
        }
        if (propertySelectorExp == null)
        {
            throw new ArgumentNullException("propertySelectorExp");
        }

        // When TKey differs from the member's declared type (e.g. TKey is object
        // or a nullable), the compiler wraps the member access in Convert nodes;
        // peel them off so the underlying member can still be identified.
        var body = propertySelectorExp.Body;
        while (body.NodeType == ExpressionType.Convert || body.NodeType == ExpressionType.ConvertChecked)
        {
            body = ((UnaryExpression) body).Operand;
        }

        var memberAccess = body as MemberExpression;
        if (memberAccess == null)
        {
            throw new ArgumentException(
                "The selector must be a simple member access such as 'x => x.Property'.",
                "propertySelectorExp");
        }

        var memberName = memberAccess.Member.Name;
        var keySelector = propertySelectorExp.Compile();

        // Built once, up front; every later query reuses this lookup.
        var lookup = items.ToLookup(keySelector, t => t);

        return new DelegateQueryable<T>(qi =>
        {
            // Find the first clause that is an equality test on the indexed member.
            var whereClause =
                qi.Clauses.OfType<Where>()
                    .FirstOrDefault(c => c.PropertyName == memberName && c.Operator == "Equal");

            if (whereClause == null)
            {
                // Nothing the index can help with: run the query over all items.
                return qi.ApplyTo(items);
            }

            // NOTE(review): assumes whereClause.Value is convertible to TKey by a
            // direct cast - confirm against the query provider.
            var filteredItems = lookup[(TKey) whereClause.Value].AsQueryable();

            // The lookup has already applied this clause, so drop it before the
            // remaining clauses are applied to the pre-filtered items.
            qi.Clauses = qi.Clauses.Except(new[] {whereClause});
            return qi.ApplyTo(filteredItems);
        });
    }
}