Skip to content

Using LinqToAnything to index a large collection using a hashtable

Harry McIntyre edited this page Nov 17, 2016 · 8 revisions

Ever needed to speed up an in-memory Linq-To-Objects query, but don't want to lose the Linq syntax?

I used this extension on a batch process that reads in several large CSVs and joins them together using Where() clauses; it reduced the processing time from 1 hour to 5 minutes.

Here is an example of indexing a large collection so that a .Where clause is answered by a hashtable lookup instead of by iterating over every item.

// Example entry point: reads two CSV files, indexes both collections on UserId,
// joins every user to the users they are friends with, and prints a timestamp
// before and after the join is materialised.
public static void Program(string[] args)
{
    // Materialise each CSV into an array first, then wrap it in an indexed queryable.
    var users = CsvHelper.ReadCsv<User>("users.csv").ToArray()
        .AsQueryable()
        .IndexOn(u => u.UserId);

    var friendships = CsvHelper.ReadCsv<Friendship>("friendships.csv").ToArray()
        .AsQueryable()
        .IndexOn(fr => fr.UserId);

    // Both inner Where clauses are equality filters on UserId, which is the
    // member each collection was indexed on.
    var joined =
        from user in users
        let friends = friendships.Where(fr => fr.UserId == user.UserId)
            .Select(fr => users.Where(u => u.UserId == fr.FriendUserId).ToArray())
            .ToArray()
        select new { user, friends };

    // The timestamps bracket the ToArray call that enumerates the query.
    Console.WriteLine(DateTime.Now);
    var agg = joined.ToArray();
    Console.WriteLine(DateTime.Now);
}


/// <summary>
/// Adds <c>IndexOn</c>, which wraps a queryable so that an equality filter on one
/// chosen member is served from a pre-built lookup instead of the full sequence.
/// </summary>
public static class IndexOnColumnExtension
{
    /// <summary>
    /// Builds a lookup of <paramref name="items"/> keyed by the selected member and returns
    /// a queryable that uses it whenever a query contains an "Equal" clause on that member.
    /// </summary>
    /// <typeparam name="T">Element type of the sequence.</typeparam>
    /// <typeparam name="TKey">Type of the member used as the lookup key.</typeparam>
    /// <param name="items">Sequence to index; it is enumerated once, immediately, to build the lookup.</param>
    /// <param name="propertySelectorExp">
    /// Member-access lambda such as <c>u => u.UserId</c>. Only the member's name is used to
    /// match query clauses.
    /// </param>
    /// <returns>A queryable over the same items that short-circuits matching equality filters.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="items"/> or <paramref name="propertySelectorExp"/> is null.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The body of <paramref name="propertySelectorExp"/> is not a member access.
    /// </exception>
    public static IQueryable<T> IndexOn<T, TKey>(this IQueryable<T> items, Expression<Func<T, TKey>> propertySelectorExp)
    {
        if (items == null)
        {
            throw new ArgumentNullException(nameof(items));
        }

        if (propertySelectorExp == null)
        {
            throw new ArgumentNullException(nameof(propertySelectorExp));
        }

        // Fail with a descriptive error instead of an InvalidCastException when the
        // selector is something other than a member access (e.g. a method call).
        var memberAccess = propertySelectorExp.Body as MemberExpression;
        if (memberAccess == null)
        {
            throw new ArgumentException(
                "The selector must be a simple member access such as x => x.Property.",
                nameof(propertySelectorExp));
        }

        var memberName = memberAccess.Member.Name;
        var lambda = propertySelectorExp.Compile();

        // Built once, up front; every later query reuses this lookup.
        var lookup = items.ToLookup(lambda, t => t);

        return new DelegateQueryable<T>(qi =>
        {
            // Find the first equality filter that targets the indexed member, if any.
            var whereClause =
                qi.Clauses.OfType<Where>()
                    .FirstOrDefault(c => c.PropertyName == memberName && c.Operator == "Equal");
            if (whereClause != null)
            {
                // NOTE(review): this cast assumes the clause value is a boxed TKey - confirm
                // for nullable keys and for literals of a different numeric type.
                var filteredItems = lookup[(TKey) whereClause.Value].AsQueryable();

                // The lookup has already applied this filter, so drop it and apply the
                // remaining clauses to the narrowed set.
                qi.Clauses = qi.Clauses.Except(new[] {whereClause});
                return qi.ApplyTo(filteredItems);
            }

            // No usable clause: run the query against the full sequence.
            return qi.ApplyTo(items);
        });
    }
}
Clone this wiki locally