diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths
index 93c4658..4fb4c7c 100644
--- a/.Rproj.user/shared/notebooks/paths
+++ b/.Rproj.user/shared/notebooks/paths
@@ -1,8 +1,10 @@
 /Users/berenz/Downloads/Template of Abstract in Latex.tex="A4C7846D"
+/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/.gitignore="C912F95E"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/DESCRIPTION="019D16E4"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/controls.R="5BC637B7"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/method_annoy.R="684202BA"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/method_hnsw.R="A4FAA5A3"
+/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/method_mlpack.R="B6A90565"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/method_nnd.R="87049873"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/methods.R="B7F84C4B"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/reclin2_pair_ann.R="1D89EE3E"
@@ -15,8 +17,10 @@
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_mlpack.R="51D2EAA1"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_print.R="AA7835F7"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_reclin2.R="E3E08D07"
+/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/misc/hnsw-nndesc.Rmd="F39A0093"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/tests/tinytest.R="D6BBCDC1"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/vignettes/v1-deduplication.Rmd="9D34DD44"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/vignettes/v2-reclin.Rmd="289A4D2F"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/vignettes/v3-evaluation.Rmd="E778A54F"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/vignettes/v4-integration.Rmd="E3EFC8F1"
+/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/vignettes/v5-bigdata.Rmd="335CBF49"
diff --git a/.gitignore b/.gitignore
index b15e3fc..a83549c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@
 playground
 docs
 inst/doc
+misc
diff --git a/R/blocking.R b/R/blocking.R
index efcec47..6ba6609 100644
--- a/R/blocking.R
+++ b/R/blocking.R
@@ -219,6 +219,7 @@ blocking <- function(x,
                                     y = y_dtm[, colnames_xy],
                                     k = k,
                                     distance = distance,
+                                    deduplication = deduplication,
                                     verbose = if (verbose == 2) TRUE else FALSE,
                                     n_threads = n_threads,
                                     control = control_ann),
diff --git a/R/controls.R b/R/controls.R
index bc5dc28..0e862f2 100644
--- a/R/controls.R
+++ b/R/controls.R
@@ -6,17 +6,19 @@
 #' Controls for ANN algorithms used in the package
 #'
 #' @param sparse whether sparse data should be used as an input for algorithms,
-#' @param nnd parameters for [rnndescent::rnnd_build()] and [rnndescent::rnnd_query()],
-#' @param hnsw parameters for [RcppHNSW::hnsw_build()] and [RcppHNSW::hnsw_search()],
-#' @param lsh parameters for [mlpack::lsh()],
-#' @param annoy parameters for [RcppAnnoy] package,
-#' @param kd parameters for [mlpack::knn()] function.
+#' @param k_search number of neighbours to search,
+#' @param nnd list of parameters for [rnndescent::rnnd_build()] and [rnndescent::rnnd_query()],
+#' @param hnsw list of parameters for [RcppHNSW::hnsw_build()] and [RcppHNSW::hnsw_search()],
+#' @param lsh list of parameters for [mlpack::lsh()],
+#' @param kd list of parameters for [mlpack::knn()] function,
+#' @param annoy list of parameters for [RcppAnnoy] package.
 #'
 #' @returns Returns a list with parameters
 #'
 #' @export
 controls_ann <- function(
     sparse = FALSE,
+    k_search = 30,
     nnd = list(k_build = 30,
                use_alt_metric = TRUE,
                init = "tree",
@@ -45,18 +47,19 @@ controls_ann <- function(
                num_probes = 0,
                projections = 10,
                tables = 30),
-    annoy = list(n_trees = 250,
-                 build_on_disk = FALSE),
     kd = list(algorithm = "dual_tree",
               epsilon = 0,
               leaf_size = 20,
               random_basis = FALSE,
               rho = 0.7,
               tau = 0,
-              tree_type = "kd")
+              tree_type = "kd"),
+    annoy = list(n_trees = 250,
+                 build_on_disk = FALSE)
     ) {
 
    list(sparse = sparse,
+        k_search = k_search,
         nnd = nnd,
         hnsw = hnsw,
         lsh = lsh,
@@ -73,8 +76,8 @@ controls_ann <- function(
 #'
 #' @param n_shingles length of shingles (default `2L`),
 #' @param n_chunks passed to (default `10L`),
-#' @param lowercase should the caracters be made lowercase? (default `TRUE`)
-#' @param strip_non_alphanum should punctuation and white space be stripped? (default `TRUE`)
+#' @param lowercase should the characters be made lowercase? (default `TRUE`),
+#' @param strip_non_alphanum should punctuation and white space be stripped? (default `TRUE`).
 #'
 #' @returns Returns a list with parameters.
 #'
diff --git a/R/method_annoy.R b/R/method_annoy.R
index db064b4..316c495 100644
--- a/R/method_annoy.R
+++ b/R/method_annoy.R
@@ -53,7 +53,7 @@ method_annoy <- function(x,
   }
   if (verbose) l_ind$setVerbose(1)
 
-  ## index - this does not require dense matrix (sparse can be used?)
+  ## index - this does not require dense matrix
   for (i in 1:nrow(x)) l_ind$addItem(i - 1, x[i,])
   l_ind$build(control$annoy$n_trees)
   l_ind_nns <- numeric(length = nrow(y))
@@ -61,7 +61,10 @@ method_annoy <- function(x,
 
   ## query
   for (i in 1:nrow(y)) {
-    annoy_res <- l_ind$getNNsByVectorList(y[i, ], k, -1, TRUE)
+    annoy_res <- l_ind$getNNsByVectorList(y[i, ],
+                                          if (nrow(x) < control$k_search) nrow(x) else control$k_search,
+                                          -1,
+                                          TRUE)
     l_ind_nns[i] <- annoy_res$item[k]
     l_ind_dist[i] <- annoy_res$distance[k]
   }
diff --git a/R/method_hnsw.R b/R/method_hnsw.R
index 5af47ea..3c4bb3b 100644
--- a/R/method_hnsw.R
+++ b/R/method_hnsw.R
@@ -54,9 +54,12 @@ method_hnsw <- function(x,
 
     l_ind$setEf(control$hnsw$ef_s)
 
+    ## this does not handle the control$k_search parameter
     l_1nn_m <- list()
     for (i in 1:nrow(y)) {
-      l_1nn_m[[i]] <- l_ind$getNNsList(y[i,], k, TRUE)
+      l_1nn_m[[i]] <- l_ind$getNNsList(y[i,],
+                                       k,
+                                       TRUE)
     }
 
     l_1nn <- list(idx = do.call("rbind",lapply(l_1nn_m, "[[", "item")),
@@ -76,7 +79,7 @@ method_hnsw <- function(x,
     ## query
     l_1nn <- RcppHNSW::hnsw_search(X = y,
                                    ann = l_ind,
-                                   k = k,
+                                   k = if (nrow(x) < control$k_search) nrow(x) else control$k_search,
                                    ef = control$hnsw$ef_s,
                                    verbose = verbose,
                                    n_threads = n_threads)
diff --git a/R/method_mlpack.R b/R/method_mlpack.R
index d76cf55..0859a63 100644
--- a/R/method_mlpack.R
+++ b/R/method_mlpack.R
@@ -38,7 +38,7 @@ method_mlpack <- function(x,
   y <- as.matrix(y)
 
   result <- switch(algo,
-                   "lsh" = mlpack::lsh(k = k,
+                   "lsh" = mlpack::lsh(k = if (nrow(x) < control$k_search) nrow(x) else control$k_search,
                                        query = y,
                                        reference = x,
                                        verbose = verbose,
@@ -48,7 +48,7 @@ method_mlpack <- function(x,
                                        num_probes = control$lsh$num_probes,
                                        projections = control$lsh$projections,
                                        tables = control$lsh$tables),
-                   "kd" = mlpack::knn(k = k,
+                   "kd" = mlpack::knn(k = if (nrow(x) < control$k_search) nrow(x) else control$k_search,
                                        query = y,
                                        reference = x,
                                        verbose = verbose,
diff --git a/R/method_nnd.R b/R/method_nnd.R
index 341d3f7..e2e0b4a 100644
--- a/R/method_nnd.R
+++ b/R/method_nnd.R
@@ -10,6 +10,7 @@
 #' @param y query data,
 #' @param k number of neighbours to return,
 #' @param distance 	type of distance to calculate,
+#' @param deduplication whether the deduplication is applied,
 #' @param verbose if TRUE, log messages to the console,
 #' @param n_threads maximum number of threads to use,
 #' @param control controls for the NN descent algorithm.
@@ -22,12 +23,13 @@ method_nnd <- function(x,
                        y,
                        k,
                        distance,
+                       deduplication,
                        verbose,
                        n_threads,
                        control) {
 
   l_ind <- rnndescent::rnnd_build(data = x,
-                                  k = if (nrow(x) < control$nnd$k_build) nrow(x) else control$nnd$k_build,
+                                  k = if (nrow(x) < control$nnd$k_build) nrow(x)-1 else control$nnd$k_build,
                                   metric = distance,
                                   verbose = verbose,
                                   n_threads = n_threads,
@@ -49,10 +51,22 @@ method_nnd <- function(x,
                                   progress = control$nnd$progress,
                                   obs = control$nnd$obs)
 
-  ## query
+  ## query: choose how many neighbours to request from rnnd_query()
+  ## (small datasets are problematic, hence the guards on nrow(x) below)
+
+  if (isTRUE(deduplication)) {            # isTRUE(): `T` can be reassigned, NA would error
+    k_nnd_query <- k                      # deduplication: request exactly k neighbours
+  } else if (nrow(x) < 10) {
+    k_nnd_query <- k                      # fewer than 10 rows in x: fall back to k
+  } else if (nrow(x) < control$k_search) {
+    k_nnd_query <- nrow(x)                # x has fewer rows than k_search: use nrow(x)
+  } else {
+    k_nnd_query <- control$k_search       # default: search width from controls_ann()
+  }
+
   l_1nn <- rnndescent::rnnd_query(index = l_ind,
                                   query = y,
-                                  k = k,
+                                  k = k_nnd_query,
                                   epsilon = 0.1,
                                   max_search_fraction = 1,
                                   init = NULL,
diff --git a/R/methods.R b/R/methods.R
index c18763e..0447fb4 100644
--- a/R/methods.R
+++ b/R/methods.R
@@ -20,7 +20,9 @@ print.blocking <- function(x,...) {
   if (!is.null(x$metrics)) {
     cat("========================================================\n")
     cat("Evaluation metrics (standard):\n" )
-    sprintf("%.4f", x$metrics*100)
+    metrics <- as.numeric(sprintf("%.4f", x$metrics*100))  # percentages with 4 decimals
+    names(metrics)  <- names(x$metrics)  # as.numeric() drops names; restore them from x, not a global
+    print(metrics)
 
   }
   invisible(x)
diff --git a/README.Rmd b/README.Rmd
index 60898c5..cc1aff0 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -18,6 +18,10 @@ knitr::opts_chunk$set(
 
 # Overview
 
+## Warning!
+
+The package is under heavy development, so the API as well as functionalities may change.
+
 ## Description
 
 This R package is designed to block records for data deduplication and record linkage (also known as entity resolution) using [approximate nearest neighbours algorithms (ANN)](https://en.wikipedia.org/wiki/Nearest_neighbor_search) and graphs (via the `igraph` package).
diff --git a/README.md b/README.md
index 38757de..f8f9642 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,11 @@ coverage](https://codecov.io/gh/ncn-foreigners/blocking/branch/main/graph/badge.
 
 # Overview
 
+## Warning!
+
+The package is under heavy development, so the API as well as
+functionalities may change.
+
 ## Description
 
 This R package is designed to block records for data deduplication and
diff --git a/inst/tinytest/test_annoy.R b/inst/tinytest/test_annoy.R
index 34c44d3..3877aea 100644
--- a/inst/tinytest/test_annoy.R
+++ b/inst/tinytest/test_annoy.R
@@ -44,14 +44,14 @@ expect_equal(
     list(x = c(1, 1, 1, 2, 2, 2, 2, 3),
          y = c(5L, 6L, 7L, 1L, 2L, 3L, 4L, 8L),
          block = c(2, 2, 2, 1, 1, 1, 1, 3),
-         dist = c(0, 1, 0, 1, 0, 1, 4, 5)),
+         dist = c(0, 1, 0, 1, 0, 1, 4, 4)),
     row.names = c(NA, -8L),
     class = c("data.table", "data.frame")),
     method = "annoy",
     deduplication = FALSE,
     metrics = NULL,
-    colnames = c("al", "an", "ho", "ij", "ja", "ki", "ko", "ls", "mo", "nt",
-                 "ow", "py", "sk", "ty", "wa", "yp", "yt", "on",  "th"),
+    colnames = c("al", "an", "ho", "ij", "ja", "ki", "ko", "ls", "mo", "ow",
+                 "py", "sk", "ty", "wa", "yp", "yt", "nt", "on", "th"),
     graph = NULL),
     class = "blocking")
 )
diff --git a/inst/tinytest/test_blocking.R b/inst/tinytest/test_blocking.R
index ac1fba9..71e75d7 100644
--- a/inst/tinytest/test_blocking.R
+++ b/inst/tinytest/test_blocking.R
@@ -5,6 +5,23 @@ expect_silent(
 )
 
 
+expect_equal(
+  blocking(x = df_example$txt)$result$block,
+  c(1, 1, 1, 2, 2, 2)
+)
+
+
+expect_equal(
+  blocking(x = df_example$txt, ann = "hnsw")$result$block,
+  c(1, 1, 1, 2, 2, 2)
+)
+
+expect_equal(
+  blocking(x = df_example$txt, ann = "annoy")$result$block,
+  c(1, 1, 1, 2, 2, 2)
+)
+
+
 expect_equal(
   blocking(x = df_example$txt, ann = "lsh")$result$block,
   c(1, 1, 1, 2, 2, 2)
@@ -15,11 +32,29 @@ expect_equal(
   c(1, 1, 1, 2, 2, 2)
 )
 
-
 expect_silent(
   blocking(x = df_base$txt, y = df_example$txt)
 )
 
+expect_equal(
+  blocking(x = df_base$txt, y = df_example$txt)$result$block,
+  c(rep(2,4),rep(1,4))
+)
+
+expect_equal(
+  blocking(x = df_base$txt, y = df_example$txt, ann = "hnsw")$result$block,
+  c(rep(2,4),rep(1,4))
+)
+
+expect_equal(
+  blocking(x = df_base$txt, y = df_example$txt, ann = "annoy")$result$block,
+  c(rep(2,4),rep(1,4))
+)
+
+expect_equal(
+  blocking(x = df_base$txt, y = df_example$txt, ann = "lsh")$result$block,
+  c(rep(2,3),rep(1,4), 3)
+)
 
 expect_silent(
   blocking(x = mat_y)
diff --git a/inst/tinytest/test_data.R b/inst/tinytest/test_data.R
index b760f2b..1a7e7de 100644
--- a/inst/tinytest/test_data.R
+++ b/inst/tinytest/test_data.R
@@ -11,7 +11,7 @@ df_example <- data.frame(txt = c(
   "cyrkmontypython",
   "monty"
 ))
-df_base <- data.frame(txt = c("montypython", "kowalskijan", "other"))
+df_base <- data.frame(txt = c("montypython", "kowalskijan", "somethingcompletelydifferent"))
 
 
 
diff --git a/inst/tinytest/test_hnsw.R b/inst/tinytest/test_hnsw.R
index 5cbebc9..f4ac9e6 100644
--- a/inst/tinytest/test_hnsw.R
+++ b/inst/tinytest/test_hnsw.R
@@ -25,8 +25,8 @@ expect_equal(
     method = "hnsw",
     deduplication = FALSE,
     metrics = NULL,
-    colnames = c("al", "an", "ho", "ij", "ja", "ki", "ko", "ls", "mo", "nt", "ow",
-                 "py", "sk", "ty", "wa", "yp", "yt", "on", "th"),
+    colnames = c("al", "an", "ho", "ij", "ja", "ki", "ko", "ls", "mo", "ow",
+                 "py", "sk", "ty", "wa", "yp", "yt", "nt", "on", "th"),
     graph = NULL),
     class = "blocking")
 )
@@ -56,8 +56,8 @@ expect_equal(
     method = "hnsw",
     deduplication = FALSE,
     metrics = NULL,
-    colnames = c("al", "an", "ho", "ij", "ja", "ki", "ko", "ls", "mo", "nt", "ow",
-                 "py", "sk", "ty", "wa", "yp", "yt", "on", "th"),
+    colnames = c("al", "an", "ho", "ij", "ja", "ki", "ko", "ls", "mo", "nt",
+                 "ow", "py", "sk", "ty", "wa", "yp", "yt", "on", "th"),
     graph = NULL),
     class = "blocking")
 )
diff --git a/inst/tinytest/test_mlpack.R b/inst/tinytest/test_mlpack.R
index 27172f0..45703ea 100644
--- a/inst/tinytest/test_mlpack.R
+++ b/inst/tinytest/test_mlpack.R
@@ -4,18 +4,18 @@ expect_equal(
   blocking(x = df_base$txt,
            y = df_example$txt,
            ann = "lsh"),
-  structure(list(result = structure(
-    list(x = c(1, 1, 1, 2, 2, 2, 2, 3),
-         y = c(5L, 6L, 7L, 1L, 2L, 3L, 4L, 8L),
-         block = c(2, 2, 2, 1, 1, 1, 1, 3),
-         dist = c(0, 1, 0, 1, 0, 1, 2, 2.23606797749979)),
+  structure(list(result = structure(list(
+    x = c(1, 1, 1, 2, 2, 2, 2, 3),
+    y = c(5L, 6L, 7L, 1L, 2L, 3L, 4L, 8L),
+    block = c(2, 2, 2, 1, 1, 1, 1, 3),
+    dist = c(0, 1, 0, 1, 0, 1, 2, 2)),
     row.names = c(NA, -8L),
     class = c("data.table", "data.frame")),
     method = "lsh",
     deduplication = FALSE,
     metrics = NULL,
-    colnames = c("al", "an", "ho", "ij", "ja", "ki", "ko", "ls", "mo", "nt", "ow",
-                 "py", "sk", "ty", "wa", "yp", "yt", "on", "th"),
+    colnames = c("al", "an", "ho", "ij", "ja", "ki", "ko", "ls", "mo", "ow",
+                 "py", "sk", "ty", "wa", "yp", "yt", "nt", "on", "th"),
     graph = NULL),
     class = "blocking")
 )
@@ -28,14 +28,14 @@ expect_equal(
     list(x = c(1, 1, 1, 2, 2, 2, 2, 3),
          y = c(5L, 6L, 7L, 1L, 2L, 3L, 4L, 8L),
          block = c(2, 2, 2, 1, 1, 1, 1, 3),
-         dist = c(0, 1, 0, 1, 0, 1, 2, 2.23606797749979)),
+         dist = c(0, 1, 0, 1, 0, 1, 2, 2)),
     row.names = c(NA, -8L),
     class = c("data.table", "data.frame")),
     method = "kd",
     deduplication = FALSE,
     metrics = NULL,
-    colnames = c("al", "an", "ho", "ij", "ja", "ki", "ko", "ls", "mo", "nt", "ow",
-                 "py", "sk", "ty", "wa", "yp", "yt", "on", "th"),
+    colnames =c("al", "an", "ho", "ij", "ja", "ki", "ko", "ls", "mo", "ow",
+                "py", "sk", "ty", "wa", "yp", "yt", "nt", "on", "th"),
     graph = NULL),
     class = "blocking")
 )
@@ -61,7 +61,7 @@ expect_equal(
     list(x = c(1, 1, 1, 2, 2, 2, 2, 3),
          y = c(5L, 6L, 7L, 1L, 2L, 3L, 4L, 8L),
          block = c(2, 2, 2, 1, 1, 1, 1, 3),
-         dist = c(0, 1, 0, 1, 0, 1, 2, 2.23606797749979)),
+         dist = c(0, 1, 0, 1, 0, 1, 2, 2.236068)),
     row.names = c(NA, -8L),
     class = c("data.table", "data.frame")),
     method = "lsh",
@@ -82,7 +82,7 @@ expect_equal(
     list(x = c(1, 1, 1, 2, 2, 2, 2, 3),
          y = c(5L, 6L, 7L, 1L, 2L, 3L, 4L, 8L),
          block = c(2, 2, 2, 1, 1, 1, 1, 3),
-         dist = c(0, 1, 0, 1, 0, 1, 2, 2.23606797749979)),
+         dist = c(0, 1, 0, 1, 0, 1, 2, 2.236068)),
     row.names = c(NA, -8L),
     class = c("data.table", "data.frame")),
     method = "kd",
diff --git a/man/controls_ann.Rd b/man/controls_ann.Rd
index 03496fa..614d0ae 100644
--- a/man/controls_ann.Rd
+++ b/man/controls_ann.Rd
@@ -6,6 +6,7 @@
 \usage{
 controls_ann(
   sparse = FALSE,
+  k_search = 30,
   nnd = list(k_build = 30, use_alt_metric = TRUE, init = "tree", n_trees = NULL,
     leaf_size = NULL, max_tree_depth = 200, margin = "auto", n_iters = NULL, delta =
     0.001, max_candidates = NULL, low_memory = TRUE, n_search_trees = 1,
@@ -14,23 +15,25 @@ controls_ann(
   hnsw = list(M = 25, ef_c = 200, ef_s = 200, grain_size = 1, byrow = TRUE),
   lsh = list(bucket_size = 500, hash_width = 10, num_probes = 0, projections = 10, tables
     = 30),
-  annoy = list(n_trees = 250, build_on_disk = FALSE),
   kd = list(algorithm = "dual_tree", epsilon = 0, leaf_size = 20, random_basis = FALSE,
-    rho = 0.7, tau = 0, tree_type = "kd")
+    rho = 0.7, tau = 0, tree_type = "kd"),
+  annoy = list(n_trees = 250, build_on_disk = FALSE)
 )
 }
 \arguments{
 \item{sparse}{whether sparse data should be used as an input for algorithms,}
 
-\item{nnd}{parameters for \code{\link[rnndescent:rnnd_build]{rnndescent::rnnd_build()}} and \code{\link[rnndescent:rnnd_query]{rnndescent::rnnd_query()}},}
+\item{k_search}{number of neighbours to search,}
 
-\item{hnsw}{parameters for \code{\link[RcppHNSW:hnsw_build]{RcppHNSW::hnsw_build()}} and \code{\link[RcppHNSW:hnsw_search]{RcppHNSW::hnsw_search()}},}
+\item{nnd}{list of parameters for \code{\link[rnndescent:rnnd_build]{rnndescent::rnnd_build()}} and \code{\link[rnndescent:rnnd_query]{rnndescent::rnnd_query()}},}
 
-\item{lsh}{parameters for \code{\link[mlpack:lsh]{mlpack::lsh()}},}
+\item{hnsw}{list of parameters for \code{\link[RcppHNSW:hnsw_build]{RcppHNSW::hnsw_build()}} and \code{\link[RcppHNSW:hnsw_search]{RcppHNSW::hnsw_search()}},}
 
-\item{annoy}{parameters for \link{RcppAnnoy} package,}
+\item{lsh}{list of parameters for \code{\link[mlpack:lsh]{mlpack::lsh()}},}
 
-\item{kd}{parameters for \code{\link[mlpack:knn]{mlpack::knn()}} function.}
+\item{annoy}{list of parameters for \link{RcppAnnoy} package.}
+
+\item{kd}{list of parameters for \code{\link[mlpack:knn]{mlpack::knn()}} function,}
 }
 \value{
 Returns a list with parameters
diff --git a/man/controls_txt.Rd b/man/controls_txt.Rd
index 5425f65..0e71a13 100644
--- a/man/controls_txt.Rd
+++ b/man/controls_txt.Rd
@@ -16,9 +16,9 @@ controls_txt(
 
 \item{n_chunks}{passed to (default \code{10L}),}
 
-\item{lowercase}{should the caracters be made lowercase? (default \code{TRUE})}
+\item{lowercase}{should the characters be made lowercase? (default \code{TRUE}),}
 
-\item{strip_non_alphanum}{should punctuation and white space be stripped? (default \code{TRUE})}
+\item{strip_non_alphanum}{should punctuation and white space be stripped? (default \code{TRUE}).}
 }
 \value{
 Returns a list with parameters.
diff --git a/man/method_nnd.Rd b/man/method_nnd.Rd
index c73bd0e..aadcae4 100644
--- a/man/method_nnd.Rd
+++ b/man/method_nnd.Rd
@@ -4,7 +4,7 @@
 \alias{method_nnd}
 \title{An internal function to use the NN descent algorithm via the \link{rnndescent} package.}
 \usage{
-method_nnd(x, y, k, distance, verbose, n_threads, control)
+method_nnd(x, y, k, distance, deduplication, verbose, n_threads, control)
 }
 \arguments{
 \item{x}{deduplication or reference data,}
@@ -15,6 +15,8 @@ method_nnd(x, y, k, distance, verbose, n_threads, control)
 
 \item{distance}{type of distance to calculate,}
 
+\item{deduplication}{whether the deduplication is applied,}
+
 \item{verbose}{if TRUE, log messages to the console,}
 
 \item{n_threads}{maximum number of threads to use,}
diff --git a/vignettes/v2-reclin.Rmd b/vignettes/v2-reclin.Rmd
index 329261a..16d312f 100644
--- a/vignettes/v2-reclin.Rmd
+++ b/vignettes/v2-reclin.Rmd
@@ -122,12 +122,6 @@ Let's take a look at the first pair. Obviously there is a typo in the `pername1`
 cbind(t(census[1, 1:9]), t(cis[8152, 1:9]))
 ```
 
-Now, let's look at the 7th pair with the largest distance from the first 10 rows. This seems to be a non-match because only `pername2` and `sex` are the same.
-
-```{r}
-cbind( t(census[8, 1:9]), t(cis[3901, 1:9]))
-```
-
 
 ## Assessing the quality
 
@@ -155,13 +149,14 @@ Let's see how our approach handled this problem.
 result2
 ```
 
-It seems that the default parameters of the NND method result in an FNR of `r sprintf("%.1f",result2$metrics["fnr"]*100)`%, which is quite large. We can see if increasing the number of `k` (and thus `max_candidates`) as suggested in the [Nearest Neighbor Descent
+It seems that the default parameters of the NND method result in an FNR of `r sprintf("%.2f",result2$metrics["fnr"]*100)`%, which is quite large. We can see if increasing the number of `k` (and thus `max_candidates`) as suggested in the [Nearest Neighbor Descent
 ](https://jlmelville.github.io/rnndescent/articles/nearest-neighbor-descent.html) vignette will help. 
 
 
 ```{r}
 set.seed(2024)
 ann_control_pars <- controls_ann()
+ann_control_pars$k_search <- 60
 ann_control_pars$nnd$k_build <- 60
 
 result3 <- blocking(x = census$txt, y = cis$txt, verbose = 1, 
@@ -201,6 +196,7 @@ Computation times are: 16 seconds for NND and about 60 for HNSW (on M2 MacBook A
 Finally, we can compare the results of two ANN algorithms. The overlap between neighbours is given by
 
 ```{r}
-mean(result3$result[order(y)]$x == result4$result[order(y)]$x)*100
+c("no tuning" = mean(result2$result[order(y)]$x == result4$result[order(y)]$x)*100,
+  "with tuning" = mean(result3$result[order(y)]$x == result4$result[order(y)]$x)*100)
 ```