Implement `data_separate()` #431

strengejacke · 2023-06-09T09:14:08Z

Fixes #423

strengejacke · 2023-06-09T11:52:09Z

This is really a long list of examples...
@jmgirard @etiennebacher Most of the function should work; only merge_multiple does not seem to work yet. I will add tests in the next few days...

library(datawizard)
d <- data.frame(x = c(NA, "x.y", "x.z.y", "y.z", "1.2.3"))
d
#>       x
#> 1  <NA>
#> 2   x.y
#> 3 x.z.y
#> 4   y.z
#> 5 1.2.3

data_separate(d)
#>   split_1 split_2
#> 1    <NA>    <NA>
#> 2       x       y
#> 3       x       z
#> 4       y       z
#> 5       1       2

data_separate(d, guess_columns = "max")
#>   split_1 split_2 split_3
#> 1    <NA>    <NA>    <NA>
#> 2       x       y    <NA>
#> 3       x       z       y
#> 4       y       z    <NA>
#> 5       1       2       3

data_separate(d, guess_columns = "min")
#>   split_1
#> 1    <NA>
#> 2       x
#> 3       x
#> 4       y
#> 5       1

# new_columns overwrites guess_columns
data_separate(d, new_columns = c("A", "B"), guess_columns = "min")
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    x    z
#> 4    y    z
#> 5    1    2

data_separate(d, new_columns = c("A", "B"))
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    x    z
#> 4    y    z
#> 5    1    2

data_separate(d, new_columns = c("A", "B", "C"))
#>      A    B    C
#> 1 <NA> <NA> <NA>
#> 2    x    y <NA>
#> 3    x    z    y
#> 4    y    z <NA>
#> 5    1    2    3

data_separate(d, new_columns = c("A", "B", "C"), fill = "left")
#>      A    B    C
#> 1 <NA> <NA> <NA>
#> 2 <NA>    x    y
#> 3    x    z    y
#> 4 <NA>    y    z
#> 5    1    2    3

data_separate(d, new_columns = c("A", "B", "C"), fill = "right")
#>      A    B    C
#> 1 <NA> <NA> <NA>
#> 2    x    y <NA>
#> 3    x    z    y
#> 4    y    z <NA>
#> 5    1    2    3

data_separate(d, new_columns = c("A", "B", "C"), fill = "value_left")
#>      A    B    C
#> 1 <NA> <NA> <NA>
#> 2    x    x    y
#> 3    x    z    y
#> 4    y    y    z
#> 5    1    2    3

data_separate(d, new_columns = c("A", "B", "C"), fill = "value_right")
#>      A    B    C
#> 1 <NA> <NA> <NA>
#> 2    x    y    y
#> 3    x    z    y
#> 4    y    z    z
#> 5    1    2    3

data_separate(d, new_columns = c("A", "B"), extra = "merge_right")
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    x  z y
#> 4    y    z
#> 5    1  2 3

data_separate(d, new_columns = c("A", "B"), extra = "merge_left")
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3  x z    y
#> 4    y    z
#> 5  1 2    3

data_separate(d, new_columns = c("A", "B"), extra = "drop_right")
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    x    z
#> 4    y    z
#> 5    1    2

data_separate(d, new_columns = c("A", "B"), extra = "drop_left")
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    z    y
#> 4    y    z
#> 5    2    3



d <- data.frame(
  x = c(NA, "x.y", "x.z.y", "y.z", "1.2.3"),
  y = c(NA, "a.b", "a.b.c", "a.c", "5.6.7")
)
d
#>       x     y
#> 1  <NA>  <NA>
#> 2   x.y   a.b
#> 3 x.z.y a.b.c
#> 4   y.z   a.c
#> 5 1.2.3 5.6.7

data_separate(d)
#>   split_1 split_2 split_1.1 split_2.1
#> 1    <NA>    <NA>      <NA>      <NA>
#> 2       x       y         a         b
#> 3       x       z         a         b
#> 4       y       z         a         c
#> 5       1       2         5         6

data_separate(d, new_columns = c("A", "B"))
#>      A    B  A.1  B.1
#> 1 <NA> <NA> <NA> <NA>
#> 2    x    y    a    b
#> 3    x    z    a    b
#> 4    y    z    a    c
#> 5    1    2    5    6

data_separate(d, new_columns = c("A", "B", "C"))
#>      A    B    C  A.1  B.1  C.1
#> 1 <NA> <NA> <NA> <NA> <NA> <NA>
#> 2    x    y <NA>    a    b <NA>
#> 3    x    z    y    a    b    c
#> 4    y    z <NA>    a    c <NA>
#> 5    1    2    3    5    6    7

data_separate(d, select = "x")
#>   split_1 split_2
#> 1    <NA>    <NA>
#> 2       x       y
#> 3       x       z
#> 4       y       z
#> 5       1       2

data_separate(d, select = "x", new_columns = c("A", "B"))
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    x    z
#> 4    y    z
#> 5    1    2

data_separate(d, select = "x", new_columns = c("A", "B"), append = TRUE)
#>       x     y    A    B
#> 1  <NA>  <NA> <NA> <NA>
#> 2   x.y   a.b    x    y
#> 3 x.z.y a.b.c    x    z
#> 4   y.z   a.c    y    z
#> 5 1.2.3 5.6.7    1    2

data_separate(d, append = TRUE)
#>       x     y split_1 split_2 split_1.1 split_2.1
#> 1  <NA>  <NA>    <NA>    <NA>      <NA>      <NA>
#> 2   x.y   a.b       x       y         a         b
#> 3 x.z.y a.b.c       x       z         a         b
#> 4   y.z   a.c       y       z         a         c
#> 5 1.2.3 5.6.7       1       2         5         6

data_separate(d, new_columns = c("A", "B"), append = TRUE)
#>       x     y    A    B  A.1  B.1
#> 1  <NA>  <NA> <NA> <NA> <NA> <NA>
#> 2   x.y   a.b    x    y    a    b
#> 3 x.z.y a.b.c    x    z    a    b
#> 4   y.z   a.c    y    z    a    c
#> 5 1.2.3 5.6.7    1    2    5    6



data_separate(d, extra = "drop_left")
#>   split_1 split_2 split_1.1 split_2.1
#> 1    <NA>    <NA>      <NA>      <NA>
#> 2       x       y         a         b
#> 3       z       y         b         c
#> 4       y       z         a         c
#> 5       2       3         6         7

data_separate(d, extra = "drop_right")
#>   split_1 split_2 split_1.1 split_2.1
#> 1    <NA>    <NA>      <NA>      <NA>
#> 2       x       y         a         b
#> 3       x       z         a         b
#> 4       y       z         a         c
#> 5       1       2         5         6

data_separate(d, extra = "merge_left")
#>   split_1 split_2 split_1.1 split_2.1
#> 1    <NA>    <NA>      <NA>      <NA>
#> 2       x       y         a         b
#> 3     x z       y       a b         c
#> 4       y       z         a         c
#> 5     1 2       3       5 6         7

data_separate(d, extra = "merge_right")
#>   split_1 split_2 split_1.1 split_2.1
#> 1    <NA>    <NA>      <NA>      <NA>
#> 2       x       y         a         b
#> 3       x     z y         a       b c
#> 4       y       z         a         c
#> 5       1     2 3         5       6 7



data_separate(d, new_columns = c("A", "B", "C"))
#>      A    B    C  A.1  B.1  C.1
#> 1 <NA> <NA> <NA> <NA> <NA> <NA>
#> 2    x    y <NA>    a    b <NA>
#> 3    x    z    y    a    b    c
#> 4    y    z <NA>    a    c <NA>
#> 5    1    2    3    5    6    7

data_separate(d, new_columns = c("A", "B", "C"), fill = "left")
#>      A    B    C  A.1  B.1  C.1
#> 1 <NA> <NA> <NA> <NA> <NA> <NA>
#> 2 <NA>    x    y <NA>    a    b
#> 3    x    z    y    a    b    c
#> 4 <NA>    y    z <NA>    a    c
#> 5    1    2    3    5    6    7

data_separate(d, new_columns = c("A", "B", "C"), fill = "right")
#>      A    B    C  A.1  B.1  C.1
#> 1 <NA> <NA> <NA> <NA> <NA> <NA>
#> 2    x    y <NA>    a    b <NA>
#> 3    x    z    y    a    b    c
#> 4    y    z <NA>    a    c <NA>
#> 5    1    2    3    5    6    7

data_separate(d, new_columns = c("A", "B", "C"), fill = "value_left")
#>      A    B    C  A.1  B.1  C.1
#> 1 <NA> <NA> <NA> <NA> <NA> <NA>
#> 2    x    x    y    a    a    b
#> 3    x    z    y    a    b    c
#> 4    y    y    z    a    a    c
#> 5    1    2    3    5    6    7

data_separate(d, new_columns = c("A", "B", "C"), fill = "value_right")
#>      A    B    C  A.1  B.1  C.1
#> 1 <NA> <NA> <NA> <NA> <NA> <NA>
#> 2    x    y    y    a    b    b
#> 3    x    z    y    a    b    c
#> 4    y    z    z    a    c    c
#> 5    1    2    3    5    6    7



data_separate(d, select = "x")
#>   split_1 split_2
#> 1    <NA>    <NA>
#> 2       x       y
#> 3       x       z
#> 4       y       z
#> 5       1       2

data_separate(d, select = "x", new_columns = c("A", "B"))
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    x    z
#> 4    y    z
#> 5    1    2

data_separate(d, select = "x", new_columns = c("A", "B"), append = TRUE)
#>       x     y    A    B
#> 1  <NA>  <NA> <NA> <NA>
#> 2   x.y   a.b    x    y
#> 3 x.z.y a.b.c    x    z
#> 4   y.z   a.c    y    z
#> 5 1.2.3 5.6.7    1    2

data_separate(d, append = TRUE)
#>       x     y split_1 split_2 split_1.1 split_2.1
#> 1  <NA>  <NA>    <NA>    <NA>      <NA>      <NA>
#> 2   x.y   a.b       x       y         a         b
#> 3 x.z.y a.b.c       x       z         a         b
#> 4   y.z   a.c       y       z         a         c
#> 5 1.2.3 5.6.7       1       2         5         6

data_separate(d, new_columns = c("A", "B"), append = TRUE)
#>       x     y    A    B  A.1  B.1
#> 1  <NA>  <NA> <NA> <NA> <NA> <NA>
#> 2   x.y   a.b    x    y    a    b
#> 3 x.z.y a.b.c    x    z    a    b
#> 4   y.z   a.c    y    z    a    c
#> 5 1.2.3 5.6.7    1    2    5    6



d <- data.frame(
  x = c(NA, "abcdefghijk", "hijklmnopqr", "lmnopqrstuvw", "pqrstuvwxyz"),
  y = c(NA, "12234567i89", "545643543j5jkjkl", "434234234jlk432423", "45543kljkjk45435345345")
)
d
#>              x                      y
#> 1         <NA>                   <NA>
#> 2  abcdefghijk            12234567i89
#> 3  hijklmnopqr       545643543j5jkjkl
#> 4 lmnopqrstuvw     434234234jlk432423
#> 5  pqrstuvwxyz 45543kljkjk45435345345

data_separate(d, separator = c(2, 5, 7))
#>   split_1 split_2 split_3 split_4 split_5 split_1.1 split_2.1 split_3.1
#> 1    <NA>      ab      hi      lm      pq      <NA>        12        54
#> 2    <NA>   cdefg   jklmn   nopqr   rstuv      <NA>     23456     56435
#> 3    <NA>    hijk    opqr   stuvw    wxyz      <NA>      7i89   43j5jkj
#> 4    <NA>    <NA>    <NA>    <NA>    <NA>      <NA>      <NA>      <NA>
#>   split_4.1 split_5.1
#> 1        43        45
#> 2     42342     543kl
#> 3   34jlk43   jkjk454
#> 4      <NA>      <NA>

<sup>Created on 2023-06-09 with reprex v2.0.2</sup>

strengejacke · 2023-06-09T12:30:22Z

OK, merging multiple split columns and the numeric separator both work now:

library(datawizard)
d <- data.frame(
  x = c(NA, "x.y", "x.z.y", "y.z", "1.2.3"),
  y = c(NA, "a.b", "a.b.c", "a.c", "5.6.7")
)
d
#>       x     y
#> 1  <NA>  <NA>
#> 2   x.y   a.b
#> 3 x.z.y a.b.c
#> 4   y.z   a.c
#> 5 1.2.3 5.6.7

data_separate(d, new_columns = c("A", "B"))
#>      A    B  A.1  B.1
#> 1 <NA> <NA> <NA> <NA>
#> 2    x    y    a    b
#> 3    x    z    a    b
#> 4    y    z    a    c
#> 5    1    2    5    6
data_separate(d, new_columns = c("A", "B"), merge_multiple = TRUE)
#>       A     B
#> 1 NA NA NA NA
#> 2   x a   y b
#> 3   x a   z b
#> 4   y a   z c
#> 5   1 5   2 6

data_separate(d, new_columns = c("A", "B", "C"))
#>      A    B    C  A.1  B.1  C.1
#> 1 <NA> <NA> <NA> <NA> <NA> <NA>
#> 2    x    y <NA>    a    b <NA>
#> 3    x    z    y    a    b    c
#> 4    y    z <NA>    a    c <NA>
#> 5    1    2    3    5    6    7
data_separate(d, new_columns = c("A", "B", "C"), merge_multiple = TRUE)
#>       A     B     C
#> 1 NA NA NA NA NA NA
#> 2   x a   y b NA NA
#> 3   x a   z b   y c
#> 4   y a   z c NA NA
#> 5   1 5   2 6   3 7

data_separate(d, extra = "drop_left")
#>   split_1 split_2 split_1.1 split_2.1
#> 1    <NA>    <NA>      <NA>      <NA>
#> 2       x       y         a         b
#> 3       z       y         b         c
#> 4       y       z         a         c
#> 5       2       3         6         7
data_separate(d, extra = "drop_left", merge_multiple = TRUE)
#>   split_1 split_2
#> 1   NA NA   NA NA
#> 2     x a     y b
#> 3     z b     y c
#> 4     y a     z c
#> 5     2 6     3 7

d <- data.frame(
  x = c(NA, "abcdefghijk", "hijklmnopqr", "lmnopqrstuvw", "pqrstuvwxyz"),
  y = c(NA, "12234567i89", "545643543j5jkjkl", "434234234jlk432423", "45543kljkjk45435345345")
)
d
#>              x                      y
#> 1         <NA>                   <NA>
#> 2  abcdefghijk            12234567i89
#> 3  hijklmnopqr       545643543j5jkjkl
#> 4 lmnopqrstuvw     434234234jlk432423
#> 5  pqrstuvwxyz 45543kljkjk45435345345

data_separate(d, separator = c(2, 5, 7))
#>    split_1 split_2 split_3 split_4 split_1.1 split_2.1 split_3.1
#> V1    <NA>    <NA>    <NA>    <NA>      <NA>      <NA>      <NA>
#> V2       a     bcd      ef   ghijk         1       223        45
#> V3       h     ijk      lm   nopqr         5       456        43
#> V4       l     mno      pq  rstuvw         4       342        34
#> V5       p     qrs      tu   vwxyz         4       554        3k
#>           split_4.1
#> V1             <NA>
#> V2            67i89
#> V3       543j5jkjkl
#> V4     234jlk432423
#> V5 ljkjk45435345345
data_separate(d, separator = c(2, 5, 7), merge_multiple = TRUE)
#>    split_1 split_2 split_3                split_4
#> V1   NA NA   NA NA   NA NA                  NA NA
#> V2     a 1 bcd 223   ef 45            ghijk 67i89
#> V3     h 5 ijk 456   lm 43       nopqr 543j5jkjkl
#> V4     l 4 mno 342   pq 34    rstuvw 234jlk432423
#> V5     p 4 qrs 554   tu 3k vwxyz ljkjk45435345345

data_separate(d, separator = c(3, 7, 11))
#>    split_1 split_2 split_3 split_4 split_1.1 split_2.1 split_3.1    split_4.1
#> V1    <NA>    <NA>    <NA>    <NA>      <NA>      <NA>      <NA>         <NA>
#> V2      ab    cdef    ghij       k        12      2345      67i8            9
#> V3      hi    jklm    nopq       r        54      5643      543j       5jkjkl
#> V4      lm    nopq    rstu      vw        43      4234      234j     lk432423
#> V5      pq    rstu    vwxy       z        45      543k      ljkj k45435345345
data_separate(d, separator = c(3, 7, 11), merge_multiple = TRUE)
#>    split_1   split_2   split_3        split_4
#> V1   NA NA     NA NA     NA NA          NA NA
#> V2   ab 12 cdef 2345 ghij 67i8            k 9
#> V3   hi 54 jklm 5643 nopq 543j       r 5jkjkl
#> V4   lm 43 nopq 4234 rstu 234j    vw lk432423
#> V5   pq 45 rstu 543k vwxy ljkj z k45435345345

<sup>Created on 2023-06-09 with reprex v2.0.2</sup>

etiennebacher

Before reviewing the code, I'd like to discuss a bit about the behavior of data_separate() because there are things that are weird to me (sorry I feel like I'm the guy always criticizing your implementation of functions ^^).

Basically there are two things that I don't like in the current behavior:

the fact that it can be applied on several cols
automatic naming of new cols (which is a consequence of the first point)

I feel that separating several columns at once is not desirable because it's harder to control the output. Maybe you have use cases for this, but personally I've always needed separate() on one column at a time because I want to control exactly the new columns that will be generated.

Also, I think being able to split only one col at a time leads to more readable code. For instance, some code with the current implementation would be:

library(datawizard)

# multiple columns to split
d <- data.frame(
  dep_date = c("2022-07-02", "2001-09-11", "2010-12-24"),
  arr_date = c("2023-01-02", "2011-12-10", "2011-01-24"),
  stringsAsFactors = FALSE
)
d
#>     dep_date   arr_date
#> 1 2022-07-02 2023-01-02
#> 2 2001-09-11 2011-12-10
#> 3 2010-12-24 2011-01-24

# split two columns, default column names
d |> 
  data_separate(select = contains("date")) |> 
  data_rename(
    pattern = c("split_1", "split_2", "split_3", 
                "split_1.1", "split_2.1", "split_3.1"),
    replacement = c("year_dep", "month_dep", "day_dep", 
                    "year_arr", "month_arr", "day_arr")
  )
#> Column `dep_date` had different number of values after splitting.
#>   Variable was split into 3 columns.
#> Column `arr_date` had different number of values after splitting.
#>   Variable was split into 3 columns.
#>   year_dep month_dep day_dep year_arr month_arr day_arr
#> 1     2022        07      02     2023        01      02
#> 2     2001        09      11     2011        12      10
#> 3     2010        12      24     2011        01      24

We are forced to rename the new cols because it's very unlikely that the generated names are satisfactory. The problem is that if the order of the two columns changes in the original data, the renaming will be wrong.

I think it would be more readable and safe to have something like this:

d |> 
  data_separate(dep_date, new_columns = c("year_dep", "month_dep", "day_dep")) |> 
  data_separate(arr_date, new_columns = c("year_arr", "month_arr", "day_arr"))

In summary, I think we should remove:

the automatic column renaming and therefore the arg guess_columns
separating several columns at once and therefore the args merge_multiple and merge_separator (which I find quite confusing and I really don't see a use case for them).

What do you think?

strengejacke · 2023-06-11T09:43:43Z

I'm not sure if this code can be reached:

    # check if column names should be recycled
    if (ncol(out) != length(new_column_names)) {
      # recycle names, avoid duplicates
      new_column_names <- make.unique(rep(new_column_names, times = ncol(out) / new_column_names))
    }

Other than that, we have 100% code coverage in tests. Snapshot outputs are validated.

strengejacke · 2023-06-11T13:18:36Z

What about this:

library(datawizard)
# separate multiple columns, give proper column names
d_sep <- data.frame(
  x = c("1.a.6", "2.b.7.d", "3.c.8", "5.j"),
  y = c("m.n.99.22", "77.f.g.34", "44.9", NA),
  stringsAsFactors = FALSE
)
data_separate(
  d_sep,
  select = c("x", "y"),
  new_columns = list(
    x = c("A", "B", "C"), # separate "x" into three columns
    y = c("EE", "FF", "GG", "HH") # separate "y" into four columns
  ),
  verbose = FALSE
)
#>   A B    C   EE   FF   GG   HH
#> 1 1 a    6    m    n   99   22
#> 2 2 b    7   77    f    g   34
#> 3 3 c    8   44    9 <NA> <NA>
#> 4 5 j <NA> <NA> <NA> <NA> <NA>

<sup>Created on 2023-06-11 with reprex v2.0.2</sup>

etiennebacher · 2023-06-12T06:43:52Z

Separating multiple cols

Actually, I'm fine with separating several columns at the same time. I can see this happening if you have multiple columns with dates, for example. I like specifying new_columns as a list like above. If new_columns is just one vector, then you could add the original variable name as a prefix instead of adding .1, .2, etc. as a suffix. For example, this code:

data_separate(data, c("date1", "date2", "date3", etc.), new_columns = c("year", "month", "day"))

would create "date1_year", "date1_month", "date1_day", "date2_year", etc. instead of "year", "month", "day", "year.1", "month.1"...

Guessing columns

I still don't see the point of guess_columns though, because we have to rename the columns afterwards anyway. We can leave it in if you prefer, but I'd like it to be an explicit choice by the user rather than a default if new_columns is missing. So in the example below, instead of automatically splitting the data, I think throwing an error such as "Please specify either `new_columns` or `guess_columns`." would be better:

d <- data.frame(
  x = c("1.a.6", "2.b.7", "3.c.8"),
  stringsAsFactors = FALSE
)
d
#>       x
#> 1 1.a.6
#> 2 2.b.7
#> 3 3.c.8
datawizard::data_separate(d)
#> Column `x` had different number of values after splitting. Variable was
#>   split into 3 columns.
#>   split_1 split_2 split_3
#> 1       1       a       6
#> 2       2       b       7
#> 3       3       c       8

Same for merge_multiple: I don't see the point, but just leave it in if you want.

… names

strengejacke · 2023-06-12T09:00:58Z

library(datawizard)
d_sep <- data.frame(
  x = c("1.a.6", "2.b.7.d", "3.c.8", "5.j"),
  y = c("m.n.99.22", "77.f.g.34", "44.9", NA),
  stringsAsFactors = FALSE
)

data_separate(d_sep)
#> Error: Cannot separate values. Either `new_columns` or `guess_columns` must be
#>   provided.

data_separate(d_sep, guess_columns = "mode")
#> Column `x` had different number of values after splitting. Variable was
#>   split into 3 columns.
#> `x` returned more columns than expected after splitting. Right-most
#>   columns have been dropped.
#> `x`returned fewer columns than expected after splitting. Right-most
#>   columns were filled with `NA`.
#> Column `y` had different number of values after splitting. Variable was
#>   split into 4 columns.
#> `y`returned fewer columns than expected after splitting. Right-most
#>   columns were filled with `NA`.
#>   x_1 x_2  x_3  y_1  y_2  y_3  y_4
#> 1   1   a    6    m    n   99   22
#> 2   2   b    7   77    f    g   34
#> 3   3   c    8   44    9 <NA> <NA>
#> 4   5   j <NA> <NA> <NA> <NA> <NA>

data_separate(d_sep, new_columns = c("AA", "BB"))
#> `x` returned more columns than expected after splitting. Right-most
#>   columns have been dropped.
#> `y` returned more columns than expected after splitting. Right-most
#>   columns have been dropped.
#>   x_AA x_BB y_AA y_BB
#> 1    1    a    m    n
#> 2    2    b   77    f
#> 3    3    c   44    9
#> 4    5    j <NA> <NA>

data_separate(d_sep, new_columns = c("AA", "BB"), merge_multiple = TRUE)
#> `x` returned more columns than expected after splitting. Right-most
#>   columns have been dropped.
#> `y` returned more columns than expected after splitting. Right-most
#>   columns have been dropped.
#>    AA  BB
#> 1  1m  an
#> 2 277  bf
#> 3 344  c9
#> 4 5NA jNA

data_separate(d_sep, new_columns = list(x = c("AA", "BB"), y = c("KK", "LL")))
#> `x` returned more columns than expected after splitting. Right-most
#>   columns have been dropped.
#> `y` returned more columns than expected after splitting. Right-most
#>   columns have been dropped.
#>   AA BB   KK   LL
#> 1  1  a    m    n
#> 2  2  b   77    f
#> 3  3  c   44    9
#> 4  5  j <NA> <NA>

<sup>Created on 2023-06-12 with reprex v2.0.2</sup>

etiennebacher

LGTM, tests are very complete, just a few points (mostly about an unwanted message and the append arg). Thank you @strengejacke

R/data_separate.R

tests/testthat/test-data_separate.R

etiennebacher · 2023-06-13T06:54:30Z

Also, can you update the "coming from tidyverse" vignette with this function?

strengejacke · 2023-06-13T09:57:00Z

Also, can you update the "coming from tidyverse" vignette with this function?

I'm not sure about all the : and {}, and don't know which part of the vignette is required and can/should be copy-pasted to update the vignette... :-/

strengejacke · 2023-06-13T09:57:58Z

I'll give it a try

strengejacke · 2023-06-13T10:11:25Z

Can you look at the changes I made to the vignette?

etiennebacher · 2023-06-13T10:52:31Z

Yes it looks fine

datawizard versions of separate() and unite()

100bbbe

Fixes #423

This comment was marked as outdated.

Sign in to view

strengejacke added 5 commits June 9, 2023 12:08

progress

b63ff34

more code

5c635a5

capture return value

7208998

desc, news, cross ref

1267ce5

fix

316fec0

This comment was marked as outdated.

Sign in to view

strengejacke added 2 commits June 9, 2023 14:24

fix numeric separator arg

70dde12

fix merge multiple

b461e5e

strengejacke added 13 commits June 9, 2023 15:03

docs, examples

cca9c3e

minor

3565e60

verbose

7ea9f16

more verbose

9e73125

start adding tests

f0a03c6

add to pkgdown

35ed0d9

fix test

87e02cd

changes

31c0d6b

remove code that cannot be reached, add test

f81ba8c

add snapshots

666b9fd

typo

c1133f8

docs

06d5dbb

test

751ba07

strengejacke marked this pull request as ready for review June 9, 2023 16:28

strengejacke requested a review from etiennebacher June 9, 2023 16:29

typo

d5a0ff4

etiennebacher reviewed Jun 10, 2023

View reviewed changes

etiennebacher changed the title ~~datawizard versions of separate() and unite()~~ Implement data_separate() Jun 10, 2023

strengejacke added 2 commits June 11, 2023 11:37

remove code that can't be reached

d886d3e

add tests

cf33a26

strengejacke added 2 commits June 11, 2023 11:47

minor

6f7558d

can never be reached

77a1965

allow list of column names, add tests

5605ddf

strengejacke added 2 commits June 12, 2023 10:38

one arg is required, update tests, different pattern of unique column…

fdc93b8

… names

user better names instead of split

74cbb0c

This comment was marked as outdated.

Sign in to view

strengejacke added 2 commits June 12, 2023 10:55

update RD

bf9bc71

better msg

489d343

strengejacke added 2 commits June 12, 2023 11:02

snapshot

99468da

docs (examples)

4ff09a5

etiennebacher approved these changes Jun 13, 2023

View reviewed changes

strengejacke added 3 commits June 13, 2023 10:01

address comments

dc5982b

update tests

061a44b

address last comments, fix test

09585bf

strengejacke added 2 commits June 13, 2023 12:12

vignette

a64bddc

update chunk names

3e88d1f

strengejacke merged commit 756f3a6 into main Jun 13, 2023

strengejacke deleted the strengejacke/issue423 branch June 13, 2023 10:44

final fixes

825dc77

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Implement `data_separate()` #431

Implement `data_separate()` #431

strengejacke commented Jun 9, 2023

This comment was marked as outdated.

strengejacke commented Jun 9, 2023

This comment was marked as outdated.

strengejacke commented Jun 9, 2023

etiennebacher left a comment

strengejacke commented Jun 11, 2023

strengejacke commented Jun 11, 2023

etiennebacher commented Jun 12, 2023

This comment was marked as outdated.

strengejacke commented Jun 12, 2023

etiennebacher left a comment •

edited

Loading

etiennebacher commented Jun 13, 2023

strengejacke commented Jun 13, 2023

strengejacke commented Jun 13, 2023

strengejacke commented Jun 13, 2023

etiennebacher commented Jun 13, 2023

Implement data_separate() #431

Implement data_separate() #431

Conversation

strengejacke commented Jun 9, 2023

This comment was marked as outdated.

strengejacke commented Jun 9, 2023

This comment was marked as outdated.

strengejacke commented Jun 9, 2023

etiennebacher left a comment

Choose a reason for hiding this comment

strengejacke commented Jun 11, 2023

strengejacke commented Jun 11, 2023

etiennebacher commented Jun 12, 2023

Separating multiple cols

Guessing columns

This comment was marked as outdated.

strengejacke commented Jun 12, 2023

etiennebacher left a comment • edited Loading

Choose a reason for hiding this comment

etiennebacher commented Jun 13, 2023

strengejacke commented Jun 13, 2023

strengejacke commented Jun 13, 2023

strengejacke commented Jun 13, 2023

etiennebacher commented Jun 13, 2023

Implement `data_separate()` #431

Implement `data_separate()` #431

etiennebacher left a comment •

edited

Loading