Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement data_separate() #431

Merged
merged 41 commits into from
Jun 13, 2023
Merged

Implement data_separate() #431

merged 41 commits into from
Jun 13, 2023

Conversation

strengejacke
Copy link
Member

Fixes #423

@codecov-commenter

This comment was marked as outdated.

@strengejacke
Copy link
Member Author

This is really a long list of examples...
@jmgirard @etiennebacher Most of the function should work, merge_multiple seems not to work yet. Will add tests in the next days...

library(datawizard)
d <- data.frame(x = c(NA, "x.y", "x.z.y", "y.z", "1.2.3"))
d
#>       x
#> 1  <NA>
#> 2   x.y
#> 3 x.z.y
#> 4   y.z
#> 5 1.2.3

data_separate(d)
#>   split_1 split_2
#> 1    <NA>    <NA>
#> 2       x       y
#> 3       x       z
#> 4       y       z
#> 5       1       2

data_separate(d, guess_columns = "max")
#>   split_1 split_2 split_3
#> 1    <NA>    <NA>    <NA>
#> 2       x       y    <NA>
#> 3       x       z       y
#> 4       y       z    <NA>
#> 5       1       2       3

data_separate(d, guess_columns = "min")
#>   split_1
#> 1    <NA>
#> 2       x
#> 3       x
#> 4       y
#> 5       1

# new_columns overwrites guess_columns
data_separate(d, new_columns = c("A", "B"), guess_columns = "min")
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    x    z
#> 4    y    z
#> 5    1    2

data_separate(d, new_columns = c("A", "B"))
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    x    z
#> 4    y    z
#> 5    1    2

data_separate(d, new_columns = c("A", "B", "C"))
#>      A    B    C
#> 1 <NA> <NA> <NA>
#> 2    x    y <NA>
#> 3    x    z    y
#> 4    y    z <NA>
#> 5    1    2    3

data_separate(d, new_columns = c("A", "B", "C"), fill = "left")
#>      A    B    C
#> 1 <NA> <NA> <NA>
#> 2 <NA>    x    y
#> 3    x    z    y
#> 4 <NA>    y    z
#> 5    1    2    3

data_separate(d, new_columns = c("A", "B", "C"), fill = "right")
#>      A    B    C
#> 1 <NA> <NA> <NA>
#> 2    x    y <NA>
#> 3    x    z    y
#> 4    y    z <NA>
#> 5    1    2    3

data_separate(d, new_columns = c("A", "B", "C"), fill = "value_left")
#>      A    B    C
#> 1 <NA> <NA> <NA>
#> 2    x    x    y
#> 3    x    z    y
#> 4    y    y    z
#> 5    1    2    3

data_separate(d, new_columns = c("A", "B", "C"), fill = "value_right")
#>      A    B    C
#> 1 <NA> <NA> <NA>
#> 2    x    y    y
#> 3    x    z    y
#> 4    y    z    z
#> 5    1    2    3

data_separate(d, new_columns = c("A", "B"), extra = "merge_right")
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    x  z y
#> 4    y    z
#> 5    1  2 3

data_separate(d, new_columns = c("A", "B"), extra = "merge_left")
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3  x z    y
#> 4    y    z
#> 5  1 2    3

data_separate(d, new_columns = c("A", "B"), extra = "drop_right")
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    x    z
#> 4    y    z
#> 5    1    2

data_separate(d, new_columns = c("A", "B"), extra = "drop_left")
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    z    y
#> 4    y    z
#> 5    2    3



d <- data.frame(
  x = c(NA, "x.y", "x.z.y", "y.z", "1.2.3"),
  y = c(NA, "a.b", "a.b.c", "a.c", "5.6.7")
)
d
#>       x     y
#> 1  <NA>  <NA>
#> 2   x.y   a.b
#> 3 x.z.y a.b.c
#> 4   y.z   a.c
#> 5 1.2.3 5.6.7

data_separate(d)
#>   split_1 split_2 split_1.1 split_2.1
#> 1    <NA>    <NA>      <NA>      <NA>
#> 2       x       y         a         b
#> 3       x       z         a         b
#> 4       y       z         a         c
#> 5       1       2         5         6

data_separate(d, new_columns = c("A", "B"))
#>      A    B  A.1  B.1
#> 1 <NA> <NA> <NA> <NA>
#> 2    x    y    a    b
#> 3    x    z    a    b
#> 4    y    z    a    c
#> 5    1    2    5    6

data_separate(d, new_columns = c("A", "B", "C"))
#>      A    B    C  A.1  B.1  C.1
#> 1 <NA> <NA> <NA> <NA> <NA> <NA>
#> 2    x    y <NA>    a    b <NA>
#> 3    x    z    y    a    b    c
#> 4    y    z <NA>    a    c <NA>
#> 5    1    2    3    5    6    7

data_separate(d, select = "x")
#>   split_1 split_2
#> 1    <NA>    <NA>
#> 2       x       y
#> 3       x       z
#> 4       y       z
#> 5       1       2

data_separate(d, select = "x", new_columns = c("A", "B"))
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    x    z
#> 4    y    z
#> 5    1    2

data_separate(d, select = "x", new_columns = c("A", "B"), append = TRUE)
#>       x     y    A    B
#> 1  <NA>  <NA> <NA> <NA>
#> 2   x.y   a.b    x    y
#> 3 x.z.y a.b.c    x    z
#> 4   y.z   a.c    y    z
#> 5 1.2.3 5.6.7    1    2

data_separate(d, append = TRUE)
#>       x     y split_1 split_2 split_1.1 split_2.1
#> 1  <NA>  <NA>    <NA>    <NA>      <NA>      <NA>
#> 2   x.y   a.b       x       y         a         b
#> 3 x.z.y a.b.c       x       z         a         b
#> 4   y.z   a.c       y       z         a         c
#> 5 1.2.3 5.6.7       1       2         5         6

data_separate(d, new_columns = c("A", "B"), append = TRUE)
#>       x     y    A    B  A.1  B.1
#> 1  <NA>  <NA> <NA> <NA> <NA> <NA>
#> 2   x.y   a.b    x    y    a    b
#> 3 x.z.y a.b.c    x    z    a    b
#> 4   y.z   a.c    y    z    a    c
#> 5 1.2.3 5.6.7    1    2    5    6



data_separate(d, extra = "drop_left")
#>   split_1 split_2 split_1.1 split_2.1
#> 1    <NA>    <NA>      <NA>      <NA>
#> 2       x       y         a         b
#> 3       z       y         b         c
#> 4       y       z         a         c
#> 5       2       3         6         7

data_separate(d, extra = "drop_right")
#>   split_1 split_2 split_1.1 split_2.1
#> 1    <NA>    <NA>      <NA>      <NA>
#> 2       x       y         a         b
#> 3       x       z         a         b
#> 4       y       z         a         c
#> 5       1       2         5         6

data_separate(d, extra = "merge_left")
#>   split_1 split_2 split_1.1 split_2.1
#> 1    <NA>    <NA>      <NA>      <NA>
#> 2       x       y         a         b
#> 3     x z       y       a b         c
#> 4       y       z         a         c
#> 5     1 2       3       5 6         7

data_separate(d, extra = "merge_right")
#>   split_1 split_2 split_1.1 split_2.1
#> 1    <NA>    <NA>      <NA>      <NA>
#> 2       x       y         a         b
#> 3       x     z y         a       b c
#> 4       y       z         a         c
#> 5       1     2 3         5       6 7



data_separate(d, new_columns = c("A", "B", "C"))
#>      A    B    C  A.1  B.1  C.1
#> 1 <NA> <NA> <NA> <NA> <NA> <NA>
#> 2    x    y <NA>    a    b <NA>
#> 3    x    z    y    a    b    c
#> 4    y    z <NA>    a    c <NA>
#> 5    1    2    3    5    6    7

data_separate(d, new_columns = c("A", "B", "C"), fill = "left")
#>      A    B    C  A.1  B.1  C.1
#> 1 <NA> <NA> <NA> <NA> <NA> <NA>
#> 2 <NA>    x    y <NA>    a    b
#> 3    x    z    y    a    b    c
#> 4 <NA>    y    z <NA>    a    c
#> 5    1    2    3    5    6    7

data_separate(d, new_columns = c("A", "B", "C"), fill = "right")
#>      A    B    C  A.1  B.1  C.1
#> 1 <NA> <NA> <NA> <NA> <NA> <NA>
#> 2    x    y <NA>    a    b <NA>
#> 3    x    z    y    a    b    c
#> 4    y    z <NA>    a    c <NA>
#> 5    1    2    3    5    6    7

data_separate(d, new_columns = c("A", "B", "C"), fill = "value_left")
#>      A    B    C  A.1  B.1  C.1
#> 1 <NA> <NA> <NA> <NA> <NA> <NA>
#> 2    x    x    y    a    a    b
#> 3    x    z    y    a    b    c
#> 4    y    y    z    a    a    c
#> 5    1    2    3    5    6    7

data_separate(d, new_columns = c("A", "B", "C"), fill = "value_right")
#>      A    B    C  A.1  B.1  C.1
#> 1 <NA> <NA> <NA> <NA> <NA> <NA>
#> 2    x    y    y    a    b    b
#> 3    x    z    y    a    b    c
#> 4    y    z    z    a    c    c
#> 5    1    2    3    5    6    7



data_separate(d, select = "x")
#>   split_1 split_2
#> 1    <NA>    <NA>
#> 2       x       y
#> 3       x       z
#> 4       y       z
#> 5       1       2

data_separate(d, select = "x", new_columns = c("A", "B"))
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    x    z
#> 4    y    z
#> 5    1    2

data_separate(d, select = "x", new_columns = c("A", "B"), append = TRUE)
#>       x     y    A    B
#> 1  <NA>  <NA> <NA> <NA>
#> 2   x.y   a.b    x    y
#> 3 x.z.y a.b.c    x    z
#> 4   y.z   a.c    y    z
#> 5 1.2.3 5.6.7    1    2

data_separate(d, append = TRUE)
#>       x     y split_1 split_2 split_1.1 split_2.1
#> 1  <NA>  <NA>    <NA>    <NA>      <NA>      <NA>
#> 2   x.y   a.b       x       y         a         b
#> 3 x.z.y a.b.c       x       z         a         b
#> 4   y.z   a.c       y       z         a         c
#> 5 1.2.3 5.6.7       1       2         5         6

data_separate(d, new_columns = c("A", "B"), append = TRUE)
#>       x     y    A    B  A.1  B.1
#> 1  <NA>  <NA> <NA> <NA> <NA> <NA>
#> 2   x.y   a.b    x    y    a    b
#> 3 x.z.y a.b.c    x    z    a    b
#> 4   y.z   a.c    y    z    a    c
#> 5 1.2.3 5.6.7    1    2    5    6



d <- data.frame(
  x = c(NA, "abcdefghijk", "hijklmnopqr", "lmnopqrstuvw", "pqrstuvwxyz"),
  y = c(NA, "12234567i89", "545643543j5jkjkl", "434234234jlk432423", "45543kljkjk45435345345")
)
d
#>              x                      y
#> 1         <NA>                   <NA>
#> 2  abcdefghijk            12234567i89
#> 3  hijklmnopqr       545643543j5jkjkl
#> 4 lmnopqrstuvw     434234234jlk432423
#> 5  pqrstuvwxyz 45543kljkjk45435345345

data_separate(d, separator = c(2, 5, 7))
#>   split_1 split_2 split_3 split_4 split_5 split_1.1 split_2.1 split_3.1
#> 1    <NA>      ab      hi      lm      pq      <NA>        12        54
#> 2    <NA>   cdefg   jklmn   nopqr   rstuv      <NA>     23456     56435
#> 3    <NA>    hijk    opqr   stuvw    wxyz      <NA>      7i89   43j5jkj
#> 4    <NA>    <NA>    <NA>    <NA>    <NA>      <NA>      <NA>      <NA>
#>   split_4.1 split_5.1
#> 1        43        45
#> 2     42342     543kl
#> 3   34jlk43   jkjk454
#> 4      <NA>      <NA>

Created on 2023-06-09 with reprex v2.0.2

@strengejacke

This comment was marked as outdated.

@strengejacke
Copy link
Member Author

ok, merge multiple split columns and numeric separator works now:

library(datawizard)
d <- data.frame(
  x = c(NA, "x.y", "x.z.y", "y.z", "1.2.3"),
  y = c(NA, "a.b", "a.b.c", "a.c", "5.6.7")
)
d
#>       x     y
#> 1  <NA>  <NA>
#> 2   x.y   a.b
#> 3 x.z.y a.b.c
#> 4   y.z   a.c
#> 5 1.2.3 5.6.7

data_separate(d, new_columns = c("A", "B"))
#>      A    B  A.1  B.1
#> 1 <NA> <NA> <NA> <NA>
#> 2    x    y    a    b
#> 3    x    z    a    b
#> 4    y    z    a    c
#> 5    1    2    5    6
data_separate(d, new_columns = c("A", "B"), merge_multiple = TRUE)
#>       A     B
#> 1 NA NA NA NA
#> 2   x a   y b
#> 3   x a   z b
#> 4   y a   z c
#> 5   1 5   2 6

data_separate(d, new_columns = c("A", "B", "C"))
#>      A    B    C  A.1  B.1  C.1
#> 1 <NA> <NA> <NA> <NA> <NA> <NA>
#> 2    x    y <NA>    a    b <NA>
#> 3    x    z    y    a    b    c
#> 4    y    z <NA>    a    c <NA>
#> 5    1    2    3    5    6    7
data_separate(d, new_columns = c("A", "B", "C"), merge_multiple = TRUE)
#>       A     B     C
#> 1 NA NA NA NA NA NA
#> 2   x a   y b NA NA
#> 3   x a   z b   y c
#> 4   y a   z c NA NA
#> 5   1 5   2 6   3 7

data_separate(d, extra = "drop_left")
#>   split_1 split_2 split_1.1 split_2.1
#> 1    <NA>    <NA>      <NA>      <NA>
#> 2       x       y         a         b
#> 3       z       y         b         c
#> 4       y       z         a         c
#> 5       2       3         6         7
data_separate(d, extra = "drop_left", merge_multiple = TRUE)
#>   split_1 split_2
#> 1   NA NA   NA NA
#> 2     x a     y b
#> 3     z b     y c
#> 4     y a     z c
#> 5     2 6     3 7

d <- data.frame(
  x = c(NA, "abcdefghijk", "hijklmnopqr", "lmnopqrstuvw", "pqrstuvwxyz"),
  y = c(NA, "12234567i89", "545643543j5jkjkl", "434234234jlk432423", "45543kljkjk45435345345")
)
d
#>              x                      y
#> 1         <NA>                   <NA>
#> 2  abcdefghijk            12234567i89
#> 3  hijklmnopqr       545643543j5jkjkl
#> 4 lmnopqrstuvw     434234234jlk432423
#> 5  pqrstuvwxyz 45543kljkjk45435345345

data_separate(d, separator = c(2, 5, 7))
#>    split_1 split_2 split_3 split_4 split_1.1 split_2.1 split_3.1
#> V1    <NA>    <NA>    <NA>    <NA>      <NA>      <NA>      <NA>
#> V2       a     bcd      ef   ghijk         1       223        45
#> V3       h     ijk      lm   nopqr         5       456        43
#> V4       l     mno      pq  rstuvw         4       342        34
#> V5       p     qrs      tu   vwxyz         4       554        3k
#>           split_4.1
#> V1             <NA>
#> V2            67i89
#> V3       543j5jkjkl
#> V4     234jlk432423
#> V5 ljkjk45435345345
data_separate(d, separator = c(2, 5, 7), merge_multiple = TRUE)
#>    split_1 split_2 split_3                split_4
#> V1   NA NA   NA NA   NA NA                  NA NA
#> V2     a 1 bcd 223   ef 45            ghijk 67i89
#> V3     h 5 ijk 456   lm 43       nopqr 543j5jkjkl
#> V4     l 4 mno 342   pq 34    rstuvw 234jlk432423
#> V5     p 4 qrs 554   tu 3k vwxyz ljkjk45435345345

data_separate(d, separator = c(3, 7, 11))
#>    split_1 split_2 split_3 split_4 split_1.1 split_2.1 split_3.1    split_4.1
#> V1    <NA>    <NA>    <NA>    <NA>      <NA>      <NA>      <NA>         <NA>
#> V2      ab    cdef    ghij       k        12      2345      67i8            9
#> V3      hi    jklm    nopq       r        54      5643      543j       5jkjkl
#> V4      lm    nopq    rstu      vw        43      4234      234j     lk432423
#> V5      pq    rstu    vwxy       z        45      543k      ljkj k45435345345
data_separate(d, separator = c(3, 7, 11), merge_multiple = TRUE)
#>    split_1   split_2   split_3        split_4
#> V1   NA NA     NA NA     NA NA          NA NA
#> V2   ab 12 cdef 2345 ghij 67i8            k 9
#> V3   hi 54 jklm 5643 nopq 543j       r 5jkjkl
#> V4   lm 43 nopq 4234 rstu 234j    vw lk432423
#> V5   pq 45 rstu 543k vwxy ljkj z k45435345345

Created on 2023-06-09 with reprex v2.0.2

@strengejacke strengejacke marked this pull request as ready for review June 9, 2023 16:28
Copy link
Member

@etiennebacher etiennebacher left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before reviewing the code, I'd like to discuss the behavior of data_separate() a bit, because some things seem weird to me (sorry, I feel like I'm the guy who is always criticizing your implementation of functions ^^).

Basically there are two things that I don't like in the current behavior:

  • the fact that it can be applied on several cols
  • automatic naming of new cols (which is a consequence of the first point)

I feel that separating several columns at once is not desirable because it's harder to control the output. Maybe you have use cases for this, but personally I've always needed separate() on one column at a time because I want to control exactly the new columns that will be generated.

Also, I think being able to split only one col at a time leads to more readable code. For instance, some code with the current implementation would be:

library(datawizard)

# multiple columns to split
d <- data.frame(
  dep_date = c("2022-07-02", "2001-09-11", "2010-12-24"),
  arr_date = c("2023-01-02", "2011-12-10", "2011-01-24"),
  stringsAsFactors = FALSE
)
d
#>     dep_date   arr_date
#> 1 2022-07-02 2023-01-02
#> 2 2001-09-11 2011-12-10
#> 3 2010-12-24 2011-01-24

# split two columns, default column names
d |> 
  data_separate(select = contains("date")) |> 
  data_rename(
    pattern = c("split_1", "split_2", "split_3", 
                "split_1.1", "split_2.1", "split_3.1"),
    replacement = c("year_dep", "month_dep", "day_dep", 
                    "year_arr", "month_arr", "day_arr")
  )
#> Column `dep_date` had different number of values after splitting.
#>   Variable was split into 3 columns.
#> Column `arr_date` had different number of values after splitting.
#>   Variable was split into 3 columns.
#>   year_dep month_dep day_dep year_arr month_arr day_arr
#> 1     2022        07      02     2023        01      02
#> 2     2001        09      11     2011        12      10
#> 3     2010        12      24     2011        01      24

We are forced to rename the new cols because it's very unlikely that the generated names are satisfactory. The problem is that if the order of the two columns changes in the original data, the renaming will be wrong.

I think it would be more readable and safe to have something like this:

d |> 
  data_separate(dep_date, new_columns = c("year_dep", "month_dep", "day_dep")) |> 
  data_separate(arr_date, new_columns = c("year_arr", "month_arr", "day_arr"))

In summary, I think we should remove:

  • the automatic column renaming and therefore the arg guess_columns
  • separating several columns at once and therefore the args merge_multiple and merge_separator (which I find quite confusing and I really don't see a use case for them).

What do you think?

@etiennebacher etiennebacher changed the title datawizard versions of separate() and unite() Implement data_separate() Jun 10, 2023
@strengejacke
Copy link
Member Author

I'm not sure if this code can be reached:

    # check if column names should be recycled
    if (ncol(out) != length(new_column_names)) {
      # recycle names, avoid duplicates
      new_column_names <- make.unique(rep(new_column_names, times = ncol(out) / new_column_names))
    }

Otherwise, we have 100% code coverage in tests. Snapshot outputs are validated.

@strengejacke
Copy link
Member Author

What about this:

library(datawizard)
# separate multiple columns, give proper column names
d_sep <- data.frame(
  x = c("1.a.6", "2.b.7.d", "3.c.8", "5.j"),
  y = c("m.n.99.22", "77.f.g.34", "44.9", NA),
  stringsAsFactors = FALSE
)
data_separate(
  d_sep,
  select = c("x", "y"),
  new_columns = list(
    x = c("A", "B", "C"), # separate "x" into three columns
    y = c("EE", "FF", "GG", "HH") # separate "y" into four columns
  ),
  verbose = FALSE
)
#>   A B    C   EE   FF   GG   HH
#> 1 1 a    6    m    n   99   22
#> 2 2 b    7   77    f    g   34
#> 3 3 c    8   44    9 <NA> <NA>
#> 4 5 j <NA> <NA> <NA> <NA> <NA>

Created on 2023-06-11 with reprex v2.0.2

@etiennebacher
Copy link
Member

Separating multiple cols

Actually, I'm fine with separating several columns at the same time. I can see this happening if you have multiple columns with dates, for example. I like specifying new_columns as a list like above. If new_columns is just one vector, then you could add the original variable name as a prefix instead of adding .1, .2, etc. as a suffix. For example, this code:

data_separate(data, c("date1", "date2", "date3", etc.), new_columns = c("year", "month", "day"))

would create "date1_year", "date1_month", "date1_day", "date2_year", etc. instead of "year", "month", "day", "year.1", "month.1"...

Guessing columns

I still don't see the point of guess_columns, though, because we have to rename the columns afterwards anyway. We can keep it if you prefer, but I'd like it to be an explicit choice by the user rather than a default when new_columns is missing. So in the example below, instead of automatically splitting the data, I think throwing an error such as "Please specify either `new_columns` or `guess_columns`." would be better:

d <- data.frame(
  x = c("1.a.6", "2.b.7", "3.c.8"),
  stringsAsFactors = FALSE
)
d
#>       x
#> 1 1.a.6
#> 2 2.b.7
#> 3 3.c.8
datawizard::data_separate(d)
#> Column `x` had different number of values after splitting. Variable was
#>   split into 3 columns.
#>   split_1 split_2 split_3
#> 1       1       a       6
#> 2       2       b       7
#> 3       3       c       8

Same for merge_multiple: I don't see the point, but just keep it if you want.

@strengejacke

This comment was marked as outdated.

@strengejacke
Copy link
Member Author

library(datawizard)
d_sep <- data.frame(
  x = c("1.a.6", "2.b.7.d", "3.c.8", "5.j"),
  y = c("m.n.99.22", "77.f.g.34", "44.9", NA),
  stringsAsFactors = FALSE
)

data_separate(d_sep)
#> Error: Cannot separate values. Either `new_columns` or `guess_columns` must be
#>   provided.

data_separate(d_sep, guess_columns = "mode")
#> Column `x` had different number of values after splitting. Variable was
#>   split into 3 columns.
#> `x` returned more columns than expected after splitting. Right-most
#>   columns have been dropped.
#> `x`returned fewer columns than expected after splitting. Right-most
#>   columns were filled with `NA`.
#> Column `y` had different number of values after splitting. Variable was
#>   split into 4 columns.
#> `y`returned fewer columns than expected after splitting. Right-most
#>   columns were filled with `NA`.
#>   x_1 x_2  x_3  y_1  y_2  y_3  y_4
#> 1   1   a    6    m    n   99   22
#> 2   2   b    7   77    f    g   34
#> 3   3   c    8   44    9 <NA> <NA>
#> 4   5   j <NA> <NA> <NA> <NA> <NA>

data_separate(d_sep, new_columns = c("AA", "BB"))
#> `x` returned more columns than expected after splitting. Right-most
#>   columns have been dropped.
#> `y` returned more columns than expected after splitting. Right-most
#>   columns have been dropped.
#>   x_AA x_BB y_AA y_BB
#> 1    1    a    m    n
#> 2    2    b   77    f
#> 3    3    c   44    9
#> 4    5    j <NA> <NA>

data_separate(d_sep, new_columns = c("AA", "BB"), merge_multiple = TRUE)
#> `x` returned more columns than expected after splitting. Right-most
#>   columns have been dropped.
#> `y` returned more columns than expected after splitting. Right-most
#>   columns have been dropped.
#>    AA  BB
#> 1  1m  an
#> 2 277  bf
#> 3 344  c9
#> 4 5NA jNA

data_separate(d_sep, new_columns = list(x = c("AA", "BB"), y = c("KK", "LL")))
#> `x` returned more columns than expected after splitting. Right-most
#>   columns have been dropped.
#> `y` returned more columns than expected after splitting. Right-most
#>   columns have been dropped.
#>   AA BB   KK   LL
#> 1  1  a    m    n
#> 2  2  b   77    f
#> 3  3  c   44    9
#> 4  5  j <NA> <NA>

Created on 2023-06-12 with reprex v2.0.2

Copy link
Member

@etiennebacher etiennebacher left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, tests are very complete, just a few points (mostly about an unwanted message and the append arg). Thank you @strengejacke

R/data_separate.R Outdated Show resolved Hide resolved
R/data_separate.R Outdated Show resolved Hide resolved
R/data_separate.R Outdated Show resolved Hide resolved
R/data_separate.R Outdated Show resolved Hide resolved
R/data_separate.R Outdated Show resolved Hide resolved
R/data_separate.R Outdated Show resolved Hide resolved
R/data_separate.R Outdated Show resolved Hide resolved
tests/testthat/test-data_separate.R Outdated Show resolved Hide resolved
@etiennebacher
Copy link
Member

Also, can you update the "coming from tidyverse" vignette with this function?

@strengejacke
Copy link
Member Author

Also, can you update the "coming from tidyverse" vignette with this function?

I'm not sure about all the : and {}, and don't know which part of the vignette is required and can/should be copy-pasted to update the vignette... :-/

@strengejacke
Copy link
Member Author

I'll give it a try

@strengejacke
Copy link
Member Author

Can you look at the changes I made to the vignette?

@strengejacke strengejacke merged commit 756f3a6 into main Jun 13, 2023
@strengejacke strengejacke deleted the strengejacke/issue423 branch June 13, 2023 10:44
@etiennebacher
Copy link
Member

Yes it looks fine

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

datawizard versions of separate() and unite()
4 participants