-
Notifications
You must be signed in to change notification settings - Fork 2
/
text_basics.R
117 lines (73 loc) · 2.63 KB
/
text_basics.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
library(tidyverse)
######################################################################
# Some basic text manipulation
######################################################################
# paste() joins its arguments into one string - the default setting is to
# put a single space between the pieces
paste("x", "y")

# paste0() does the same job, but puts nothing between the pieces
paste0("x", "y")

# Both functions handle vectors - if the vectors are the same length, the
# first item of one is pasted to the first item of the other, and so on
# down the line
paste0(
  c("x", "y", "z"),
  c("a", "b", "c")
)

# If the second argument is only one item long,
# it is pasted to every item in the first argument
paste0(
  c("x", "y"),
  c("a")
)

# If they are unequal lengths you can get strange behavior - not recommended!
# The shorter vector is silently recycled, so here "a" is reused for "z"
paste0(
  c("x", "y", "z"),
  c("a", "b")
)
##############################
# Example sentence used by the counting and slicing demos in this section
toolong <- "this string is good but it is too long"

# This counts characters
toolong %>%
  str_length()

# This counts words - the funny thing in the bracket is a "regular expression"
# REs are a bit advanced for this class.. For now just memorize this one
# It basically means "the number of blocks of consecutive alphabetical characters"
toolong %>%
  str_count("[[:alpha:]]+")

# str_sub() helps us chop up a string: give it a start and an end position.
# Positions count from 1 and both ends are included, so this keeps
# characters 1 through 19 ("this string is good")
toolong %>%
  str_sub(1, 19)

# negatives count from the end, so -1 means "the last character".
# This keeps everything from character 20 onwards (" but it is too long")
toolong %>%
  str_sub(20, -1)

# ... and this keeps only the last nine characters (" too long")
toolong %>%
  str_sub(-9, -1)
##############################
# str_replace() substitutes one character string for another
c("abcx", "defxx", "ghiy") %>%
  str_replace("x", "y")

# This next call fails with an error. It is wrapped in try() so that the
# error message is still shown, but the rest of the script keeps running
# when the whole file is executed in one go
try(
  c("abc*", "defx*", "ghiy") %>%
    str_replace("*", "y")
)

# what happened? Again, this function uses "regular expressions", and a
# bare * is not a valid regular expression on its own
# If you just want the literal characters, you need to "escape" them with \\
c("abc*", "defx*", "ghiy") %>%
  str_replace("\\*", "y")

# We can delete character strings by replacing them with an empty string
c("abc", "def", "rabbit") %>%
  str_replace("b", "")

# Notice it only replaces the first instance... to replace all, we use a
# different function
c("abc", "def", "rabbit") %>%
  str_replace_all("b", "")

c("before", "abetting", "in my bed") %>%
  str_replace_all("be", "")
##############################
# str_detect() answers, for each string, "does it contain this pattern?"
str_detect(c("teases", "taxes", "xylophone"), "x")

# Matching is case sensitive: the capital "B" in "Before" does not match "be"
str_detect(c("Before", "abetting", "in my bed"), "be")

# Converting everything to lowercase first makes the match ignore case
str_detect(str_to_lower(c("Before", "abetting", "in my bed")), "be")

# str_count() gives the number of matches in each string
# instead of a plain TRUE/FALSE
str_count(c("Before", "abetting", "in my bed"), "be")

str_count(c("Before", "abetting", "in my bed"), "e")

str_count(c("Before", "abetting", "in my bed"), "t")