forked from cambiotraining/r-intermediate
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfake-data.Rmd
126 lines (91 loc) · 2.49 KB
/
fake-data.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
---
title: "Data Example"
author: "Mark Dunning"
date: "2 February 2016"
output: html_document
---
```{r}
# wakefield provides r_data_frame() and the column generators used below.
library(wakefield)
# Fix the RNG seed so the random draws in this document are repeatable.
set.seed(666)
# Simulate `n` patient records and return them as a plain data.frame.
#
# Each argument after `n` is a wakefield column generator handed to
# r_data_frame(). Generators given a `name =` argument ("Height", "Weight",
# "Count") request that column name explicitly.
#
# NOTE(review): the birth-date generator is anchored to Sys.Date(), so that
# column presumably shifts with the day the document is knitted even though
# the seed is fixed -- confirm if exact reproducibility matters.
random_patients <- function(n) {
  simulated <- r_data_frame(
    n,
    id,
    name,
    race,
    age,
    sex,
    smokes,
    height_cm(name = "Height"),
    normal(name = "Weight"),
    birth(
      random = TRUE,
      x = NULL,
      start = Sys.Date() - 365 * 45,
      k = 365 * 2,
      by = "1 days"
    ),
    state,
    pet,
    grade_level(x = 1:3),
    died,
    normal(name = "Count"),
    date_stamp
  )
  as.data.frame(simulated)
}
# Simulate the cohort, then replace Height and Weight with sex-specific
# normal draws so the two groups have different distributions.
patients <- random_patients(100)
# Take the row count from the data so the draws below always match the
# cohort size set in the call above.
n_patients <- nrow(patients)
is_male <- patients$Sex == "Male"
# Each rnorm() call generates a full-length vector; ifelse() then picks
# element-wise from the "male" or "other" vector.
patients$Height <- ifelse(
  is_male,
  rnorm(n = n_patients, mean = 175.3, sd = 6),
  rnorm(n = n_patients, mean = 161.9, sd = 3)
)
patients$Weight <- ifelse(
  is_male,
  rnorm(n = n_patients, mean = 84, sd = 6),
  rnorm(n = n_patients, mean = 69, sd = 3)
)
# Embed the unit in the value, which turns Weight into a character column.
patients$Weight <- paste0(patients$Weight, "kg")
```
1. Change some values of `Smokes` to yes/no
```{r}
# Overwrite 10 randomly chosen Smokes entries with "Yes"/"No" strings.
# Rows are drawn from the actual row count rather than a hard-coded 1:100.
# NOTE(review): if Smokes is not already character, assigning strings
# coerces the whole column to character -- presumably the intended mess.
rand_rows <- sample.int(nrow(patients), 10)
patients$Smokes[rand_rows] <- sample(
  c("Yes", "No"),
  length(rand_rows),
  replace = TRUE
)
```
2. Put some zeros in the `Age` column
```{r}
# Set Age to 0 for 10 randomly chosen patients. Rows are drawn from the
# actual row count rather than a hard-coded 1:100.
rand_rows <- sample.int(nrow(patients), 10)
patients$Age[rand_rows] <- 0
```
```{r}
# Append the unit to every Height value, turning the column into text.
patients$Height <- paste0(as.character(patients$Height), "cm")
```
3. Add leading whitespace to some of the Male / Female values in the `Sex` column
```{r}
# Prefix 10 randomly chosen Sex values with a single space. Rows are drawn
# from the actual row count rather than a hard-coded 1:100.
rand_rows <- sample.int(nrow(patients), 10)
# Convert to character first so the padded strings can be stored as-is.
patients$Sex <- as.character(patients$Sex)
patients$Sex[rand_rows] <- paste0(" ", patients$Sex[rand_rows])
```
4. Convert some of the pet names to upper case
```{r}
# Upper-case the Pet value of 10 randomly chosen patients. Rows are drawn
# from the actual row count rather than a hard-coded 1:100.
rand_rows <- sample.int(nrow(patients), 10)
# Convert to character first so the upper-cased strings can be stored as-is.
patients$Pet <- as.character(patients$Pet)
patients$Pet[rand_rows] <- toupper(patients$Pet[rand_rows])
```
5. Put one pet value into the `Race` column by mistake, and convert some of the `None` pets to "NA" or "NULL"
```{r}
# Pick up to `k` of the row indices in `rows` at random.
# Indexing through sample.int() avoids sample()'s special case in which a
# single number x is treated as 1:x, and capping `k` means fewer rows are
# picked (instead of an error) when there are not enough candidates.
pick_rows <- function(rows, k) {
  rows[sample.int(length(rows), min(k, length(rows)))]
}

patients$Race <- as.character(patients$Race)
# Copy one patient's pet into their Race field to simulate a data-entry
# error. Rows already upper-cased to "NONE" also count as candidates here.
error_row <- pick_rows(which(patients$Pet != "None"), 1)
patients$Race[error_row] <- patients$Pet[error_row]
# Replace some "None" pets with the literal strings "NA" and "NULL".
# The two draws are independent, so a row picked for "NA" may be picked
# again and end up as "NULL".
no_pet <- which(patients$Pet == "None")
patients$Pet[pick_rows(no_pet, 2)] <- "NA"
patients$Pet[pick_rows(no_pet, 3)] <- "NULL"
```
6. Make some of the grades '99'
```{r}
# Set Grade_Level to the sentinel value 99 for 7 randomly chosen patients.
# Rows are drawn from the actual row count rather than a hard-coded 1:100.
rand_rows <- sample.int(nrow(patients), 7)
# NOTE(review): if Grade_Level is a factor, as.numeric() returns the level
# codes rather than the labels -- confirm these coincide for levels 1:3.
patients$Grade_Level <- as.numeric(patients$Grade_Level)
patients$Grade_Level[rand_rows] <- 99
```
7. Convert the `ID` column to numbers
```{r}
# Store the identifiers as numbers instead of their original type.
# NOTE(review): presumably the IDs are zero-padded strings, in which case
# the padding is lost here -- confirm against wakefield's id generator.
patients[["ID"]] <- as.numeric(patients[["ID"]])
```
```{r}
# Show the first rows in the rendered document, then save the table as a
# tab-separated file without row names.
head(patients)
write.table(
  patients,
  file = "patient-data.txt",
  sep = "\t",
  row.names = FALSE
)
```
Generate some random gene measurements
```{r}
library(stringr)
# Sizes of the simulated matrix. Everything below is derived from these so
# the values cannot drift out of step with each other.
n_genes <- 10
n_samples <- 82
n_patient_ids <- 100
# Gene labels come from LETTERS, so there can be at most 26 genes.
stopifnot(n_genes <= length(LETTERS), n_samples <= n_patient_ids)

# One row per gene and one column per sample, filled with standard normal
# draws.
counts <- matrix(rnorm(n_genes * n_samples), ncol = n_samples)
# Label each sample column with a distinct zero-padded number such as "007".
colnames(counts) <- str_pad(
  sample.int(n_patient_ids, n_samples),
  pad = "0",
  width = 3
)
# data.frame() applies check.names = TRUE by default, so column names that
# start with a digit are written out with an "X" prefix (e.g. "X007").
counts <- data.frame(GeneID = LETTERS[seq_len(n_genes)], counts)
# NOTE(review): row.names is left at its default here, so row names are
# written and the header line has one field fewer than the data lines.
# read.table() copes with that layout, but other tools may not.
write.table(counts, file = "gene-counts.txt", sep = "\t", quote = FALSE)
```