-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwodarz2-WR_generator.Rmd
53 lines (45 loc) · 1.45 KB
/
wodarz2-WR_generator.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
---
title: "Untitled"
author: "Pierson Wodarz"
date: "11/30/2021"
output: html_document
---
```{r}
library(dplyr)
```
```{r}
# read in data
myurl = "https://liangfgithub.github.io/MovieData/"
movies = readLines(paste0(myurl, 'movies.dat?raw=true'))
movies = strsplit(movies, split = "::", fixed = TRUE, useBytes = TRUE)
movies = matrix(unlist(movies), ncol = 3, byrow = TRUE)
movies = data.frame(movies, stringsAsFactors = FALSE)
colnames(movies) = c('MovieID', 'Title', 'Genres')
movies$MovieID = as.integer(movies$MovieID)
movies$Title = iconv(movies$Title, "latin1", "UTF-8")
ratings = read.csv(paste0(myurl, 'ratings.dat?raw=true'),
sep = ':',
colClasses = c('integer', 'NULL'),
header = FALSE)
colnames(ratings) = c('UserID', 'MovieID', 'Rating', 'Timestamp')
ratings$Timestamp = NULL
```
```{r}
aggregate = data.frame(ratings %>% group_by(MovieID) %>% summarise(mean_rating = mean(Rating), num_reviews = n()))
aggregate
```
```{r}
mean_C = mean(ratings$Rating)
min_rev = 100
aggregate$WR = with(aggregate, (mean_rating * (num_reviews / (num_reviews + min_rev))) + (mean_C * (min_rev / (num_reviews + min_rev))))
aggregate
```
```{r}
joined = merge(movies, aggregate, all = FALSE, by="MovieID")
joined
write.table(joined, file = "WR_movies.dat", quote= FALSE, row.names = FALSE, col.names = FALSE, sep = "::")
```
```{r}
result = joined %>% filter(grepl("Children's",Genres)) %>% arrange(desc(WR))
result[1:10,]
```