-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsync_canvas.R
163 lines (144 loc) · 5.51 KB
/
sync_canvas.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# ---------------------------------------------
# Filename: Sync Canvas Files
# Author: Jordan Farrer
# Date Created: 2018-01-16
# ---------------------------------------------
# ---------------------------------------------
# Modify this path
# ---------------------------------------------
# Root folder for the synced files: folder_structure() and download_file()
# build every local path as file.path(file_dir, course_name, ...)
file_dir <- '/Users/jordanfarrer/Dropbox/Wharton/Spring 2018/'
# ---------------------------------------------
#
# !!
# DO NOT CHANGE ANYTHING BELOW
# !!
#
# ---------------------------------------------
# ---------------------------------------------
# Loads the three required packages
# ---------------------------------------------
suppressPackageStartupMessages(require(here))
suppressPackageStartupMessages(require(jsonlite))
suppressPackageStartupMessages(require(tidyverse))
# ---------------------------------------------
# Sets the Canvas API path
# Access the API token in .Renviron
# ---------------------------------------------
# Base URL that the folder and file request URLs are built from
canvas_api <- 'https://canvas.instructure.com/api/v1/'
# Directory returned by here::here(); course_list.csv is read from it and
# sync_canvas_log.csv is read from and written to it
sync_canvas_dir <- here::here()
# Sys.getenv() returns "" when CANVAS_TOKEN is not set, in which case the
# request URLs are built with an empty access_token
access_token <- Sys.getenv("CANVAS_TOKEN")
# ---------------------------------------------
# Load the course list to be synced and
# add the prefix to each of the course IDs
# ---------------------------------------------
# course_id is read as text instead of being type-guessed: paste0() on a
# numeric id can produce scientific notation (100000 becomes "1e+05"), which
# would corrupt the id placed in the request URL. Other columns are still
# guessed.
courses <-
  read_csv(
    file.path(sync_canvas_dir, "course_list.csv"),
    col_types = cols(course_id = col_character())
  ) %>%
  mutate(course_id = paste0("25000000", course_id))
# ---------------------------------------------
# Function that builds the folder structure
# for each course
# ---------------------------------------------
folder_structure <- function(course_name, course_id) {
  # Mirrors the Canvas folder tree of one course on disk and returns the
  # folder listing.
  #
  # Args:
  #   course_name: name of the course sub-folder created under file_dir
  #   course_id:   id inserted into the folders request URL
  #
  # Returns:
  #   A tibble with two character columns: folder_id (the id as text) and
  #   folder_path (full_name with the first 'course files' removed), ordered
  #   by id.
  request_url <- paste0(
    canvas_api, 'courses/', course_id,
    '/folders?access_token=', access_token, '&per_page=100000'
  )

  listing <- arrange(as_tibble(fromJSON(request_url)), id)
  course_folders <- transmute(
    listing,
    folder_id = as.character(id),
    folder_path = str_replace(full_name, 'course files', '')
  )

  # Create each folder locally; warnings (e.g. for folders that already
  # exist) are suppressed and missing parents are created as needed
  for (relative_path in course_folders$folder_path) {
    dir.create(
      file.path(file_dir, course_name, relative_path),
      showWarnings = FALSE,
      recursive = TRUE
    )
  }

  course_folders
}
# ---------------------------------------------
# Function that gets information about each
# file for each course: file_id, file_name,
# file_size, and the file_url for downloading
# ---------------------------------------------
get_files_in_folder <- function(course_name, folder_id, folder_path) {
  # Lists the files of a single Canvas folder.
  #
  # Args:
  #   course_name: not used in the body
  #   folder_id:   id inserted into the files request URL
  #   folder_path: not used in the body
  #
  # Returns:
  #   A tibble with columns file_id, file_name, size and file_url, or the
  #   empty tibble produced from the response when it has no rows.
  request_url <- paste0(
    canvas_api, 'folders/', folder_id,
    '/files?access_token=', access_token, '&per_page=100000'
  )
  listing <- as_tibble(fromJSON(request_url))

  # Nothing in this folder: hand the empty tibble back untouched
  if (nrow(listing) == 0) {
    return(listing)
  }

  with_ids <- mutate(listing, file_id = as.character(id))
  select(with_ids, file_id, file_name = display_name, size, file_url = url)
}
# ---------------------------------------------
# Function that downloads each file if it
# (1) does not exist or (2) exists but is a
# different file size
# ---------------------------------------------
download_file <- function(course_name, folder_path, file_name, file_url, size) {
  # Downloads one file unless an identical-size local copy already exists.
  #
  # Args:
  #   course_name: course sub-folder under file_dir
  #   folder_path: folder path below the course sub-folder
  #   file_name:   name of the local file
  #   file_url:    URL the file is downloaded from
  #   size:        expected file size in bytes; may be NA
  #
  # Returns:
  #   TRUE when the file was downloaded, FALSE when the existing local copy
  #   was kept.
  full_file_path <- file.path(file_dir, course_name, folder_path, file_name)

  # isTRUE() keeps the test a strict TRUE/FALSE: when `size` is NA the
  # comparison is NA, which would otherwise make if() fail and abort the run.
  # An unknown size is treated as "not up to date".
  up_to_date <- file.exists(full_file_path) &&
    isTRUE(file.size(full_file_path) == size)
  if (up_to_date) {
    return(FALSE)
  }

  # mode = "wb" forces a binary transfer. R only switches to binary on its own
  # for URLs ending in a few known extensions, so without it non-text files
  # (PDFs, slides, archives) can be corrupted on Windows.
  download.file(
    file_url,
    destfile = full_file_path,
    method = 'auto',
    quiet = TRUE,
    mode = "wb"
  )
  TRUE
}
# ---------------------------------------------
# Uses each function to gather the folder
# structure, the files to download, and
# then performs the downloading
# Returns a data frame where each row represents
# each file in Canvas files
# ---------------------------------------------
# Each unnest() names the list-column it expands. Calling unnest() without
# columns is deprecated since tidyr 1.0.0 and warns on every run; the
# positional form used here is accepted by older and newer tidyr alike.
download_log <-
  courses %>%
  mutate(folder_structure = map2(course_name, course_id, folder_structure)) %>%
  unnest(folder_structure) %>%
  mutate(files_in_folder = pmap(list(course_name, folder_id, folder_path), get_files_in_folder)) %>%
  unnest(files_in_folder) %>%
  mutate(downloaded = pmap_lgl(list(course_name, folder_path, file_name, file_url, size), download_file))
# ---------------------------------------------
#
# All code below relates only to logging
#
# ---------------------------------------------
# ---------------------------------------------
# Find only the files that were downloaded
# ---------------------------------------------
# Keep the rows flagged as downloaded and stamp them with the current time;
# only the four log columns are retained
files_downloaded <-
  download_log %>%
  filter(downloaded) %>%
  transmute(run_time = Sys.time(), course_name, folder_path, file_name)
# ---------------------------------------------
# If no files downloaded, create a data frame
# skeleton that includes the run_time
# ---------------------------------------------
# Nothing was downloaded: use a single placeholder row that carries only the
# run time, with empty strings in the other columns
if (nrow(files_downloaded) == 0) {
  files_downloaded <- tibble(
    run_time = Sys.time(),
    course_name = "",
    folder_path = "",
    file_name = ""
  )
}
# ---------------------------------------------
# Get the current download log, if one does
# not yet exist, create the structure for
# one
# ---------------------------------------------
# Column types are fixed on both branches so that bind_rows() can always
# combine the previous log with the new rows (date-time run_time, character
# for the rest). Guessed types depend on the log's contents, e.g. names that
# look like numbers or a long run of empty placeholder rows.
if (file.exists(file.path(sync_canvas_dir, "sync_canvas_log.csv"))) {
  current_log <- read_csv(
    file.path(sync_canvas_dir, "sync_canvas_log.csv"),
    col_types = cols(
      run_time = col_datetime(),
      course_name = col_character(),
      folder_path = col_character(),
      file_name = col_character()
    )
  )
} else {
  # Zero-row log with the same column types as files_downloaded;
  # Sys.time()[0] is an empty date-time vector
  current_log <- tibble(
    run_time = Sys.time()[0],
    course_name = character(),
    folder_path = character(),
    file_name = character()
  )
}
# ---------------------------------------------
# Put the most recently downloaded files on
# top of previously downloaded ones and
# export the log
# ---------------------------------------------
# Rows from this run go first, followed by the existing log; the combined
# table is written to the log file
write_csv(
  bind_rows(files_downloaded, current_log),
  file.path(sync_canvas_dir, "sync_canvas_log.csv")
)