forked from kb-labb/huggingface_stats
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathplot_stats.R
79 lines (64 loc) · 2.61 KB
/
plot_stats.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
library(ggplot2)
library(dplyr)
library(forcats)
# Use the C locale for date formatting so that the "%b" month abbreviations
# used in the axis labels do not depend on the machine's locale.
Sys.setlocale("LC_TIME", "C")

# One-off creation of the output directory that the ggsave() calls write to.
# dir.create("plots")

# Every file found directly under data/models is read with readr::read_csv()
# and the resulting tables are stacked row-wise into a single data frame.
data_files <- list.files("data/models", full.names = TRUE)
if (length(data_files) == 0) {
  # Fail here with a clear message instead of later with a missing-column error.
  stop("No input files found in 'data/models'.", call. = FALSE)
}
df <- data_files %>%
  lapply(readr::read_csv) %>%
  bind_rows()
# Collapse df to one row per date, holding the downloads summed over all rows
# that share that date.
df_total_sum <- summarise(
  group_by(df, date),
  downloads = sum(downloads)
)
# Line-and-point chart of the summed downloads per date.
p_dl_total <- ggplot(df_total_sum, aes(date, downloads)) +
  geom_line(colour = "firebrick2") +
  geom_point(fill = "firebrick2", colour = "black", size = 1.5, shape = 21) +
  # Make sure the y axis always includes zero.
  expand_limits(y = 0) +
  scale_x_date(
    # Put a tick on every second distinct date; labels alternate over two rows.
    breaks = unique(df_total_sum$date)[
      seq(1, length(unique(df_total_sum$date)), by = 2)
    ],
    date_labels = "%Y-%b",
    guide = guide_axis(n.dodge = 2)
  ) +
  scale_y_continuous(
    # Ticks every 10 000 downloads, printed in full with a space as the
    # thousands separator.
    breaks = seq(0, max(df_total_sum$downloads) + 2000, by = 10000),
    labels = function(x) {
      format(x, scientific = FALSE, decimal.mark = ".", big.mark = " ")
    }
  ) +
  theme_light(base_size = 7) +
  labs(
    title = "Total number of downloads per month for TAIDE's models on Huggingface",
    x = "Date",
    y = "Number of downloads"
  )
# Total downloads per model over all dates, sorted from most to fewest, keeping
# the 10 largest totals. slice_max() keeps ties by default, so more than 10
# rows can be returned when totals are tied at the cut-off.
df_model <- df %>%
  group_by(model_name) %>%
  summarise(downloads = sum(downloads)) %>%
  ungroup() %>%
  arrange(desc(downloads)) %>%
  slice_max(order_by = downloads, n = 10)
# Per-date downloads for the models with the most downloads over the entire
# period, i.e. the models listed in df_model.
df_model_top <- df %>%
  filter(model_name %in% df_model$model_name) %>%
  group_by(date, model_name) %>%
  # .groups = "drop" returns an ungrouped tibble; the default would leave the
  # result grouped by date and print a message about the grouping.
  summarize(downloads = sum(downloads), .groups = "drop")
# Line-and-point chart with one series per model in df_model_top.
# fct_reorder(model_name, desc(downloads)) orders the model factor by its
# downloads in descending order (fct_reorder summarises with the median by
# default), which determines the order of the fill legend.
p_dl_model <- ggplot(
  df_model_top,
  aes(date, downloads, fill = fct_reorder(model_name, desc(downloads)))
) +
  geom_line(aes(colour = fct_reorder(model_name, desc(downloads)))) +
  geom_point(colour = "black", size = 1.5, shape = 21) +
  # Make sure the y axis always includes zero.
  expand_limits(y = 0) +
  scale_x_date(
    # Tick positions come from df_total_sum: every second distinct date, the
    # same expression the total-downloads plot uses.
    breaks = unique(df_total_sum$date)[
      seq(1, length(unique(df_total_sum$date)), by = 2)
    ],
    date_labels = "%Y-%b",
    guide = guide_axis(n.dodge = 2)
  ) +
  scale_y_continuous(
    # Ticks every 10 000 downloads, printed in full with a space as the
    # thousands separator.
    breaks = seq(0, max(df_model_top$downloads) + 2000, by = 10000),
    labels = function(x) {
      format(x, scientific = FALSE, decimal.mark = ".", big.mark = " ")
    }
  ) +
  theme_light(base_size = 7) +
  labs(
    title = "Number of downloads by model name for top 10 models",
    x = "Date",
    y = "Number of downloads",
    fill = "Model"
  ) +
  # The line colour duplicates the fill mapping, so only the fill legend is shown.
  guides(colour = "none")
# Write both plots to plots/ as 1920 x 1080 px JPEG files at 300 dpi.
# The output directory is created first if it is missing; showWarnings = FALSE
# makes the call a silent no-op when the directory already exists.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)

ggsave(
  filename = "plots/downloads_total.jpg",
  plot = p_dl_total,
  dpi = 300,
  width = 1920,
  height = 1080,
  units = "px"
)

ggsave(
  filename = "plots/downloads_by_model.jpg",
  plot = p_dl_model,
  dpi = 300,
  width = 1920,
  height = 1080,
  units = "px"
)