Skip to content

Commit

Permalink
update viz document
Browse files Browse the repository at this point in the history
  • Loading branch information
kingaa committed Sep 5, 2023
1 parent d57f021 commit b28e262
Show file tree
Hide file tree
Showing 5 changed files with 275 additions and 192 deletions.
8 changes: 4 additions & 4 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -375,13 +375,13 @@ <h4 class="author">Aaron A. King</h4>
<td align="left"><a href="tutorial.html">HTML</a></td>
</tr>
<tr class="even">
<td align="left">Data visualization</td>
<td align="left"><a href="viz.html">HTML</a> <a href="viz.R">R</a></td>
</tr>
<tr class="odd">
<td align="left">Data munging with <strong>dplyr</strong>, <strong>tidyr</strong>, and pipelines</td>
<td align="left"><a href="munging.html">HTML</a> <a href="munging.R">R</a></td>
</tr>
<tr class="odd">
<td align="left">Data visualization</td>
<td align="left"><a href="viz.html">HTML</a> <a href="viz.R">R</a></td>
</tr>
</tbody>
</table>
<hr />
Expand Down
2 changes: 1 addition & 1 deletion index.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ output:
| &nbsp; | &nbsp; |
|:------------------------------------------------------|:-----------------------------------------|
| Tutorial Introduction to **R** | [HTML](tutorial.html) |
| Data visualization | [HTML](viz.html)&nbsp;[R](viz.R) |
| Data munging with **dplyr**, **tidyr**, and pipelines | [HTML](munging.html)&nbsp;[R](munging.R) |
| Data visualization | [HTML](viz.html)&nbsp;[R](viz.R) |

----------------------
141 changes: 86 additions & 55 deletions viz.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,71 +36,102 @@ plot(oil)
plot(Gbbl~year,data=oil,subset=region=="North.America",type='l')
lines(Gbbl~year,data=oil,subset=region=="Eurasia",type="l",col='red')

library(reshape2)
library(tidyr)
library(dplyr)

dcast(oil,year~region) -> wideOil
names(wideOil)
wideOil$total <- wideOil$Africa+wideOil$Asia+wideOil$Central+wideOil$Eurasia+wideOil$Europe+wideOil$Middle+wideOil$North.America
wideOil$total <- apply(wideOil[,-1],1,sum)
plot(wideOil$year,wideOil$total,type='l')
oil |>
group_by(year) |>
summarize(Gbbl=sum(Gbbl)) -> total
plot(Gbbl~year,data=total,type='l')

read.csv(
library(readr)
read_csv(
"https://kingaa.github.io/R_Tutorial/data/energy_production.csv",
comment.char="#"
comment="#"
) -> energy

library(ggplot2)

ggplot(data=energy,mapping=aes(x=year,y=TJ,color=region,linetype=source))+geom_line()
ggplot(data=energy,mapping=aes(x=year,y=TJ,color=region))+geom_line()+facet_wrap(~source)
ggplot(data=energy,mapping=aes(x=year,y=TJ,color=source))+geom_line()+facet_wrap(~region,ncol=2)

ggplot(data=energy,mapping=aes(x=year,y=TJ))+geom_line()
ggplot(data=energy,mapping=aes(x=year,y=TJ,group=source))+geom_line()
ggplot(data=energy,mapping=aes(x=year,y=TJ,color=region,linetype=source))+
geom_line()
ggplot(data=energy,mapping=aes(x=year,y=TJ,color=region))+
geom_line()+
facet_wrap(~source)
ggplot(data=energy,mapping=aes(x=year,y=TJ,color=source))+
geom_line()+
facet_wrap(~region,ncol=2)

ggplot(data=energy,mapping=aes(x=year,y=TJ))+
geom_line()
ggplot(data=energy,mapping=aes(x=year,y=TJ,group=source))+
geom_line()

ggplot(data=energy,mapping=aes(x=year,y=TJ,group=interaction(source,region)))+
geom_line()

library(reshape2)

tot <- dcast(energy,year+source~'TJ',value.var="TJ",fun.aggregate=sum)
ggplot(data=tot,mapping=aes(x=year,y=TJ,color=source))+geom_line()
ggplot(data=tot,mapping=aes(x=year,y=TJ,fill=source))+geom_area()


reg <- dcast(energy,region+source~'TJ',value.var="TJ",fun.aggregate=mean)
ggplot(data=reg,mapping=aes(x=region,y=TJ,fill=source))+
geom_bar(stat="identity")+coord_flip()

library(plyr)
energy |>
group_by(year,source) |>
summarize(TJ=sum(TJ)) |>
ungroup() -> tot

ddply(energy,~region+source,summarize,TJ=mean(TJ)) -> x

ggplot(data=x,mapping=aes(x=region,y=TJ,fill=source))+
geom_bar(stat="identity")+coord_flip()

ddply(x,~region,mutate,frac=TJ/sum(TJ)) -> y

ggplot(data=y,mapping=aes(x=region,y=frac,fill=source))+
geom_bar(stat="identity")+coord_flip()+labs(x="fraction of production")


library(plyr)

mutate(energy,
source=as.character(source),
source1=mapvalues(source,
from=c("Hydro","Other Renewables","Coal","Oil","Gas"),
to=c("Renewable","Renewable","Carbon","Carbon","Carbon"))
) -> energy

ddply(energy,~source1+region+year,summarize,TJ=sum(TJ)) -> x

ggplot(data=x,mapping=aes(x=year,y=TJ,fill=source1))+
geom_area()+
facet_wrap(~region,scales="free_y",ncol=2)

ddply(energy,~source1+year,summarize,TJ=sum(TJ)) -> x
tot |>
ggplot(aes(x=year,y=TJ,color=source))+
geom_line()

ggplot(data=x,mapping=aes(x=year,y=TJ,fill=source1))+
geom_area()
tot |>
ggplot(aes(x=year,y=TJ,fill=source))+
geom_area()

energy |>
group_by(region,source) |>
summarize(TJ=mean(TJ)) |>
ungroup() -> reg

reg |>
ggplot(aes(x=region,y=TJ,fill=source))+
geom_bar(stat="identity")+
coord_flip()

reg |>
group_by(region) |>
mutate(frac = TJ/sum(TJ)) |>
ungroup() -> reg

reg |>
ggplot(aes(x=region,y=frac,fill=source))+
geom_bar(stat="identity")+
coord_flip()+
labs(y="fraction of production",x="region")

# Crosswalk table: maps each energy `source` to a broader category `source1`
# (Carbon, Nuclear, or Renewable).
# right_join() keeps every row of `energy` and attaches the matching
# `source1`; the joined result then replaces `energy`.
data.frame(
source=c("Coal","Gas","Oil","Nuclear","Hydro","Other Renewables"),
source1=c("Carbon","Carbon","Carbon","Nuclear","Renewable","Renewable")
) |>
right_join(energy,by="source") -> energy

energy |>
group_by(source1,region,year) |>
summarize(TJ = sum(TJ)) |>
ungroup() -> x

x |>
ggplot(aes(x=year,y=TJ,fill=source1))+
geom_area()+
facet_wrap(~region,ncol=2)+
labs(fill="source")

x |>
ggplot(aes(x=year,y=TJ,fill=source1))+
geom_area()+
facet_wrap(~region,scales="free_y",ncol=2)+
labs(fill="source")

x |>
group_by(source1,year) |>
summarize(TJ = sum(TJ)) |>
ungroup() -> y

y |>
ggplot(aes(x=year,y=TJ,fill=source1))+
geom_area()+
labs(fill="source")
144 changes: 86 additions & 58 deletions viz.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,13 @@ plot(oil)
plot(Gbbl~year,data=oil,subset=region=="North.America",type='l')
lines(Gbbl~year,data=oil,subset=region=="Eurasia",type="l",col='red')
library(reshape2)
library(tidyr)
library(dplyr)
dcast(oil,year~region) -> wideOil
names(wideOil)
wideOil$total <- wideOil$Africa+wideOil$Asia+wideOil$Central+wideOil$Eurasia+wideOil$Europe+wideOil$Middle+wideOil$North.America
wideOil$total <- apply(wideOil[,-1],1,sum)
plot(wideOil$year,wideOil$total,type='l')
oil |>
group_by(year) |>
summarize(Gbbl=sum(Gbbl)) -> total
plot(Gbbl~year,data=total,type='l')
```

## A systematic approach to visualization: the Grammar of Graphics
Expand Down Expand Up @@ -145,25 +145,33 @@ This is implemented in the **ggplot2** package.
### Energy production

```{r}
read.csv(
library(readr)
read_csv(
"https://kingaa.github.io/R_Tutorial/data/energy_production.csv",
comment.char="#"
comment="#"
) -> energy
library(ggplot2)
ggplot(data=energy,mapping=aes(x=year,y=TJ,color=region,linetype=source))+geom_line()
ggplot(data=energy,mapping=aes(x=year,y=TJ,color=region))+geom_line()+facet_wrap(~source)
ggplot(data=energy,mapping=aes(x=year,y=TJ,color=source))+geom_line()+facet_wrap(~region,ncol=2)
ggplot(data=energy,mapping=aes(x=year,y=TJ,color=region,linetype=source))+
geom_line()
ggplot(data=energy,mapping=aes(x=year,y=TJ,color=region))+
geom_line()+
facet_wrap(~source)
ggplot(data=energy,mapping=aes(x=year,y=TJ,color=source))+
geom_line()+
facet_wrap(~region,ncol=2)
```

What can you conclude from the above?
Try plotting these data on the log scale (`scale_y_log10()`).
How does your interpretation change?

```{r}
ggplot(data=energy,mapping=aes(x=year,y=TJ))+geom_line()
ggplot(data=energy,mapping=aes(x=year,y=TJ,group=source))+geom_line()
ggplot(data=energy,mapping=aes(x=year,y=TJ))+
geom_line()
ggplot(data=energy,mapping=aes(x=year,y=TJ,group=source))+
geom_line()
```

**Question:** How do you account for the appearance of the two plots immediately above?
Expand All @@ -177,38 +185,45 @@ ggplot(data=energy,mapping=aes(x=year,y=TJ,group=interaction(source,region)))+

Let's aggregate across regions by year and source of energy.
```{r}
library(reshape2)
energy |>
group_by(year,source) |>
summarize(TJ=sum(TJ)) |>
ungroup() -> tot
tot <- dcast(energy,year+source~'TJ',value.var="TJ",fun.aggregate=sum)
ggplot(data=tot,mapping=aes(x=year,y=TJ,color=source))+geom_line()
ggplot(data=tot,mapping=aes(x=year,y=TJ,fill=source))+geom_area()
tot |>
ggplot(aes(x=year,y=TJ,color=source))+
geom_line()
tot |>
ggplot(aes(x=year,y=TJ,fill=source))+
geom_area()
```

Now let's aggregate across years by region and source.

```{r}
reg <- dcast(energy,region+source~'TJ',value.var="TJ",fun.aggregate=mean)
ggplot(data=reg,mapping=aes(x=region,y=TJ,fill=source))+
geom_bar(stat="identity")+coord_flip()
```

An even better way to manipulate the data is to use the **plyr** package.
[See the data munging tutorial.](./data_munging.html)
See the [data munging tutorial](./munging.html) for more information on manipulating and reshaping data frames.

```{r}
library(plyr)
ddply(energy,~region+source,summarize,TJ=mean(TJ)) -> x
ggplot(data=x,mapping=aes(x=region,y=TJ,fill=source))+
geom_bar(stat="identity")+coord_flip()
ddply(x,~region,mutate,frac=TJ/sum(TJ)) -> y
ggplot(data=y,mapping=aes(x=region,y=frac,fill=source))+
geom_bar(stat="identity")+coord_flip()+labs(x="fraction of production")
energy |>
group_by(region,source) |>
summarize(TJ=mean(TJ)) |>
ungroup() -> reg
reg |>
ggplot(aes(x=region,y=TJ,fill=source))+
geom_bar(stat="identity")+
coord_flip()
reg |>
group_by(region) |>
mutate(frac = TJ/sum(TJ)) |>
ungroup() -> reg
reg |>
ggplot(aes(x=region,y=frac,fill=source))+
geom_bar(stat="identity")+
coord_flip()+
labs(y="fraction of production",x="region")
```

In the above, we first average across years for every region and source.
Expand All @@ -218,26 +233,41 @@ The `coord_flip` coordinate specification gives us horizontal bars instead of th
Fancy!

Let's compare fossil fuel production to renewable.
We divide the sources into three types: Carbon-based, Nuclear, and Renewable.
We accomplish this using a "crosswalk" table:
```{r}
library(plyr)
mutate(energy,
source=as.character(source),
source1=mapvalues(source,
from=c("Hydro","Other Renewables","Coal","Oil","Gas"),
to=c("Renewable","Renewable","Carbon","Carbon","Carbon"))
) -> energy
ddply(energy,~source1+region+year,summarize,TJ=sum(TJ)) -> x
ggplot(data=x,mapping=aes(x=year,y=TJ,fill=source1))+
geom_area()+
facet_wrap(~region,scales="free_y",ncol=2)
ddply(energy,~source1+year,summarize,TJ=sum(TJ)) -> x
ggplot(data=x,mapping=aes(x=year,y=TJ,fill=source1))+
geom_area()
# Crosswalk table: maps each energy `source` to a broader category `source1`
# (Carbon, Nuclear, or Renewable).
# right_join() keeps every row of `energy` and attaches the matching
# `source1`; the joined result then replaces `energy`.
data.frame(
source=c("Coal","Gas","Oil","Nuclear","Hydro","Other Renewables"),
source1=c("Carbon","Carbon","Carbon","Nuclear","Renewable","Renewable")
) |>
right_join(energy,by="source") -> energy
energy |>
group_by(source1,region,year) |>
summarize(TJ = sum(TJ)) |>
ungroup() -> x
x |>
ggplot(aes(x=year,y=TJ,fill=source1))+
geom_area()+
facet_wrap(~region,ncol=2)+
labs(fill="source")
x |>
ggplot(aes(x=year,y=TJ,fill=source1))+
geom_area()+
facet_wrap(~region,scales="free_y",ncol=2)+
labs(fill="source")
x |>
group_by(source1,year) |>
summarize(TJ = sum(TJ)) |>
ungroup() -> y
y |>
ggplot(aes(x=year,y=TJ,fill=source1))+
geom_area()+
labs(fill="source")
```

--------------------------
Expand All @@ -250,6 +280,4 @@ Ask a question regarding one of the datasets shown here and devise a visualizati

Produced with **R** version `r getRversion()`.

The [**R** code for this document is provided in an **R** script](./viz.R), which you can download, edit, and run.

--------------------------
Loading

0 comments on commit b28e262

Please sign in to comment.