From a959e5489dc385b673d395d48514db770151f6df Mon Sep 17 00:00:00 2001
From: JL HSIEH <jirlong@gmail.com>
Date: Mon, 27 May 2024 11:44:59 +0800
Subject: [PATCH] update

---
 R04_5 ggplotly.Rmd       |  4 ++--
 R04_6 gganimate.Rmd      | 25 ++++++++++++++++++-------
 R05_3p_tm_typhoon.Rmd    | 14 +++++++++++---
 R24_categorical_data.Rmd | 10 ++++++----
 4 files changed, 37 insertions(+), 16 deletions(-)
diff --git a/R04_5 ggplotly.Rmd b/R04_5 ggplotly.Rmd
index 8743ec9..a0acf0a 100644
--- a/R04_5 ggplotly.Rmd	
+++ b/R04_5 ggplotly.Rmd	
@@ -55,7 +55,7 @@ NW.plot <- NW %>%
          x = "Year",
          y = "Net Worth") + th
 
-ggplotly(NW.plot) %>%
+ggplotly(NW.plot) %>%  # keep the pipe: config() on the next line needs the plotly widget as input
   config(displayModeBar = FALSE)
 ```
 
@@ -75,7 +75,7 @@ NW.plot <- NW %>%
         text = paste0("<b>年(X): </b>", year, "<br>",
                       "<b>淨資產(Y): </b>", Net_Worth,"<br>",
                       "<b>年齡組: </b>", Category),
-        group=1) + 
+        group=Category) + 
     geom_line() + 
     theme_minimal() + 
     labs(title = "Net Worth by year grouped by age groups",
diff --git a/R04_6 gganimate.Rmd b/R04_6 gganimate.Rmd
index d02bd02..a6fc61a 100644
--- a/R04_6 gganimate.Rmd	
+++ b/R04_6 gganimate.Rmd	
@@ -31,24 +31,34 @@ pml <- read_excel("data/WORLD-MACHE_Gender_6.8.15.xls", "Sheet1", col_names=T) %
 2.  **`wmap <- fortify(wmap)`**: 使用**`fortify()`**函數將地圖的地理資料轉換為ggplot2套件可以理解的格式。這一步將地理資料轉換為一個資料框（data frame），其中每一行對應地圖上的一個多邊形區域。
 
 ```{r get-worldmap}
+# install.packages(c("sf", "rgdal"))  # sf is loaded just below; rgdal presumably for spTransform() - TODO confirm still needed
+library(sf)
 library(rworldmap)
 wmap <- getMap(resolution="low")
 wmap <- spTransform(wmap, CRS("+proj=robin")) # reproject
 wmap <- fortify(wmap)
 wmap %>%
     filter(!duplicated(id))
+
+
+data("countriesLow", package = "rworldmap")
+countries_sf <- st_as_sf(countriesLow)
+countries_sf_robinson <- st_transform(countries_sf, crs = "+proj=robin")
+ggplot(data = countries_sf_robinson) +
+  geom_sf() +
+  theme_minimal()
 ```
 
 ```{r join-map-data}
-pml_map <- wmap %>%
-    left_join(pml, by=c("id"="country")) %>%
+pml_map <- countries_sf_robinson %>%
+    left_join(pml, by="ISO3") %>%
     filter(!is.na(ISO3)) %>%
     mutate(year = as.integer(year))
 
 # devtools::install_github("thomasp85/transformr")
 
 pml_map %>%
-    select(id) %>%
+    select(ISO3) %>%
     filter(!duplicated(.))
 ```
 
@@ -60,10 +70,11 @@ pml_map %>%
 pml_map %>%
     filter(year==1995) %>%
     ggplot() + 
-    aes(x = long, y = lat, 
-                     group=group, fill=factor(degree)) + 
-    geom_polygon(color="grey") +
-    theme_void() + 
+    aes(x = LON, y = LAT, 
+        fill=factor(degree)) + 
+    geom_sf() +
+    theme_minimal() +  # trailing `+` keeps the chain open so scale_fill_manual() below is added to the plot
+    # theme_void() + 
     scale_fill_manual(values=c("1"="red",
                                "2"="LightCyan",
                                "3"="lightskyblue",
diff --git a/R05_3p_tm_typhoon.Rmd b/R05_3p_tm_typhoon.Rmd
index 666b938..ecdef13 100644
--- a/R05_3p_tm_typhoon.Rmd
+++ b/R05_3p_tm_typhoon.Rmd
@@ -152,7 +152,7 @@ news.df %>%
 # tf_idf
 
 ```{r}
-unnested.df %>%
+tf_idf <- unnested.df %>%
     group_by(word) %>%
     filter(n() > 10) %>%
     ungroup() %>%
@@ -162,6 +162,14 @@ unnested.df %>%
     arrange(-tf_idf) %>%
     slice(1:15) %>%
     ungroup() %>%
-    left_join(news.df %>% select(doc_id, title)) %>% View
-    
+    left_join(news.df %>% select(doc_id, title))
+```
+
+```{r}
+tf_idf %>%
+    group_by(doc_id) %>%
+    arrange(-tf_idf) %>%
+    slice(1:5) %>%
+    ungroup() %>%
+    count(word, sort = TRUE)  # spell out TRUE: `T` is an ordinary variable and can be reassigned
 ```
diff --git a/R24_categorical_data.Rmd b/R24_categorical_data.Rmd
index b9957d4..d7c6ac2 100644
--- a/R24_categorical_data.Rmd
+++ b/R24_categorical_data.Rmd
@@ -66,13 +66,13 @@ dt <- raw %>%
 ```
 
 ```{r}
-dt$QA3[1:10]
-as.integer(dt$QA3[1:10])
+dt$QA3_lv[1:10]
+as.integer(dt$QA3_lv[1:10])
 ```
 
 ### Excluding {#excluding}
 
-如果有某些類別變數的值（如「拒答」）不想被編入`factor`，可以在`reorder()`中加入`exclude`的參數指定不想被編入類別值。
+如果有某些類別變數的值（如「拒答」）不想被編入`factor`，可以在`ordered()`中加入`exclude`的參數指定不想被編入類別值。
 
 ```         
   mutate(QASide=ordered(QASide, 
@@ -154,7 +154,6 @@ dt2 %>% count(Q7_3rd_lv)
 -   連續性係數（Contingency Coeff.）和克拉瑪爾V（Cramer's V）的值，表示兩個變數之間的關聯性較弱，但仍存在一定程度的相關性。
 
 ```{r}
-
 (xtb <- xtabs(~QA3_lv + Q7_3rd_lv, data=dt2))
 (chi2 <- chisq.test(xtb))
 
@@ -162,6 +161,9 @@ vcd::assocstats(xtb)
 print(round(chi2$observed, 2))
 print(round(chi2$expected, 2))
 print(round(chi2$residuals, 2))
+
+
+
 ```
 
 ```{r}