練習題

請將 data.csv¹ 讀入成一個 data frame,並:

  1. 請刪除重複的資料
  2. 請使用105-108年的資料
  3. 請將各縣市各年各月份的垃圾產生量 GarbageGenerated 製作成各縣市年度總垃圾量TotalGarbage摘要表
  4. 請寫一個函數分類各縣市所在的區域,並為摘要表新增區域 region 欄位
  5. 繪出圖表:以各縣市為 x 軸座標,以 TotalGarbage 為 y 軸,並以年份為 facet,用顏色標示區域 region
# 讀取資料
# 分類縣市函數
# 資料整理
# 視覺化

Solution A

# 讀取資料
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)
# Read the CSV file (one row per year, month and county) into `df`.
df <- readr::read_csv(file = "data.csv")
#> 
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   year = col_double(),
#>   month = col_double(),
#>   county = col_character(),
#>   GarbageGenerated = col_number(),
#>   GarbageClearance = col_number(),
#>   GarbageRecycled = col_number(),
#>   FoodWastesRecycled = col_number(),
#>   BulkWasteRecyclingandReuse = col_number()
#> )
# County classification lookup.
# `cls` is a named list: each element is named after a region and holds the
# character vector of counties that belong to it. The counties of a region
# are written as one space-separated string and split in a single call;
# strsplit() keeps the names of its input, so the list is keyed by region.
cls <- strsplit(
  c(
    island = "金門縣 連江縣 澎湖縣",
    north = "基隆市 新北市 臺北市 桃園市 新竹縣 新竹市 宜蘭縣",
    center = "苗栗縣 臺中市 彰化縣 雲林縣 南投縣",
    south = "嘉義縣 嘉義市 臺南市 高雄市 屏東縣",
    east = "花蓮縣 臺東縣"
  ),
  split = " ",
  fixed = TRUE
)

# Return the name of the region whose county vector in `cls` contains `x`.
#
# `x` is a single county name; `cls` is the named list of county vectors
# found in the enclosing environment. The regions are checked in the order
# of names(cls) and the first match wins.
#
# A county that appears in no region yields NA_character_ (not NULL), so
# every call returns a length-one character value and a caller collecting
# the results gets an atomic character vector instead of a list.
region_atom <- function(x) {
  for (name in names(cls)) {
    if (x %in% cls[[name]]) {
      return(name)
    }
  }
  NA_character_
}
# Map a vector of county names to region names.
#
# Calls region_atom() on each element of `x` and returns an unnamed
# character vector of the same length as `x`.
#
# vapply() with a character(1) template keeps the return type fixed: empty
# input gives character(0) rather than an empty list. A NULL result from
# region_atom() (no matching region) is converted to NA_character_ so one
# unknown county cannot turn the whole result into a list.
region <- function(x) {
  vapply(
    x,
    function(county) {
      hit <- region_atom(county)
      if (is.null(hit)) NA_character_ else hit
    },
    character(1),
    USE.NAMES = FALSE
  )
}
# Data wrangling: drop duplicated rows, keep years 105-108, and sum the
# monthly GarbageGenerated values into one TotalGarbage per year and county,
# then label each county with its region.
# `.groups = "drop"` makes summarise() return an ungrouped tibble directly,
# so no separate ungroup() step is needed and no grouping message is emitted.
garbage_by_region <- df %>%
  distinct() %>%
  filter(year %in% 105:108) %>%
  group_by(year, county) %>%
  summarise(TotalGarbage = sum(GarbageGenerated), .groups = "drop") %>%
  mutate(region = region(county))
garbage_by_region
#> # A tibble: 88 x 4
#>     year county TotalGarbage region
#>    <dbl> <chr>         <dbl> <chr> 
#>  1   105 南投縣       111316 center
#>  2   105 嘉義市        58253 south 
#>  3   105 嘉義縣       108611 south 
#>  4   105 基隆市        89712 north 
#>  5   105 宜蘭縣        97757 north 
#>  6   105 屏東縣       169102 south 
#>  7   105 彰化縣       238499 center
#>  8   105 新北市       577992 north 
#>  9   105 新竹市        88575 north 
#> 10   105 新竹縣        99268 north 
#> # … with 78 more rows
# Visualization: one bar per county with height TotalGarbage, filled by
# region, drawn in a separate panel for each year. The x-axis labels are
# rotated 90 degrees.
ggplot(
  garbage_by_region,
  aes(x = county, y = TotalGarbage, fill = region)
) +
  geom_col(position = "dodge") +
  facet_wrap(~ year) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )

Solution B

library(dplyr)
library(ggplot2)

# Read the CSV file into `trash`.
trash <- readr::read_csv(file = "data.csv")
#> 
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   year = col_double(),
#>   month = col_double(),
#>   county = col_character(),
#>   GarbageGenerated = col_number(),
#>   GarbageClearance = col_number(),
#>   GarbageRecycled = col_number(),
#>   FoodWastesRecycled = col_number(),
#>   BulkWasteRecyclingandReuse = col_number()
#> )
# Preview the first six rows of the data.
head(trash, n = 6L)

# Classify county names into regions.
#
# `county` is a vector of county names. The result is a character vector of
# the same length whose values are "北部", "中部", "南部", "東部" or "離島".
# When `county` is an unnamed character vector its values are used as the
# names of the result.
#
# The island counties are listed explicitly instead of being a catch-all,
# so a name that matches no region (a typo, NA, a new label in the data)
# yields NA_character_ rather than being reported as "離島".
# vapply() with a character(1) template keeps the return type fixed, so
# empty input gives an empty character vector rather than an empty list.
as_region <- function(county) {
  members <- list(
    "北部" = c("臺北市", "新北市", "基隆市", "桃園市", "新竹市", "新竹縣", "宜蘭縣"),
    "中部" = c("苗栗縣", "臺中市", "彰化縣", "南投縣", "雲林縣"),
    "南部" = c("嘉義市", "嘉義縣", "臺南市", "高雄市", "屏東縣", "澎湖縣"),
    "東部" = c("花蓮縣", "臺東縣"),
    "離島" = c("金門縣", "連江縣")
  )
  vapply(
    county,
    function(x) {
      for (label in names(members)) {
        if (x %in% members[[label]]) {
          return(label)
        }
      }
      NA_character_
    },
    character(1)
  )
}

# Summary table: keep years 105-108, drop duplicated rows, and sum the
# monthly GarbageGenerated values into one TotalGarbage per county and year,
# then label each county with its Region.
# The year filter is an explicit range, so any year outside 105-108 that is
# present in the file is excluded, not only 109.
# `.groups = "drop"` makes summarise() return an ungrouped tibble directly,
# so no separate ungroup() step is needed and no grouping message is emitted.
total_trash <- trash %>%
  filter(year %in% 105:108) %>%
  distinct() %>%
  group_by(county, year) %>%
  summarise(TotalGarbage = sum(GarbageGenerated), .groups = "drop") %>%
  mutate(Region = as_region(county))
# Bar chart: TotalGarbage per county, filled by Region, one panel per year.
# The x-axis labels are shrunk and rotated 45 degrees.
ggplot(total_trash, aes(x = county, y = TotalGarbage, fill = Region)) +
  geom_col() +
  facet_wrap(~ year) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 5)
  )

#> # A tibble: 6 x 8
#>    year month county GarbageGenerated GarbageClearance GarbageRecycled
#>   <dbl> <dbl> <chr>             <dbl>            <dbl>           <dbl>
#> 1   109    10 金門縣             620.             620.           1332.
#> 2   109    10 連江縣             201.             201.            263.
#> 3   109    10 新北市           43161.           35698.          95007.
#> 4   109    10 臺北市           15842.           13524.          36382.
#> 5   109    10 桃園市           42540.           40539.          55435.
#> 6   109    10 臺中市           39405.           28373.          46687.
#> # … with 2 more variables: FoodWastesRecycled <dbl>,
#> #   BulkWasteRecyclingandReuse <dbl>

  1. 資料來源:https://data.gov.tw/dataset/89022↩︎