# 請勿更動此 code chunk 程式碼
library(dplyr)
library(ggplot2)
# 上週作業使用到的 titanic data
<- readr::read_delim("titanic.csv",
titanic delim = ";",
escape_double = FALSE,
trim_ws = TRUE)
# 上週作業 `分組摘要` 的結果
<- titanic %>%
died_summary group_by(Sex, Pclass) %>%
summarise(percent_survived = mean(Survived == "Yes")) %>%
ungroup()
died_summary
#> # A tibble: 6 x 3
#> Sex Pclass percent_survived
#> <chr> <dbl> <dbl>
#> 1 female 1 0.968
#> 2 female 2 0.921
#> 3 female 3 0.5
#> 4 male 1 0.369
#> 5 male 2 0.157
#> 6 male 3 0.135
此題延續上週作業關於鐵達尼號乘客死亡率的分組摘要。上方的程式碼即是上週分組摘要的答案,儲存於 died_summary
。你的任務是使用 ggplot2
將 died_summary
繪製成此長條圖:
# Write your code here
ggplot(data = died_summary ) +
geom_bar(mapping = aes(x = Pclass, y = percent_survived), stat = "identity") +
facet_wrap(~Sex)
geom_bar()
或是 geom_col()
facet_wrap()
請自行尋找一份資料 (不得使用 titanic.csv
或內建資料),將其放在此次作業的 repo 並命名為 mydata.csv
(副檔名請根據自己的資料而定, e.g., 若為 tab 分隔檔,請命名為 mydata.tsv
)。你的任務是將這份資料讀入並使用 ggplot2 視覺化這份資料。
(10 分) 資料讀取與清理
將 mydata.csv
讀入並進行資料清理 (如果需要的話),以利接下來的資料視覺化
(30 分) 資料視覺化
請依這份資料的特性以及你想觀察的現象,對這份資料進行視覺化。依據你的喜好,你可以畫任意多張圖,但其中一張圖裡「必須」使用到 2 種或 2 種以上的 geom_*()
函數 (助教也只會依據這張圖評分)。這些 geom_*()
的使用需合理。例如,下方的例子雖然仍畫得出圖,但顯然是不合理的,這種情況將不予給分:
ggplot(iris) +
geom_bar(aes(x = Species)) +
geom_point(aes(Sepal.Length, Petal.Width))
(10 分) Tweak the plot
請依據你的個人偏好「修改」於 2.
所繪製出來的圖。例如,你可以使用某個 coord_*()
將圖的 x、y 軸對調;使用其它的風格;或是修改與新增圖的座標軸名稱與標題等。
若覺得題目說明不夠清楚,可以參考此題的範例。
# Write your code here
# 請務必印出 data frame
library(tibble)
library(dplyr)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
<- as_tibble(read.csv("mydata.csv"))
ubike #view(ubike)
# date_time <- ymd_hm(ubike$time[9])
# date_time
# hour(date_time)
<- 25.014947
NTUlatitude <- 121.535549
NTUlongitude <- 110.9362
latitude_KM <- 101.77545
longitude_KM
<- ubike %>%
ubikeClean
# 整理出距離台大一公里以內的站
mutate(nearNTU = if_else( ( (abs(latitude - NTUlatitude) * latitude_KM) ** 2 + (abs(longitude - NTUlongitude) * longitude_KM) ** 2) ** 1/2 < 1, 1, 0)) %>%
# 整理出:可借車%、月份、日、時間(24小時制)
mutate(bikePCT = bike / lot, month = month(ymd_hm(time)), mday = mday(ymd_hm(time)), hour = hour(ymd_hm(time)))
#view(ubikeClean)
<- ubikeClean %>%
ubike24hour group_by(station, hour) %>%
# bike_avg:當小時有幾台車可借(一個月平均)
# bikePCT_avg:當小時的可借車率(總可借車數/總車柱數)(一個月平均)
summarise(bikePCT_avg = mean(bikePCT), bike_avg = mean(bike), nearNTU) %>%
group_by(hour, nearNTU) %>%
# hour_bikePCT_avg:(near NTU vs. far NTU)當小時的平均可借車率(一個月平均)
summarise(hour_bikePCT_avg = mean(bikePCT_avg))
#> `summarise()` regrouping output by 'station', 'hour' (override with `.groups` argument)
#> `summarise()` regrouping output by 'hour' (override with `.groups` argument)
view(ubike24hour)
<- ubikeClean %>%
ubikeNTU filter(nearNTU == 1) %>%
group_by(station, hour) %>%
# bike_avg:當小時有幾台車可借(一個月平均)
# bikePCT_avg:當小時的可借車率(總可借車數/總車柱數)(一個月平均)
summarise(bikePCT_avg = mean(bikePCT), bike_avg = mean(bike)) %>%
group_by(hour) %>%
# hour_bikePCT_avg:所有近台大站當小時的平均可借車率(一個月平均)
summarise(station, hour_bikePCT_avg = mean(bikePCT_avg), bikePCT_avg, bike_avg)
#> `summarise()` regrouping output by 'station' (override with `.groups` argument)
#> `summarise()` regrouping output by 'hour' (override with `.groups` argument)
#view(ubikeNTU)
ubikeNTU
#> # A tibble: 216 x 5
#> # Groups: hour [24]
#> hour station hour_bikePCT_avg bikePCT_avg bike_avg
#> <int> <fct> <dbl> <dbl> <dbl>
#> 1 0 JianGuo & Heping Intersection 0.197 0.150 7.77
#> 2 0 Keelung & Changxing Intersection 0.197 0.372 27.5
#> 3 0 MRT Gongguan Sta.(Exit 2) 0.197 0.0312 0.935
#> 4 0 MRT Taipower Building Sta. (Exit~ 0.197 0.398 15.9
#> 5 0 N.T.U.S.T 0.197 0.176 8.10
#> 6 0 NTU Information Bldg. 0.197 0.0771 5.55
#> 7 0 Roosevelt & Xinsheng S. Intersec~ 0.197 0.0436 3.84
#> 8 0 Xinhai & Xinsheng Intersection 0.197 0.3 9
#> 9 0 Xinsheng & Heping Intersection 0.197 0.228 10.5
#> 10 1 JianGuo & Heping Intersection 0.270 0.176 9.13
#> # ... with 206 more rows
<- ggplot(data = ubike24hour, aes(x = hour, y = hour_bikePCT_avg, group = nearNTU, color = factor(nearNTU))) +
ubikeGraphCompare geom_point() +
geom_line() +
scale_color_discrete(name = "distance", labels = c("far NTU", "near NTU")) +
labs(title = "bike pct compare between near and far NTU") + ylab("avg. bike pct (total bike/total lot)") + xlab("hour")
ubikeGraphCompare
# Write your code here
# 請務必印出圖片
library(ggplot2)
library(RColorBrewer)
<- ggplot(data = ubikeNTU) +
ubikeGraph # 主坐標軸:(各站一個月內)當小時的平均車數(堆疊長條圖)
geom_bar(aes(x = hour, y = bike_avg, fill = station), stat = "identity", position = "fill") +
# 修改標籤
labs(title = "bike amount at stations near NTU") + ylab("bike amount") + xlab("hour")+
# 改變配色
scale_fill_brewer(palette="Spectral") +
# 副座標軸:所有站(平均一個月內)當小時的可借車率(總可借車數/總車柱數)
geom_point(aes(x = hour, y = hour_bikePCT_avg)) +
geom_line(aes(x = hour, y = hour_bikePCT_avg)) +
# 修改副座標軸標籤(百分比化)
scale_y_continuous(sec.axis = sec_axis(~. , name = "avg. bike pct (total bike/total lot) ", labels = scales::percent))
ubikeGraph
# Write your code here
library(extrafont)
#> Registering fonts with R
#font_import()
#fonts() # show all fonts import
# 要設 device 才不會出現 error:font family not found in Windows font database
loadfonts(device = "win", quiet = T)
<- ubikeGraph +
ubikeGraphTweak
theme(text = element_text(family="Lucida Handwriting", size = 10, color = "steelblue4"),
panel.background = element_rect(fill = "lightcyan", colour = "steelblue4", size = 2,linetype = "dotted"),
panel.grid.major = element_line(colour = "lightcyan3", size = 0.75, linetype = "solid"),
panel.grid.minor = element_line(colour = "lightcyan3", size = 0.375, linetype = "dashed"))
ubikeGraphTweak
請使用 ggplot2
中的 mpg
這份資料繪製圖表。 (可使用 ?mpg
查看這份資料的說明)
class
是否為 SUV。 (6分)displ
和「每加侖可高速行駛英里」 hwy
的線性回歸線,並將「年分」 year
以不同線條類型標示,且不須繪製信心區間 (請使用 geom_smooth()
)。(6分)displ
的平均值。(6分)SUV
和 Year
。(2分)# Modify the code below
ggplot(data = mpg, mapping = aes(displ, hwy)) +
geom_point(aes(color = ifelse(class == "suv","#F8766D", "#00BFC4"))) +
geom_smooth(aes(group = year, linetype = factor(year)), method = lm, se = F) +
geom_vline(aes(xintercept = mean(displ))) +
guides(color = guide_legend("SUV"))+
scale_colour_discrete(labels = c("False", "True"))+
guides(linetype = guide_legend("Year"))+
labs(x = "Engine displacement (litres)", y = "Highway miles (per gallon)")
#> `geom_smooth()` using formula 'y ~ x'
# 找預設顏色
# library(scales)
# show_col(hue_pal()(2))