# 請勿更動此 code chunk 程式碼
library(dplyr)
library(ggplot2)
# Titanic data used in last week's assignment (semicolon-delimited file).
titanic <- readr::read_delim(
  "titanic.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)
# Result of last week's grouped summary: the proportion of passengers with
# Survived == "Yes" within each Sex x Pclass group.
died_summary <- titanic %>%
  group_by(Sex, Pclass) %>%
  summarise(percent_survived = mean(Survived == "Yes")) %>%
  ungroup()
died_summary
#> # A tibble: 6 x 3
#> Sex Pclass percent_survived
#> <chr> <dbl> <dbl>
#> 1 female 1 0.968
#> 2 female 2 0.921
#> 3 female 3 0.5
#> 4 male 1 0.369
#> 5 male 2 0.157
#> 6 male 3 0.135
此題延續上週作業關於鐵達尼號乘客死亡率的分組摘要。上方的程式碼即是上週分組摘要的答案,儲存於 died_summary。你的任務是使用 ggplot2 將 died_summary 繪製成此長條圖:
# Write your code here
# Bar chart of percent_survived by passenger class, one panel per sex.
# geom_col() plots the precomputed values directly; it is the shorthand for
# geom_bar(stat = "identity").
ggplot(died_summary, aes(x = Pclass, y = percent_survived)) +
  geom_col() +
  facet_wrap(vars(Sex))
geom_bar()
或是 geom_col()
facet_wrap()
請自行尋找一份資料 (不得使用 titanic.csv 或內建資料),將其放在此次作業的 repo 並命名為 mydata.csv (副檔名請根據自己的資料而定, e.g., 若為 tab 分隔檔,請命名為 mydata.tsv)。你的任務是將這份資料讀入並使用 ggplot2 視覺化這份資料。
(10 分) 資料讀取與清理
將 mydata.csv 讀入並進行資料清理 (如果需要的話),以利接下來的資料視覺化
(30 分) 資料視覺化
請依這份資料的特性以及你想觀察的現象,對這份資料進行視覺化。依據你的喜好,你可以畫任意多張圖,但其中一張圖裡「必須」使用到 2 種或 2 種以上的 geom_*() 函數 (助教也只會依據這張圖評分)。這些 geom_*() 的使用需合理。例如,下方的例子雖然仍畫得出圖,但顯然是不合理的,這種情況將不予給分:
# Deliberately unreasonable example from the assignment text: the two layers
# map unrelated variables (a bar count per Species and a
# Sepal.Length / Petal.Width scatter), so the plot renders but is not a
# sensible combination of geoms and would receive no credit.
ggplot(iris) +
geom_bar(aes(x = Species)) +
geom_point(aes(Sepal.Length, Petal.Width))
(10 分) Tweak the plot
請依據你的個人偏好「修改」於 2. 所繪製出來的圖。例如,你可以使用某個 coord_*() 將圖的 x、y 軸對調;使用其它的風格;或是修改與新增圖的座標軸名稱與標題等。
若覺得題目說明不夠清楚,可以參考此題的範例。
# Write your code here
# 請務必印出 data frame
library(dplyr)
library(ggplot2)
# Read the self-chosen dataset (comma-delimited). The path is relative, so it
# resolves against the current working directory; this assumes the script is
# run from the assignment repo where mydata.csv lives. The former setwd() to a
# hard-coded personal directory was removed because it fails on any other
# machine.
mydata <- readr::read_delim(
  "mydata.csv",
  delim = ",",
  escape_double = FALSE,
  trim_ws = TRUE
)
#>
#> ─ Column specification ────────────────────────────
#> cols(
#> .default = col_character(),
#> GP = col_double()
#> )
#> ℹ Use `spec()` for the full column specifications.
# Damage dealt and gold earned are the chosen metrics. Both columns are read
# as text with a percent sign: strip the sign, convert to numeric and divide
# by 100. Entries that are not numeric after stripping become NA (hence the
# coercion warnings below).
mydata <- mutate(mydata, GOLD = as.numeric(sub("%", "", GOLD)) / 100)
#> Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
mydata <- mutate(mydata, DMG = as.numeric(sub("%", "", DMG)) / 100)
#> Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
# The dataset is large, so restrict the analysis to marksmen (ADC), whose
# role is pure damage output.
mydata <- filter(mydata, Pos == "ADC")
# Drop Tahm Kench: that row is an ADC player on a support pick, so it is
# removed as an outlier.
mydata <- filter(mydata, Champion != "Tahm Kench")
mydata
#> # A tibble: 17 x 25
#> Champion Pos GP P B PB W CTR K D A KDA
#> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 Aphelios ADC 43 20.0% 25.1% 45.1% 51% 63% 156 99 162 3.2
#> 2 Ashe ADC 1 0.5% 0.0% 0.5% 0% 100% 1 4 7 2
#> 3 Ezreal ADC 15 7.0% 6.5% 13.5% 73% 73% 71 24 52 5.1
#> 4 Jhin ADC 13 6.0% 0.0% 6.0% 23% 62% 42 31 41 2.7
#> 5 Jinx ADC 17 7.9% 1.9% 9.8% 65% 82% 80 30 86 5.5
#> 6 Kai'Sa ADC 153 71.2% 16.3% 87.4% 46% 17% 637 316 540 3.7
#> 7 Kalista ADC 16 7.4% 27.4% 34.9% 63% 63% 63 32 95 4.9
#> 8 Kog'Maw ADC 1 0.5% 0.0% 0.5% 0% 100% 2 3 2 1.3
#> 9 Lucian ADC 1 0.5% 20.9% 27.9% 0% 100% 1 3 4 1.7
#> 10 Miss Fo… ADC 3 1.4% 0.5% 1.9% 33% 100% 7 8 8 1.9
#> 11 Samira ADC 61 28.4% 20.9% 49.3% 43% 64% 267 140 203 3.4
#> 12 Senna ADC 17 7.9% 11.6% 20.5% 76% 53% 73 25 145 8.7
#> 13 Sivir ADC 1 0.5% 0.0% 0.5% 0% 100% 2 3 3 1.7
#> 14 Tristana ADC 35 16.3% 13.5% 30.2% 49% 43% 126 69 130 3.7
#> 15 Varus ADC 4 1.9% 0.5% 2.3% 75% 50% 13 8 33 5.8
#> 16 Vayne ADC 8 3.7% 0.0% 3.7% 50% 100% 31 19 25 2.9
#> 17 Xayah ADC 40 18.6% 5.1% 23.7% 58% 95% 140 85 172 3.7
#> # … with 13 more variables: KP <chr>, DTH <chr>, FB <chr>, GD10 <chr>,
#> # XPD10 <chr>, CSD10 <chr>, CSPM <chr>, CSP15 <chr>, DPM <chr>, DMG <dbl>,
#> # GOLD <dbl>, WPM <chr>, WCPM <chr>
# Write your code here
# 請務必印出圖片
library(ggrepel)
# Scatter of gold share (x) against damage share (y), one point per ADC
# champion. Each label is the champion name followed by games played; the
# vertical and horizontal lines mark the mean of each axis variable.
ADC <- ggplot(
  mydata,
  aes(x = GOLD, y = DMG, label = paste(Champion, "(", GP, ")"))
) +
  geom_point() +
  # na.rm = TRUE keeps the reference lines drawable if a share is NA.
  geom_vline(aes(xintercept = mean(GOLD, na.rm = TRUE))) +
  geom_hline(aes(yintercept = mean(DMG, na.rm = TRUE))) +
  geom_text_repel(size = 3) +
  labs(
    x = "GOLD(%)",
    y = "DMG(%)",
    title = "LCK 2021 Spring ADC Picks Analysis",
    subtitle = paste0(
      "Measured by Gold Consumed (team%) to Damage Done (team%) relationship",
      "\n*() after Champion indicates games played"
    )
  )
ADC
# Write your code here
# Scatter of gold share (x) against damage share (y) per ADC champion, with
# the four quadrants around the two means shaded and named in the legend.
gold_mean <- mean(mydata$GOLD, na.rm = TRUE)
dmg_mean <- mean(mydata$DMG, na.rm = TRUE)

# One row per quadrant, so every rectangle is drawn exactly once instead of
# once per data row. +/-Inf extends a rectangle to the panel edge.
quadrants <- data.frame(
  xmin = c(gold_mean, -Inf, gold_mean, -Inf),
  xmax = c(Inf, gold_mean, Inf, gold_mean),
  ymin = c(dmg_mean, dmg_mean, -Inf, -Inf),
  ymax = c(Inf, Inf, dmg_mean, dmg_mean),
  category = c(
    "Hyper Carry",
    "Efficient Carry",
    "Situational Carry",
    "Utility Carry"
  )
)

ADC <- ggplot(
  mydata,
  aes(x = GOLD, y = DMG, label = paste(Champion, "(", GP, ")"))
) +
  # inherit.aes = FALSE: the quadrant data has no GOLD/DMG/Champion columns,
  # so the plot-level mapping must not be applied to this layer.
  geom_rect(
    data = quadrants,
    mapping = aes(
      xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = category
    ),
    inherit.aes = FALSE
  ) +
  geom_point() +
  geom_vline(xintercept = gold_mean) +
  geom_hline(yintercept = dmg_mean) +
  geom_text_repel(size = 3) +
  labs(
    x = "GOLD(%)",
    y = "DMG(%)",
    title = "LCK 2021 Spring ADC Picks Analysis",
    subtitle = paste0(
      "Measured by Gold Consumed (team%) to Damage Done (team%) relationship",
      "\n*() after Champion indicates games played"
    ),
    fill = "ADC Categories"
  )
ADC
請使用 ggplot2 中的 mpg 這份資料繪製圖表。 (可使用 ?mpg 查看這份資料的說明)
class 是否為 SUV。 (6分)
displ 和「每加侖可高速行駛英里」 hwy 的線性回歸線,並將「年分」 year 以不同線條類型標示,且不須繪製信心區間 (請使用 geom_smooth())。(6分)
displ 的平均值。(6分)
SUV 和 Year。(2分)
# Modify the code below
# Highway mileage against engine displacement for the ggplot2 mpg data.
ggplot(data = mpg, mapping = aes(displ, hwy)) +
  # Colour marks whether the car class is "suv".
  geom_point(aes(color = factor(class == "suv"))) +
  # Vertical reference line at the mean displacement.
  geom_vline(aes(xintercept = mean(displ))) +
  # One linear fit per model year, distinguished by line type, without the
  # confidence band.
  geom_smooth(aes(linetype = factor(year)), method = "lm", se = FALSE) +
  # Legend titles follow the task statement: "SUV" and "Year".
  labs(
    x = "Engine displacement (litres)",
    y = "Highway miles (per gallon)",
    color = "SUV",
    linetype = "Year"
  )
#> `geom_smooth()` using formula 'y ~ x'