1. 課題を見つける/仮説を立てる
2. 実験や観察をしてデータを集める
3. データを整理する
4. データを解析して仮説を検証する
- 生データをプロットするだけではつまらない…
- データの計算や解析もプログラミングで行おう!
※資料作成は岩嵜航さん(東北大学)にご協力いただきました。
(左右キーで進みます!)
Return to HOME
# Load the tidyverse (dplyr verbs, the %>% pipe and the `diamonds` data set).
library(tidyverse)

# Mean carat and mean price per cut grade, for diamonds heavier than 2 carats.
# summarise(across(everything(), mean)) replaces the superseded
# summarise_all(mean); grouping columns are skipped automatically, so the
# resulting columns are still `cut`, `carat` and `price`.
result <- diamonds %>%
  select(carat, cut, price) %>%              # keep only carat, cut and price
  filter(carat > 2) %>%                      # keep rows with carat > 2
  group_by(cut) %>%                          # one group per cut grade
  summarise(across(everything(), mean)) %>%  # mean of every remaining column
  print()                                    # show the table; it is still assigned to `result`
## # A tibble: 5 × 3 ## cut carat price ## <ord> <dbl> <dbl> ## 1 Fair 2.30 11972. ## 2 Good 2.14 14629. ## 3 Very Good 2.12 15133. ## 4 Premium 2.16 14992. ## 5 Ideal 2.15 15589.
# Install palmerpenguins only when it is not available yet, so that re-running
# the script does not reinstall the package (installing once is enough).
if (!requireNamespace("palmerpenguins", quietly = TRUE)) {
  install.packages("palmerpenguins")
}
library(palmerpenguins) #penguinsを読み込み library(tidyverse) #tidyverseを読み込み
# Show the first rows of the penguins data (6 is head()'s default).
head(x = penguins, n = 6L)
## # A tibble: 6 × 8 ## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ## <fct> <fct> <dbl> <dbl> <int> <int> ## 1 Adelie Torgersen 39.1 18.7 181 3750 ## 2 Adelie Torgersen 39.5 17.4 186 3800 ## 3 Adelie Torgersen 40.3 18 195 3250 ## 4 Adelie Torgersen NA NA NA NA ## 5 Adelie Torgersen 36.7 19.3 193 3450 ## 6 Adelie Torgersen 39.3 20.6 190 3650 ## # ℹ 2 more variables: sex <fct>, year <int>
# Inspect the structure of penguins: every variable with its type and first values.
str(object = penguins)
## tibble [344 × 8] (S3: tbl_df/tbl/data.frame) ## $ species : Factor w/ 3 levels "Adelie","Chinstrap",..: 1 1 1 1 1 1 1 1 1 1 ... ## $ island : Factor w/ 3 levels "Biscoe","Dream",..: 3 3 3 3 3 3 3 3 3 3 ... ## $ bill_length_mm : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ... ## $ bill_depth_mm : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ... ## $ flipper_length_mm: int [1:344] 181 186 195 NA 193 190 181 195 193 190 ... ## $ body_mass_g : int [1:344] 3750 3800 3250 NA 3450 3650 3625 4675 3475 4250 ... ## $ sex : Factor w/ 2 levels "female","male": 2 1 1 NA 1 2 1 2 NA NA ... ## $ year : int [1:344] 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 ...
# From penguins, keep only the species and bill_length_mm columns.
result <- select(penguins, species, bill_length_mm)
# Show the first rows.
head(result)
## # A tibble: 6 × 2 ## species bill_length_mm ## <fct> <dbl> ## 1 Adelie 39.1 ## 2 Adelie 39.5 ## 3 Adelie 40.3 ## 4 Adelie NA ## 5 Adelie 36.7 ## 6 Adelie 39.3
# Select columns by position: the 1st and the 4th column of penguins.
result <- penguins %>%
  select(c(1, 4))
# Show the first rows.
head(result)
## # A tibble: 6 × 2 ## species bill_depth_mm ## <fct> <dbl> ## 1 Adelie 18.7 ## 2 Adelie 17.4 ## 3 Adelie 18 ## 4 Adelie NA ## 5 Adelie 19.3 ## 6 Adelie 20.6
# Keep every column of penguins whose name starts with "b".
result <- select(penguins, starts_with("b"))
# Show the first rows.
head(result)
## # A tibble: 6 × 3 ## bill_length_mm bill_depth_mm body_mass_g ## <dbl> <dbl> <int> ## 1 39.1 18.7 3750 ## 2 39.5 17.4 3800 ## 3 40.3 18 3250 ## 4 NA NA NA ## 5 36.7 19.3 3450 ## 6 39.3 20.6 3650
# Keep everything except the species and island columns.
result <- penguins %>%
  select(-species, -island)
# Show the first rows.
head(result)
## # A tibble: 6 × 6 ## bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex year ## <dbl> <dbl> <int> <int> <fct> <int> ## 1 39.1 18.7 181 3750 male 2007 ## 2 39.5 17.4 186 3800 female 2007 ## 3 40.3 18 195 3250 female 2007 ## 4 NA NA NA NA <NA> 2007 ## 5 36.7 19.3 193 3450 female 2007 ## 6 39.3 20.6 190 3650 male 2007
# Keep only the island and body_mass_g columns.
result <- select(penguins, island, body_mass_g)
# Show the first rows.
head(result)
## # A tibble: 6 × 2 ## island body_mass_g ## <fct> <int> ## 1 Torgersen 3750 ## 2 Torgersen 3800 ## 3 Torgersen 3250 ## 4 Torgersen NA ## 5 Torgersen 3450 ## 6 Torgersen 3650
# Keep only the rows of Gentoo penguins.
result <- filter(penguins, species == "Gentoo")
# Show the first rows.
head(result)
## # A tibble: 6 × 8 ## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ## <fct> <fct> <dbl> <dbl> <int> <int> ## 1 Gentoo Biscoe 46.1 13.2 211 4500 ## 2 Gentoo Biscoe 50 16.3 230 5700 ## 3 Gentoo Biscoe 48.7 14.1 210 4450 ## 4 Gentoo Biscoe 50 15.2 218 5700 ## 5 Gentoo Biscoe 47.6 14.5 215 5400 ## 6 Gentoo Biscoe 46.5 13.5 210 4550 ## # ℹ 2 more variables: sex <fct>, year <int>
# Keep only Chinstrap and Gentoo penguins (%in% tests membership in a set).
result <- filter(penguins, species %in% c("Chinstrap", "Gentoo"))
# Show the first rows.
head(result)
## # A tibble: 6 × 8 ## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ## <fct> <fct> <dbl> <dbl> <int> <int> ## 1 Gentoo Biscoe 46.1 13.2 211 4500 ## 2 Gentoo Biscoe 50 16.3 230 5700 ## 3 Gentoo Biscoe 48.7 14.1 210 4450 ## 4 Gentoo Biscoe 50 15.2 218 5700 ## 5 Gentoo Biscoe 47.6 14.5 215 5400 ## 6 Gentoo Biscoe 46.5 13.5 210 4550 ## # ℹ 2 more variables: sex <fct>, year <int>
# Keep the rows whose bill_length_mm is strictly less than 45.
# NOTE(review): the original comment said "45 or less", but the code uses `<`,
# so a bill length of exactly 45 is excluded - confirm which one is intended.
result <- filter(penguins, bill_length_mm < 45)
# Show the first rows.
head(result)
## # A tibble: 6 × 8 ## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ## <fct> <fct> <dbl> <dbl> <int> <int> ## 1 Adelie Torgersen 39.1 18.7 181 3750 ## 2 Adelie Torgersen 39.5 17.4 186 3800 ## 3 Adelie Torgersen 40.3 18 195 3250 ## 4 Adelie Torgersen 36.7 19.3 193 3450 ## 5 Adelie Torgersen 39.3 20.6 190 3650 ## 6 Adelie Torgersen 38.9 17.8 181 3625 ## # ℹ 2 more variables: sex <fct>, year <int>
# Scatter plot of bill length against bill depth for all penguins,
# coloured by species.
gp <- ggplot(penguins, aes(x = bill_length_mm, y = bill_depth_mm, color = species)) +
  geom_point()
# Draw the plot.
gp
## Warning: Removed 2 rows containing missing values (`geom_point()`).
# Same scatter plot, drawn from the filtered data currently stored in `result`.
gp <- ggplot(result, aes(x = bill_length_mm, y = bill_depth_mm, color = species)) +
  geom_point()
# Draw the plot.
gp
# Keep the rows that satisfy both conditions (AND):
# bill length below 45 and bill depth above 16.
result <- penguins %>%
  filter(bill_length_mm < 45 & bill_depth_mm > 16)
# Show the first rows.
head(result)
## # A tibble: 6 × 8 ## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ## <fct> <fct> <dbl> <dbl> <int> <int> ## 1 Adelie Torgersen 39.1 18.7 181 3750 ## 2 Adelie Torgersen 39.5 17.4 186 3800 ## 3 Adelie Torgersen 40.3 18 195 3250 ## 4 Adelie Torgersen 36.7 19.3 193 3450 ## 5 Adelie Torgersen 39.3 20.6 190 3650 ## 6 Adelie Torgersen 38.9 17.8 181 3625 ## # ℹ 2 more variables: sex <fct>, year <int>
# Bill length vs. bill depth for the rows kept in `result`, coloured by species.
gp <- ggplot(data = result) +
  geom_point(
    mapping = aes(x = bill_length_mm, y = bill_depth_mm, color = species)
  )
# Draw the plot.
gp
# Keep the rows that satisfy either condition (OR):
# bill length below 40 or above 50.
result <- filter(penguins, bill_length_mm < 40 | bill_length_mm > 50)
# Show the first rows.
head(result)
## # A tibble: 6 × 8 ## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ## <fct> <fct> <dbl> <dbl> <int> <int> ## 1 Adelie Torgersen 39.1 18.7 181 3750 ## 2 Adelie Torgersen 39.5 17.4 186 3800 ## 3 Adelie Torgersen 36.7 19.3 193 3450 ## 4 Adelie Torgersen 39.3 20.6 190 3650 ## 5 Adelie Torgersen 38.9 17.8 181 3625 ## 6 Adelie Torgersen 39.2 19.6 195 4675 ## # ℹ 2 more variables: sex <fct>, year <int>
# Bill length vs. bill depth for the rows kept in `result`, coloured by species.
gp <- ggplot(result, aes(x = bill_length_mm, y = bill_depth_mm, color = species)) +
  geom_point()
# Draw the plot.
gp
# Females on Dream island recorded in 2008 or later
# (all three conditions must hold).
result2 <- filter(
  penguins,
  year >= 2008,
  island == "Dream",
  sex == "female"
)
# Show the first rows.
head(result2)
## # A tibble: 6 × 8 ## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ## <fct> <fct> <dbl> <dbl> <int> <int> ## 1 Adelie Dream 37.3 17.8 191 3350 ## 2 Adelie Dream 36.9 18.6 189 3500 ## 3 Adelie Dream 38.9 18.8 190 3600 ## 4 Adelie Dream 35.7 18 202 3550 ## 5 Adelie Dream 34 17.1 185 3400 ## 6 Adelie Dream 36.2 17.3 187 3300 ## # ℹ 2 more variables: sex <fct>, year <int>
# Body mass of the penguins in `result2`, one column of points per species.
gp <- ggplot(result2, aes(x = species, y = body_mass_g)) +
  geom_point()
# Draw the plot.
gp
# Same plot, with the points additionally coloured by species.
gp <- ggplot(result2, aes(x = species, y = body_mass_g, color = species)) +
  geom_point()
# Draw the plot.
gp
# Same plot, with the points jittered so that overlapping values become visible.
gp <- ggplot(result2, aes(x = species, y = body_mass_g, color = species)) +
  geom_point(position = position_jitter(width = 0.1))
# Draw the plot.
gp
# Add a column holding bill_length_mm divided by bill_depth_mm.
# No name is given, so the new column is named after the expression itself.
result <- mutate(penguins, bill_length_mm/bill_depth_mm)
# Show the first rows.
head(result)
## # A tibble: 6 × 9 ## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ## <fct> <fct> <dbl> <dbl> <int> <int> ## 1 Adelie Torgersen 39.1 18.7 181 3750 ## 2 Adelie Torgersen 39.5 17.4 186 3800 ## 3 Adelie Torgersen 40.3 18 195 3250 ## 4 Adelie Torgersen NA NA NA NA ## 5 Adelie Torgersen 36.7 19.3 193 3450 ## 6 Adelie Torgersen 39.3 20.6 190 3650 ## # ℹ 3 more variables: sex <fct>, year <int>, ## # `bill_length_mm/bill_depth_mm` <dbl>
# Add a column holding bill_length_mm divided by bill_depth_mm.
# No name is given, so the new column is named after the expression itself.
result <- mutate(penguins, bill_length_mm/bill_depth_mm)
# Show the first rows.
head(result)
## # A tibble: 6 × 9 ## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ## <fct> <fct> <dbl> <dbl> <int> <int> ## 1 Adelie Torgersen 39.1 18.7 181 3750 ## 2 Adelie Torgersen 39.5 17.4 186 3800 ## 3 Adelie Torgersen 40.3 18 195 3250 ## 4 Adelie Torgersen NA NA NA NA ## 5 Adelie Torgersen 36.7 19.3 193 3450 ## 6 Adelie Torgersen 39.3 20.6 190 3650 ## # ℹ 3 more variables: sex <fct>, year <int>, ## # `bill_length_mm/bill_depth_mm` <dbl>
# Add the bill length / bill depth ratio as a column named `slenderness`.
result <- mutate(penguins, slenderness = bill_length_mm / bill_depth_mm)
# Show the first rows.
head(result)
## # A tibble: 6 × 9 ## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ## <fct> <fct> <dbl> <dbl> <int> <int> ## 1 Adelie Torgersen 39.1 18.7 181 3750 ## 2 Adelie Torgersen 39.5 17.4 186 3800 ## 3 Adelie Torgersen 40.3 18 195 3250 ## 4 Adelie Torgersen NA NA NA NA ## 5 Adelie Torgersen 36.7 19.3 193 3450 ## 6 Adelie Torgersen 39.3 20.6 190 3650 ## # ℹ 3 more variables: sex <fct>, year <int>, slenderness <dbl>
# Label every penguin by body mass: "<4000" when body_mass_g is below 4000,
# ">=4000" otherwise. The FALSE branch also receives penguins weighing exactly
# 4000 g, so its label is ">=4000" (the previous label ">4000" was inaccurate).
# Rows with a missing body mass get NA.
result <- penguins %>%
  mutate(
    large_small = if_else(
      body_mass_g < 4000,  # condition: body mass below 4000 g?
      "<4000",             # label when TRUE
      ">=4000"             # label when FALSE
    )
  )
# Classify every penguin by flipper length (long / short, split at 200 mm)
# and body mass (large / small, split at 4000 g).
# The upper classes are inclusive (>=) so that a flipper of exactly 200 mm or a
# body mass of exactly 4000 g is classified instead of silently falling into
# "others", which is what happened when both sides used strict comparisons.
# Rows with missing measurements match no condition and still become "others".
result <- penguins %>%
  mutate(
    large_small = case_when(
      flipper_length_mm >= 200 & body_mass_g >= 4000 ~ "long_large",
      flipper_length_mm <  200 & body_mass_g >= 4000 ~ "short_large",
      flipper_length_mm >= 200 & body_mass_g <  4000 ~ "long_small",
      flipper_length_mm <  200 & body_mass_g <  4000 ~ "short_small",
      TRUE ~ "others"  # fallback when none of the conditions above is TRUE
    )
  )
# Add `ratio` (flipper length divided by body mass) and, based on it, `group`:
# "large" when ratio is at least 0.05, otherwise "small".
# Columns inside one mutate() are created in order, so `group` can use `ratio`.
result <- mutate(
  penguins,
  ratio = flipper_length_mm / body_mass_g,
  group = if_else(ratio >= 0.05, "large", "small")
)
# Show the first rows.
head(result)
## # A tibble: 6 × 10 ## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ## <fct> <fct> <dbl> <dbl> <int> <int> ## 1 Adelie Torgersen 39.1 18.7 181 3750 ## 2 Adelie Torgersen 39.5 17.4 186 3800 ## 3 Adelie Torgersen 40.3 18 195 3250 ## 4 Adelie Torgersen NA NA NA NA ## 5 Adelie Torgersen 36.7 19.3 193 3450 ## 6 Adelie Torgersen 39.3 20.6 190 3650 ## # ℹ 4 more variables: sex <fct>, year <int>, ratio <dbl>, group <chr>
# Average the bill length over all penguins.
# The column contains missing values and na.rm is not set, so the result is NA.
result <- summarize(penguins, mean(bill_length_mm))
# Show the result.
head(result)
## # A tibble: 1 × 1 ## `mean(bill_length_mm)` ## <dbl> ## 1 NA
# Average the bill length, ignoring missing values (na.rm = TRUE),
# and store it in a column named bill_length_mean.
result <- summarize(
  penguins,
  bill_length_mean = mean(bill_length_mm, na.rm = TRUE)
)
# Show the result.
head(result)
## # A tibble: 1 × 1 ## bill_length_mean ## <dbl> ## 1 43.9
# Mean bill length per species.
by_species <- group_by(penguins, species)  # split the rows by species
result <- summarize(
  by_species,
  bill_length_mean = mean(bill_length_mm, na.rm = TRUE),
  .groups = "drop"                         # return an ungrouped table
)
rm(by_species)                             # keep the workspace as it was
# Show the result.
head(result)
## # A tibble: 3 × 2 ## species bill_length_mean ## <fct> <dbl> ## 1 Adelie 38.8 ## 2 Chinstrap 48.8 ## 3 Gentoo 47.5
# Per species: mean bill length and median bill depth (missing values ignored).
result <- penguins %>%
  group_by(species) %>%
  summarize(
    bill_length_mean  = mean(bill_length_mm, na.rm = TRUE),   # mean
    bill_depth_median = median(bill_depth_mm, na.rm = TRUE),  # median
    .groups = "drop"                                          # ungroup the result
  )
# Show the result.
head(result)
## # A tibble: 3 × 3 ## species bill_length_mean bill_depth_median ## <fct> <dbl> <dbl> ## 1 Adelie 38.8 18.4 ## 2 Chinstrap 48.8 18.4 ## 3 Gentoo 47.5 15
# Per species/island combination: mean bill length and median bill depth
# (missing values ignored).
result <- penguins %>%
  group_by(species, island) %>%
  summarize(
    bill_length_mean  = mean(bill_length_mm, na.rm = TRUE),   # mean
    bill_depth_median = median(bill_depth_mm, na.rm = TRUE),  # median
    .groups = "drop"                                          # ungroup the result
  )
# Show the result.
head(result)
## # A tibble: 5 × 4 ## species island bill_length_mean bill_depth_median ## <fct> <fct> <dbl> <dbl> ## 1 Adelie Biscoe 39.0 18.5 ## 2 Adelie Dream 38.5 18.4 ## 3 Adelie Torgersen 39.0 18.4 ## 4 Chinstrap Dream 48.8 18.4 ## 5 Gentoo Biscoe 47.5 15
# Mean and standard deviation of both bill measurements for every
# species/island combination, ignoring missing values.
# na.rm = TRUE is passed inside anonymous functions: handing it to across()
# through `...` (across(cols, fns, na.rm = TRUE)) is deprecated as of
# dplyr 1.1.0 and raises a warning.
# The functions are left unnamed, so the result columns carry the positional
# suffixes `_1` (mean) and `_2` (sd). Use list(mean = ..., sd = ...) to get
# `_mean` / `_sd` instead.
result <- penguins %>%
  group_by(species, island) %>%
  summarize(
    across(
      c(bill_length_mm, bill_depth_mm),
      list(
        function(x) mean(x, na.rm = TRUE),
        function(x) sd(x, na.rm = TRUE)
      )
    ),
    .groups = "drop"  # ungroup the result
  )
# Show the result.
head(result)
## # A tibble: 5 × 6 ## species island bill_length_mm_1 bill_length_mm_2 bill_depth_mm_1 ## <fct> <fct> <dbl> <dbl> <dbl> ## 1 Adelie Biscoe 39.0 2.48 18.4 ## 2 Adelie Dream 38.5 2.47 18.3 ## 3 Adelie Torgersen 39.0 3.03 18.4 ## 4 Chinstrap Dream 48.8 3.34 18.4 ## 5 Gentoo Biscoe 47.5 3.08 15.0 ## # ℹ 1 more variable: bill_depth_mm_2 <dbl>
# Mean and standard deviation of body mass for every species/sex combination
# (missing values ignored).
result2 <- penguins %>%
  group_by(species, sex) %>%
  summarize(
    body_mass_mean = mean(body_mass_g, na.rm = TRUE),  # mean
    body_mass_sd   = sd(body_mass_g, na.rm = TRUE),    # standard deviation
    .groups = "drop"                                   # ungroup the result
  )
# Show the first rows.
head(result2)
## # A tibble: 6 × 4 ## species sex body_mass_mean body_mass_sd ## <fct> <fct> <dbl> <dbl> ## 1 Adelie female 3369. 269. ## 2 Adelie male 4043. 347. ## 3 Adelie <NA> 3540 477. ## 4 Chinstrap female 3527. 285. ## 5 Chinstrap male 3939. 362. ## 6 Gentoo female 4680. 282.
# Bar chart of mean body mass per sex, one panel per species,
# with error bars spanning mean - sd to mean + sd.
gp <- ggplot(data = result2) +
  geom_col(mapping = aes(x = sex, y = body_mass_mean)) +
  geom_errorbar(
    mapping = aes(
      x = sex,
      ymin = body_mass_mean - body_mass_sd,
      ymax = body_mass_mean + body_mass_sd
    ),
    width = 0.2
  ) +
  facet_wrap(~species)
# Draw the plot.
gp
# First half of the join example: species and bill length, plus a `rowname`
# column that holds the row number.
penguins_1 <- rownames_to_column(
  select(penguins, species, bill_length_mm)
)
# Show the first rows.
head(penguins_1)
## # A tibble: 6 × 3 ## rowname species bill_length_mm ## <chr> <fct> <dbl> ## 1 1 Adelie 39.1 ## 2 2 Adelie 39.5 ## 3 3 Adelie 40.3 ## 4 4 Adelie NA ## 5 5 Adelie 36.7 ## 6 6 Adelie 39.3
# Second half of the join example: island and body mass, plus a `rowname`
# column that holds the row number.
penguins_2 <- rownames_to_column(
  select(penguins, island, body_mass_g)
)
# Show the first rows.
head(penguins_2)
## # A tibble: 6 × 3 ## rowname island body_mass_g ## <chr> <fct> <int> ## 1 1 Torgersen 3750 ## 2 2 Torgersen 3800 ## 3 3 Torgersen 3250 ## 4 4 Torgersen NA ## 5 5 Torgersen 3450 ## 6 6 Torgersen 3650
# Glue penguins_1 and penguins_2 together side by side (by position).
# Both tables have a `rowname` column, so bind_cols() renames the duplicates.
penguins_merge <- penguins_1 %>%
  bind_cols(penguins_2)
## New names: ## • `rowname` -> `rowname...1` ## • `rowname` -> `rowname...4`
# Show the first rows of the combined table (6 is head()'s default).
head(x = penguins_merge, n = 6L)
## # A tibble: 6 × 6 ## rowname...1 species bill_length_mm rowname...4 island body_mass_g ## <chr> <fct> <dbl> <chr> <fct> <int> ## 1 1 Adelie 39.1 1 Torgersen 3750 ## 2 2 Adelie 39.5 2 Torgersen 3800 ## 3 3 Adelie 40.3 3 Torgersen 3250 ## 4 4 Adelie NA 4 Torgersen NA ## 5 5 Adelie 36.7 5 Torgersen 3450 ## 6 6 Adelie 39.3 6 Torgersen 3650
- left は1つ目(左側)に書いたデータフレーム、right は2つ目(右側)に書いたデータフレームのこと - left_join は左のデータフレームの行をすべて残し、キーが一致する右の列を付け加える。right_join は逆に右のデータフレームの行をすべて残す…
# Join penguins_2 onto penguins_1, matching rows by their shared `rowname` key.
penguins_merge <- penguins_1 %>%
  left_join(penguins_2, by = "rowname")
# Show the first rows.
head(penguins_merge)
## # A tibble: 6 × 5 ## rowname species bill_length_mm island body_mass_g ## <chr> <fct> <dbl> <fct> <int> ## 1 1 Adelie 39.1 Torgersen 3750 ## 2 2 Adelie 39.5 Torgersen 3800 ## 3 3 Adelie 40.3 Torgersen 3250 ## 4 4 Adelie NA Torgersen NA ## 5 5 Adelie 36.7 Torgersen 3450 ## 6 6 Adelie 39.3 Torgersen 3650
# Reshape to wide format: every value of `year` becomes its own column,
# filled with the corresponding body_mass_g value.
penguins_wide <- pivot_wider(
  penguins,
  names_from = year,          # values of `year` become the new column names
  values_from = body_mass_g   # cells are filled from body_mass_g
)
# Show the first rows.
head(penguins_wide)
## # A tibble: 6 × 9 ## species island bill_length_mm bill_depth_mm flipper_length_mm sex `2007` ## <fct> <fct> <dbl> <dbl> <int> <fct> <int> ## 1 Adelie Torgersen 39.1 18.7 181 male 3750 ## 2 Adelie Torgersen 39.5 17.4 186 female 3800 ## 3 Adelie Torgersen 40.3 18 195 female 3250 ## 4 Adelie Torgersen NA NA NA <NA> NA ## 5 Adelie Torgersen 36.7 19.3 193 female 3450 ## 6 Adelie Torgersen 39.3 20.6 190 male 3650 ## # ℹ 2 more variables: `2008` <int>, `2009` <int>
# Reshape back to long format: stack the three year columns into two columns.
penguins_long <- pivot_longer(
  penguins_wide,
  cols = c(`2007`, `2008`, `2009`),  # columns to stack
  names_to = "YEAR",                 # new column receiving the old column names (given as a string)
  values_to = "BODY_MASS_G"          # new column receiving the values
)
# Show the first 5 rows.
head(penguins_long, n = 5)
## # A tibble: 5 × 8 ## species island bill_length_mm bill_depth_mm flipper_length_mm sex YEAR ## <fct> <fct> <dbl> <dbl> <int> <fct> <chr> ## 1 Adelie Torgersen 39.1 18.7 181 male 2007 ## 2 Adelie Torgersen 39.1 18.7 181 male 2008 ## 3 Adelie Torgersen 39.1 18.7 181 male 2009 ## 4 Adelie Torgersen 39.5 17.4 186 female 2007 ## 5 Adelie Torgersen 39.5 17.4 186 female 2008 ## # ℹ 1 more variable: BODY_MASS_G <int>
# Look at the penguins data again (6 is head()'s default).
head(x = penguins, n = 6L)
## # A tibble: 6 × 8 ## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ## <fct> <fct> <dbl> <dbl> <int> <int> ## 1 Adelie Torgersen 39.1 18.7 181 3750 ## 2 Adelie Torgersen 39.5 17.4 186 3800 ## 3 Adelie Torgersen 40.3 18 195 3250 ## 4 Adelie Torgersen NA NA NA NA ## 5 Adelie Torgersen 36.7 19.3 193 3450 ## 6 Adelie Torgersen 39.3 20.6 190 3650 ## # ℹ 2 more variables: sex <fct>, year <int>
## # A tibble: 688 × 3 ## species PARTS VALUE ## <fct> <chr> <dbl> ## 1 Adelie bill_length_mm 39.1 ## 2 Adelie bill_depth_mm 18.7 ## 3 Adelie bill_length_mm 39.5 ## 4 Adelie bill_depth_mm 17.4 ## 5 Adelie bill_length_mm 40.3 ## 6 Adelie bill_depth_mm 18 ## 7 Adelie bill_length_mm NA ## 8 Adelie bill_depth_mm NA ## 9 Adelie bill_length_mm 36.7 ## 10 Adelie bill_depth_mm 19.3 ## # ℹ 678 more rows
# Stack the two bill measurements into long format and store it as penguins2:
# PARTS holds the former column name, VALUE holds the measurement.
penguins2 <- pivot_longer(
  penguins,
  cols = c(bill_length_mm, bill_depth_mm),  # columns to stack
  names_to = "PARTS",                       # name of the new key column
  values_to = "VALUE"                       # name of the new value column
)
# Show the first rows.
head(penguins2)
## # A tibble: 6 × 8 ## species island flipper_length_mm body_mass_g sex year PARTS VALUE ## <fct> <fct> <int> <int> <fct> <int> <chr> <dbl> ## 1 Adelie Torgersen 181 3750 male 2007 bill_lengt… 39.1 ## 2 Adelie Torgersen 181 3750 male 2007 bill_depth… 18.7 ## 3 Adelie Torgersen 186 3800 female 2007 bill_lengt… 39.5 ## 4 Adelie Torgersen 186 3800 female 2007 bill_depth… 17.4 ## 5 Adelie Torgersen 195 3250 female 2007 bill_lengt… 40.3 ## 6 Adelie Torgersen 195 3250 female 2007 bill_depth… 18
# Measurements by bill part, one panel per species.
gp <- ggplot(penguins2, aes(x = PARTS, y = VALUE)) +
  geom_point() +
  facet_wrap(~species)
# Draw the plot.
gp
## Warning: Removed 4 rows containing missing values (`geom_point()`).
# Split PARTS at every "_" into the three columns A, B and C
# (`into` takes a character vector with the new column names).
penguins2_sep <- separate(
  penguins2,
  col = PARTS,
  into = c("A", "B", "C"),
  sep = "_"
)
# Show the first rows.
head(penguins2_sep)
## # A tibble: 6 × 10 ## species island flipper_length_mm body_mass_g sex year A B C ## <fct> <fct> <int> <int> <fct> <int> <chr> <chr> <chr> ## 1 Adelie Torgersen 181 3750 male 2007 bill leng… mm ## 2 Adelie Torgersen 181 3750 male 2007 bill depth mm ## 3 Adelie Torgersen 186 3800 female 2007 bill leng… mm ## 4 Adelie Torgersen 186 3800 female 2007 bill depth mm ## 5 Adelie Torgersen 195 3250 female 2007 bill leng… mm ## 6 Adelie Torgersen 195 3250 female 2007 bill depth mm ## # ℹ 1 more variable: VALUE <dbl>
# Same plot as before, now using the split-out column B on the x axis.
gp <- ggplot(penguins2_sep, aes(x = B, y = VALUE)) +
  geom_point() +
  facet_wrap(~species)
# Draw the plot.
gp
## Warning: Removed 4 rows containing missing values (`geom_point()`).
# Paste columns A, B and C back together into one PARTS column, joined by ".".
penguins2_unite <- unite(
  penguins2_sep,
  col = "PARTS",
  A, B, C,
  sep = "."
)
# Show the first rows.
head(penguins2_unite)
## # A tibble: 6 × 8 ## species island flipper_length_mm body_mass_g sex year PARTS VALUE ## <fct> <fct> <int> <int> <fct> <int> <chr> <dbl> ## 1 Adelie Torgersen 181 3750 male 2007 bill.lengt… 39.1 ## 2 Adelie Torgersen 181 3750 male 2007 bill.depth… 18.7 ## 3 Adelie Torgersen 186 3800 female 2007 bill.lengt… 39.5 ## 4 Adelie Torgersen 186 3800 female 2007 bill.depth… 17.4 ## 5 Adelie Torgersen 195 3250 female 2007 bill.lengt… 40.3 ## 6 Adelie Torgersen 195 3250 female 2007 bill.depth… 18
# Load the tidyverse (dplyr verbs, the %>% pipe and the `diamonds` data set).
library(tidyverse)

# Mean carat and mean price per cut grade, for diamonds heavier than 2 carats.
# summarise(across(everything(), mean)) replaces the superseded
# summarise_all(mean); grouping columns are skipped automatically, so the
# resulting columns are still `cut`, `carat` and `price`.
result <- diamonds %>%
  select(carat, cut, price) %>%              # keep only carat, cut and price
  filter(carat > 2) %>%                      # keep rows with carat > 2
  group_by(cut) %>%                          # one group per cut grade
  summarise(across(everything(), mean)) %>%  # mean of every remaining column
  print()                                    # show the table; it is still assigned to `result`