8  dplyr 更改或新建

当你添加一个手机号到通讯录时,手机会问你是“新建联系人”还是“添加到已有联系人”。这是两个不同的操作。不过,dplyr 的 mutate() 函数把它们合二为一了:如果等号左边的变量名已经存在,就更改这个变量;如果不存在,就新建一个变量。

8.1 rename()

library(tidyverse)

# Rename the price column to price_usd; the new name goes on the left of `=`.
rename(diamonds, price_usd = price)
# A tibble: 53,940 × 10
   carat cut       color clarity depth table price_usd     x     y     z
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl>     <int> <dbl> <dbl> <dbl>
 1  0.23 Ideal     E     SI2      61.5    55       326  3.95  3.98  2.43
 2  0.21 Premium   E     SI1      59.8    61       326  3.89  3.84  2.31
 3  0.23 Good      E     VS1      56.9    65       327  4.05  4.07  2.31
 4  0.29 Premium   I     VS2      62.4    58       334  4.2   4.23  2.63
 5  0.31 Good      J     SI2      63.3    58       335  4.34  4.35  2.75
 6  0.24 Very Good J     VVS2     62.8    57       336  3.94  3.96  2.48
 7  0.24 Very Good I     VVS1     62.3    57       336  3.95  3.98  2.47
 8  0.26 Very Good H     SI1      61.9    55       337  4.07  4.11  2.53
 9  0.22 Fair      E     VS2      65.1    61       337  3.87  3.78  2.49
10  0.23 Very Good H     VS1      59.4    61       338  4     4.05  2.39
# ℹ 53,930 more rows
# Rename x, y and z to size_x, size_y and size_z in a single rename() call.
rename(diamonds, size_x = x, size_y = y, size_z = z)
# A tibble: 53,940 × 10
   carat cut       color clarity depth table price size_x size_y size_z
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int>  <dbl>  <dbl>  <dbl>
 1  0.23 Ideal     E     SI2      61.5    55   326   3.95   3.98   2.43
 2  0.21 Premium   E     SI1      59.8    61   326   3.89   3.84   2.31
 3  0.23 Good      E     VS1      56.9    65   327   4.05   4.07   2.31
 4  0.29 Premium   I     VS2      62.4    58   334   4.2    4.23   2.63
 5  0.31 Good      J     SI2      63.3    58   335   4.34   4.35   2.75
 6  0.24 Very Good J     VVS2     62.8    57   336   3.94   3.96   2.48
 7  0.24 Very Good I     VVS1     62.3    57   336   3.95   3.98   2.47
 8  0.26 Very Good H     SI1      61.9    55   337   4.07   4.11   2.53
 9  0.22 Fair      E     VS2      65.1    61   337   3.87   3.78   2.49
10  0.23 Very Good H     VS1      59.4    61   338   4      4.05   2.39
# ℹ 53,930 more rows

8.2 mutate()

# Create a new column price_rmb: price converted to RMB, assuming an
# exchange rate of 7. price_rmb does not exist yet, so a column is added.
mutate(diamonds, price_rmb = price * 7)
# A tibble: 53,940 × 11
   carat cut       color clarity depth table price     x     y     z price_rmb
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>     <dbl>
 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43      2282
 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31      2282
 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31      2289
 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63      2338
 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75      2345
 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48      2352
 7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47      2352
 8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53      2359
 9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49      2359
10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39      2366
# ℹ 53,930 more rows
# Modify the existing price column: convert it to RMB, assuming an exchange
# rate of 7. price already exists, so it is overwritten in the result.
# The result is only printed here; diamonds itself is not reassigned.
mutate(diamonds, price = price * 7)
# A tibble: 53,940 × 10
   carat cut       color clarity depth table price     x     y     z
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1  0.23 Ideal     E     SI2      61.5    55  2282  3.95  3.98  2.43
 2  0.21 Premium   E     SI1      59.8    61  2282  3.89  3.84  2.31
 3  0.23 Good      E     VS1      56.9    65  2289  4.05  4.07  2.31
 4  0.29 Premium   I     VS2      62.4    58  2338  4.2   4.23  2.63
 5  0.31 Good      J     SI2      63.3    58  2345  4.34  4.35  2.75
 6  0.24 Very Good J     VVS2     62.8    57  2352  3.94  3.96  2.48
 7  0.24 Very Good I     VVS1     62.3    57  2352  3.95  3.98  2.47
 8  0.26 Very Good H     SI1      61.9    55  2359  4.07  4.11  2.53
 9  0.22 Fair      E     VS2      65.1    61  2359  3.87  3.78  2.49
10  0.23 Very Good H     VS1      59.4    61  2366  4     4.05  2.39
# ℹ 53,930 more rows
# Create a new column size as the product of x, y and z
# (this is not the diamond's true volume).
mutate(diamonds, size = x * y * z)
# A tibble: 53,940 × 11
   carat cut       color clarity depth table price     x     y     z  size
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43  38.2
 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31  34.5
 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31  38.1
 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63  46.7
 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75  51.9
 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48  38.7
 7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47  38.8
 8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53  42.3
 9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49  36.4
10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39  38.7
# ℹ 53,930 more rows