1 データ操作

Rにおけるデータハンドリング (データ操作)は{dplyr}と{tidyr}が便利である.

パイプ演算子(%>%)を使うとコードの可読性が高まる. パイプ演算子%>%は左辺の出力を右辺の関数の第1引数に渡す. RStudioでの入力ショートカットはWin: Ctrl+Shift+M, Mac: Command+Shift+M.

2 pipe演算子

library(tidyverse)
library(magrittr)
library(dplyr)
library(psych)
# Approach 1: store every intermediate result in its own object.
# Build a data.frame holding x = 1:10 and a two-level grouping factor g
# (this approach creates no y column).
x <- 1:10
g <- as.factor(rep(1:2, times = 5))
d <- data.frame(x = x, g = g)
d
##     x g
## 1   1 1
## 2   2 2
## 3   3 1
## 4   4 2
## 5   5 1
## 6   6 2
## 7   7 1
## 8   8 2
## 9   9 1
## 10 10 2
# Split the data by the grouping factor g.
g1 <- subset(d, g == "1")
g2 <- subset(d, g == "2")
# Mean of x within each group.
mean(g1[["x"]])
mean(g2[["x"]])
## [1] 5
## [1] 6
# Approach 2: the same steps nested inside a single lapply() call
# (g has two levels, so the call has to be written twice).
# This call summarises every column of the rows where g == "1".
lapply(subset(data.frame(x = c(1:10) , 
                         g = as.factor(rep(1:2 , 5))),subset = g == "1"),summary)
## $x
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1       3       5       5       7       9 
## 
## $g
## 1 2 
## 5 0
# Same nested call for the rows where g == "2".
lapply(subset(data.frame(x = c(1:10) ,
                         g = as.factor(rep(1:2 , 5))),subset = g == "2"),summary)
## $x
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2       4       6       6       8      10 
## 
## $g
## 1 2 
## 0 5
# Approach 3: chain the same steps with the pipe operator.
data.frame(x = 1:10, y = 10:19, g = as.factor(rep(1:2, 5))) %>% # build the data
  subset(g == "1") %>% # keep the rows of group 1
  summary() # summarise every column
##        x           y      g    
##  Min.   :1   Min.   :10   1:5  
##  1st Qu.:3   1st Qu.:12   2:0  
##  Median :5   Median :14        
##  Mean   :5   Mean   :14        
##  3rd Qu.:7   3rd Qu.:16        
##  Max.   :9   Max.   :18

処理1は1stepごとにオブジェクトに代入しながら処理する:何をしているかわかりやすいが冗長である. 処理2はlapply関数を使い1行でコーディングする:極めて可読性が悪い. 処理3は処理1と同様に1stepごとにコーディングしているが,オブジェクトに代入せず左辺の結果を右辺に渡している:可読性が高い.

# A simple example using the built-in iris data.
data(iris)
names(iris)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"
# Number of rows per species.
iris %>%
  group_by(Species) %>%
  summarise(n = n())
## # A tibble: 3 × 2
##   Species        n
##   <fct>      <int>
## 1 setosa        50
## 2 versicolor    50
## 3 virginica     50
# Mean sepal length, sepal width and petal width per species.
iris %>%
  group_by(Species) %>%
  summarise(
    ave.sl = mean(Sepal.Length),
    ave.sw = mean(Sepal.Width),
    ave.pw = mean(Petal.Width)
  )
## # A tibble: 3 × 4
##   Species    ave.sl ave.sw ave.pw
##   <fct>       <dbl>  <dbl>  <dbl>
## 1 setosa       5.01   3.43  0.246
## 2 versicolor   5.94   2.77  1.33 
## 3 virginica    6.59   2.97  2.03

3 library,object,console操作

#library(MASS)          # library:読み込み
#detach("package:MASS") # library:取り外し

#ls()                # メモリ上にあるオブジェクト名を確認
#rm(list=ls())       # メモリ上にあるすべてのオブジェクトを消去
## Environmentで確認、「箒」アイコンで消去でも可

## Consoleへの出力結果の外部保存;sink関数
# 最初に以下のコードを実行してから分析をはじめる
#sink("out.txt" , append = T) # append = Tで上書きせず追加
#sink("data/out.txt" , append =T) # dataフォルダにout.txtで保存

#sink() # 出力終了
# 最後に上記のコードを実行して分析を終了

4 欠測値の入力

# data入力時欠測値をNAと入力していれば問題なし
# blank:読み込み時NAになる.ピリオドや記号,数値で入力されている場合は
# NA指定する必要がある.指定しないとNA扱いにならない.
# data読込時,引数na.strings=()で指定する.
#  read.csv(file,  na.strings = (c("99", "."))) # "99"と"."をNAに指定

5 欠測値処理

#CSVデータでは,欠測値はブランクにしておく.Rで読み込むと自動的にNA(Not Available)となる.
#ピリオドや記号,数値で入力されている場合は指定する.指定しないとNAにならない
#文字列データのブランクを欠測値とする時:na.strings=""
# data読込時,引数na.strings=()で指定する.
# read.csv(file名, na.strings = (c("99", "."))) # "99"と"."をNAに指定

library(MASS)
data("Cars93")
# Check whether the data contain missing values.
# is.na(Cars93) would print the full logical matrix for every cell.
anyNA(Cars93) # TRUE: at least one NA is present
## [1] TRUE
# Check which rows contain missing values.
complete.cases(Cars93)# FALSE marks a row that has at least one NA
##  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [13]  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
## [25]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
## [37]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [49]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE
## [61]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
## [73]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [85]  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
# Show the rows that contain missing values.
Cars93[!complete.cases(Cars93),] # ! is the logical NOT operator
##    Manufacturer      Model   Type Min.Price Price Max.Price MPG.city
## 16    Chevrolet Lumina_APV    Van      14.7  16.3      18.0       18
## 17    Chevrolet      Astro    Van      14.7  16.6      18.6       15
## 19    Chevrolet   Corvette Sporty      34.6  38.0      41.5       17
## 26        Dodge    Caravan    Van      13.6  19.0      24.4       17
## 36         Ford   Aerostar    Van      14.5  19.9      25.3       15
## 56        Mazda        MPV    Van      16.6  19.1      21.7       18
## 57        Mazda       RX-7 Sporty      32.5  32.5      32.5       17
## 66       Nissan      Quest    Van      16.7  19.1      21.5       17
## 70   Oldsmobile Silhouette    Van      19.5  19.5      19.5       18
## 87       Toyota     Previa    Van      18.9  22.7      26.6       18
## 89   Volkswagen    Eurovan    Van      16.6  19.7      22.7       17
##    MPG.highway     AirBags DriveTrain Cylinders EngineSize Horsepower  RPM
## 16          23        None      Front         6        3.8        170 4800
## 17          20        None        4WD         6        4.3        165 4000
## 19          25 Driver only       Rear         8        5.7        300 5000
## 26          21 Driver only        4WD         6        3.0        142 5000
## 36          20 Driver only        4WD         6        3.0        145 4800
## 56          24        None        4WD         6        3.0        155 5000
## 57          25 Driver only       Rear    rotary        1.3        255 6500
## 66          23        None      Front         6        3.0        151 4800
## 70          23        None      Front         6        3.8        170 4800
## 87          22 Driver only        4WD         4        2.4        138 5000
## 89          21        None      Front         5        2.5        109 4500
##    Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length Wheelbase
## 16         1690              No               20.0          7    178       110
## 17         1790              No               27.0          8    194       111
## 19         1450             Yes               20.0          2    179        96
## 26         1970              No               20.0          7    175       112
## 36         2080             Yes               21.0          7    176       119
## 56         2240              No               19.6          7    190       110
## 57         2325             Yes               20.0          2    169        96
## 66         2065              No               20.0          7    190       112
## 70         1690              No               20.0          7    194       110
## 87         2515             Yes               19.8          7    187       113
## 89         2915             Yes               21.1          7    187       115
##    Width Turn.circle Rear.seat.room Luggage.room Weight  Origin
## 16    74          44           30.5           NA   3715     USA
## 17    78          42           33.5           NA   4025     USA
## 19    74          43             NA           NA   3380     USA
## 26    72          42           26.5           NA   3705     USA
## 36    72          45           30.0           NA   3735     USA
## 56    72          39           27.5           NA   3735 non-USA
## 57    69          37             NA           NA   2895 non-USA
## 66    74          41           27.0           NA   4100 non-USA
## 70    74          44           30.5           NA   3715     USA
## 87    71          41           35.0           NA   3785 non-USA
## 89    72          38           34.0           NA   3960 non-USA
##                     Make
## 16  Chevrolet Lumina_APV
## 17       Chevrolet Astro
## 19    Chevrolet Corvette
## 26         Dodge Caravan
## 36         Ford Aerostar
## 56             Mazda MPV
## 57            Mazda RX-7
## 66          Nissan Quest
## 70 Oldsmobile Silhouette
## 87         Toyota Previa
## 89    Volkswagen Eurovan

5.1 欠測値処理(リストワイズ削除が基本)

# Listwise deletion: drop every row of Cars93 that has at least one NA.
# The data set must be named explicitly: a bare `df` is not a data frame here
# but the F-distribution density function stats::df, so na.omit(df) returned
# that function and nrow() printed NULL.
df.omit <- na.omit(Cars93)                    # na.omit(): keep complete rows only
#df.omit <- Cars93[complete.cases(Cars93), ]  # equivalent to na.omit()
nrow(df.omit)                                 # sample size after listwise deletion
## [1] 82

6 欠測値の置換

na.dat <- c(1:10, NA)
na.dat
##  [1]  1  2  3  4  5  6  7  8  9 10 NA
na.dat <- replace(na.dat, is.na(na.dat), 0) # replace NA with 0
na.dat
##  [1]  1  2  3  4  5  6  7  8  9 10  0
na.dat <- replace(na.dat, na.dat == 0, NA) # replace 0 with NA
na.dat
##  [1]  1  2  3  4  5  6  7  8  9 10 NA
# Apply the same replacement to a whole data frame.
data("Cars93")
Cars93[is.na(Cars93)] <- 99999 # missing values -> 99999
Cars93[Cars93 == 99999] <- NA # 99999 -> NA

7 欠測値と関数

data("Cars93")
# For functions that cannot cope with NA at all, build an NA-free copy first.
Cars93.naomit <- na.omit(Cars93)
nrow(Cars93.naomit) # 82 complete rows
## [1] 82
nrow(Cars93) # 93 rows in the full data
## [1] 93
# Functions that expose an argument for excluding missing values.
mean(Cars93[["Luggage.room"]]) # NA: mean() does not drop NA unless asked to
## [1] NA
mean(Cars93[["Luggage.room"]], na.rm = TRUE)
## [1] 13.89024
with(Cars93, cor(Length, Luggage.room)) # NA: cor() does not drop NA unless asked to
## [1] NA
with(Cars93, cor(Length, Luggage.room, use = "pairwise.complete.obs")) # pairwise deletion
## [1] 0.7129622

8 データ値の操作

8.1 連続量を階級値に分割

summary(Cars93$Price) # five-number summary plus the mean
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7.40   12.20   17.70   19.51   23.30   61.90
# Bin Price into four ordered classes at the quartiles:
# [7.4, 12.2), [12.2, 17.7), [17.7, 23.3), [23.3, 61.9].
Cars93$cate.L <- cut(
  Cars93$Price,
  breaks = c(7.4, 12.2, 17.7, 23.3, 61.9), # class boundaries
  labels = c("L", "LM", "M", "H"),         # class labels
  right = FALSE,          # intervals are closed on the left, open on the right
  include.lowest = TRUE,  # close the last interval so the maximum (61.9) is not NA
  ordered_result = TRUE   # return an ordered factor
)
table(Cars93$cate.L) # check the distribution: all 93 cars are classified
## 
##  L LM  M  H 
## 23 23 23 24
names(Cars93) # check the variables
##  [1] "Manufacturer"       "Model"              "Type"              
##  [4] "Min.Price"          "Price"              "Max.Price"         
##  [7] "MPG.city"           "MPG.highway"        "AirBags"           
## [10] "DriveTrain"         "Cylinders"          "EngineSize"        
## [13] "Horsepower"         "RPM"                "Rev.per.mile"      
## [16] "Man.trans.avail"    "Fuel.tank.capacity" "Passengers"        
## [19] "Length"             "Wheelbase"          "Width"             
## [22] "Turn.circle"        "Rear.seat.room"     "Luggage.room"      
## [25] "Weight"             "Origin"             "Make"              
## [28] "cate.L"

8.2 データ値の変更

# Map each data value onto a new value.
x1 <- c(1, 2, 1, 2)
x2 <- c(3, 4, 3, 4)
d <- data.frame(x1 = x1, x2 = x2) # combine into a data frame
d
##   x1 x2
## 1  1  3
## 2  2  4
## 3  1  3
## 4  2  4
# Rename the first column only; to rename every column at once use
# colnames(d) <- c("yy", "xx").
colnames(d)[1] <- "xx"
d
##   xx x2
## 1  1  3
## 2  2  4
## 3  1  3
## 4  2  4
# Add xx1: the values of xx recoded as F / M.
d$xx1 <- factor(d[["xx"]], levels = c(1, 2), labels = c("F", "M"))
d
##   xx x2 xx1
## 1  1  3   F
## 2  2  4   M
## 3  1  3   F
## 4  2  4   M
# Add xx2: the values of xx1 recoded as 5 / 6.
d$xx2 <- factor(d[["xx1"]], levels = c("F", "M"), labels = c(5, 6))
d
##   xx x2 xx1 xx2
## 1  1  3   F   5
## 2  2  4   M   6
## 3  1  3   F   5
## 4  2  4   M   6

8.3 データ値の変換

# Add a column holding Price multiplied by 1000.
P <- Cars93 %>% mutate(price.2 = Price * 1000)
head(P)
##   Manufacturer   Model    Type Min.Price Price Max.Price MPG.city MPG.highway
## 1        Acura Integra   Small      12.9  15.9      18.8       25          31
## 2        Acura  Legend Midsize      29.2  33.9      38.7       18          25
## 3         Audi      90 Compact      25.9  29.1      32.3       20          26
## 4         Audi     100 Midsize      30.8  37.7      44.6       19          26
## 5          BMW    535i Midsize      23.7  30.0      36.2       22          30
## 6        Buick Century Midsize      14.2  15.7      17.3       22          31
##              AirBags DriveTrain Cylinders EngineSize Horsepower  RPM
## 1               None      Front         4        1.8        140 6300
## 2 Driver & Passenger      Front         6        3.2        200 5500
## 3        Driver only      Front         6        2.8        172 5500
## 4 Driver & Passenger      Front         6        2.8        172 5500
## 5        Driver only       Rear         4        3.5        208 5700
## 6        Driver only      Front         4        2.2        110 5200
##   Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length Wheelbase
## 1         2890             Yes               13.2          5    177       102
## 2         2335             Yes               18.0          5    195       115
## 3         2280             Yes               16.9          5    180       102
## 4         2535             Yes               21.1          6    193       106
## 5         2545             Yes               21.1          4    186       109
## 6         2565              No               16.4          6    189       105
##   Width Turn.circle Rear.seat.room Luggage.room Weight  Origin          Make
## 1    68          37           26.5           11   2705 non-USA Acura Integra
## 2    71          38           30.0           15   3560 non-USA  Acura Legend
## 3    67          37           28.0           14   3375 non-USA       Audi 90
## 4    70          37           31.0           17   3405 non-USA      Audi 100
## 5    69          39           27.0           13   3640 non-USA      BMW 535i
## 6    69          41           28.0           16   2880     USA Buick Century
##   cate.L price.2
## 1     LM   15900
## 2      H   33900
## 3      H   29100
## 4      H   37700
## 5      H   30000
## 6     LM   15700
# Add the row-wise mean of the three price variables, multiplied by 1000.
P.m <- mutate(Cars93, price.ave = (Price + Max.Price + Min.Price) / 3 * 1000)
head(P.m)
##   Manufacturer   Model    Type Min.Price Price Max.Price MPG.city MPG.highway
## 1        Acura Integra   Small      12.9  15.9      18.8       25          31
## 2        Acura  Legend Midsize      29.2  33.9      38.7       18          25
## 3         Audi      90 Compact      25.9  29.1      32.3       20          26
## 4         Audi     100 Midsize      30.8  37.7      44.6       19          26
## 5          BMW    535i Midsize      23.7  30.0      36.2       22          30
## 6        Buick Century Midsize      14.2  15.7      17.3       22          31
##              AirBags DriveTrain Cylinders EngineSize Horsepower  RPM
## 1               None      Front         4        1.8        140 6300
## 2 Driver & Passenger      Front         6        3.2        200 5500
## 3        Driver only      Front         6        2.8        172 5500
## 4 Driver & Passenger      Front         6        2.8        172 5500
## 5        Driver only       Rear         4        3.5        208 5700
## 6        Driver only      Front         4        2.2        110 5200
##   Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length Wheelbase
## 1         2890             Yes               13.2          5    177       102
## 2         2335             Yes               18.0          5    195       115
## 3         2280             Yes               16.9          5    180       102
## 4         2535             Yes               21.1          6    193       106
## 5         2545             Yes               21.1          4    186       109
## 6         2565              No               16.4          6    189       105
##   Width Turn.circle Rear.seat.room Luggage.room Weight  Origin          Make
## 1    68          37           26.5           11   2705 non-USA Acura Integra
## 2    71          38           30.0           15   3560 non-USA  Acura Legend
## 3    67          37           28.0           14   3375 non-USA       Audi 90
## 4    70          37           31.0           17   3405 non-USA      Audi 100
## 5    69          39           27.0           13   3640 non-USA      BMW 535i
## 6    69          41           28.0           16   2880     USA Buick Century
##   cate.L price.ave
## 1     LM  15866.67
## 2      H  33933.33
## 3      H  29100.00
## 4      H  37700.00
## 5      H  29966.67
## 6     LM  15733.33

8.4 データ値を逆転させる

x11 <- c(1, 2, 3, 4, 5)
x22 <- c(1, 2, 3, 4, 5)
dd <- data.frame(x11 = x11, x22 = x22)
dd
##   x11 x22
## 1   1   1
## 2   2   2
## 3   3   3
## 4   4   4
## 5   5   5
# Reverse-score a 1-5 scale: new value = (minimum + maximum) - old value.
cbind(dd, apply(dd, 2, function(v) (1 + 5) - v))
##   x11 x22 x11 x22
## 1   1   1   5   5
## 2   2   2   4   4
## 3   3   3   3   3
## 4   4   4   2   2
## 5   5   5   1   1
dd
##   x11 x22
## 1   1   1
## 2   2   2
## 3   3   3
## 4   4   4
## 5   5   5
# Same result with the reversed columns moved to the front.
cbind(dd, apply(dd, 2, function(v) (1 + 5) - v))[c(3, 4, 1, 2)]
##   x11 x22 x11.1 x22.1
## 1   5   5     1     1
## 2   4   4     2     2
## 3   3   3     3     3
## 4   2   2     4     4
## 5   1   1     5     5
library(dplyr)
# Reverse-score with dplyr, adding the results as new columns x15 and x25.
dd %>% mutate(x15 = (1 + 5) - x11, x25 = (1 + 5) - x22)
##   x11 x22 x15 x25
## 1   1   1   5   5
## 2   2   2   4   4
## 3   3   3   3   3
## 4   4   4   2   2
## 5   5   5   1   1

9 データ参照

# Refer to specific columns and rows.
Cars93$Price    # extract the Price variable
##  [1] 15.9 33.9 29.1 37.7 30.0 15.7 20.8 23.7 26.3 34.7 40.1 13.4 11.4 15.1 15.9
## [16] 16.3 16.6 18.8 38.0 18.4 15.8 29.5  9.2 11.3 13.3 19.0 15.6 25.8 12.2 19.3
## [31]  7.4 10.1 11.3 15.9 14.0 19.9 20.2 20.9  8.4 12.5 19.8 12.1 17.5  8.0 10.0
## [46] 10.0 13.9 47.9 28.0 35.2 34.3 36.1  8.3 11.6 16.5 19.1 32.5 31.9 61.9 14.1
## [61] 14.9 10.3 26.1 11.8 15.7 19.1 21.5 13.5 16.3 19.5 20.7 14.4  9.0 11.1 17.7
## [76] 18.5 24.4 28.7 11.1  8.4 10.9 19.5  8.6  9.8 18.4 18.2 22.7  9.1 19.7 20.0
## [91] 23.3 22.7 26.7
Cars93[,1]     # column 1: the index after the comma selects columns
##  [1] Acura         Acura         Audi          Audi          BMW          
##  [6] Buick         Buick         Buick         Buick         Cadillac     
## [11] Cadillac      Chevrolet     Chevrolet     Chevrolet     Chevrolet    
## [16] Chevrolet     Chevrolet     Chevrolet     Chevrolet     Chrylser     
## [21] Chrysler      Chrysler      Dodge         Dodge         Dodge        
## [26] Dodge         Dodge         Dodge         Eagle         Eagle        
## [31] Ford          Ford          Ford          Ford          Ford         
## [36] Ford          Ford          Ford          Geo           Geo          
## [41] Honda         Honda         Honda         Hyundai       Hyundai      
## [46] Hyundai       Hyundai       Infiniti      Lexus         Lexus        
## [51] Lincoln       Lincoln       Mazda         Mazda         Mazda        
## [56] Mazda         Mazda         Mercedes-Benz Mercedes-Benz Mercury      
## [61] Mercury       Mitsubishi    Mitsubishi    Nissan        Nissan       
## [66] Nissan        Nissan        Oldsmobile    Oldsmobile    Oldsmobile   
## [71] Oldsmobile    Plymouth      Pontiac       Pontiac       Pontiac      
## [76] Pontiac       Pontiac       Saab          Saturn        Subaru       
## [81] Subaru        Subaru        Suzuki        Toyota        Toyota       
## [86] Toyota        Toyota        Volkswagen    Volkswagen    Volkswagen   
## [91] Volkswagen    Volvo         Volvo        
## 32 Levels: Acura Audi BMW Buick Cadillac Chevrolet Chrylser Chrysler ... Volvo
Cars93[1,]     # row 1: the index before the comma selects rows
##   Manufacturer   Model  Type Min.Price Price Max.Price MPG.city MPG.highway
## 1        Acura Integra Small      12.9  15.9      18.8       25          31
##   AirBags DriveTrain Cylinders EngineSize Horsepower  RPM Rev.per.mile
## 1    None      Front         4        1.8        140 6300         2890
##   Man.trans.avail Fuel.tank.capacity Passengers Length Wheelbase Width
## 1             Yes               13.2          5    177       102    68
##   Turn.circle Rear.seat.room Luggage.room Weight  Origin          Make cate.L
## 1          37           26.5           11   2705 non-USA Acura Integra     LM
head(Cars93[,c(1,2)])   # columns 1 and 2
##   Manufacturer   Model
## 1        Acura Integra
## 2        Acura  Legend
## 3         Audi      90
## 4         Audi     100
## 5          BMW    535i
## 6        Buick Century

10 行,列の操作

10.1 行(row)操作

# Extract a run of consecutive rows (the index before the comma selects rows).
r1_5 <- Cars93[1:5, ]
r1_5
##   Manufacturer   Model    Type Min.Price Price Max.Price MPG.city MPG.highway
## 1        Acura Integra   Small      12.9  15.9      18.8       25          31
## 2        Acura  Legend Midsize      29.2  33.9      38.7       18          25
## 3         Audi      90 Compact      25.9  29.1      32.3       20          26
## 4         Audi     100 Midsize      30.8  37.7      44.6       19          26
## 5          BMW    535i Midsize      23.7  30.0      36.2       22          30
##              AirBags DriveTrain Cylinders EngineSize Horsepower  RPM
## 1               None      Front         4        1.8        140 6300
## 2 Driver & Passenger      Front         6        3.2        200 5500
## 3        Driver only      Front         6        2.8        172 5500
## 4 Driver & Passenger      Front         6        2.8        172 5500
## 5        Driver only       Rear         4        3.5        208 5700
##   Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length Wheelbase
## 1         2890             Yes               13.2          5    177       102
## 2         2335             Yes               18.0          5    195       115
## 3         2280             Yes               16.9          5    180       102
## 4         2535             Yes               21.1          6    193       106
## 5         2545             Yes               21.1          4    186       109
##   Width Turn.circle Rear.seat.room Luggage.room Weight  Origin          Make
## 1    68          37           26.5           11   2705 non-USA Acura Integra
## 2    71          38           30.0           15   3560 non-USA  Acura Legend
## 3    67          37           28.0           14   3375 non-USA       Audi 90
## 4    70          37           31.0           17   3405 non-USA      Audi 100
## 5    69          39           27.0           13   3640 non-USA      BMW 535i
##   cate.L
## 1     LM
## 2      H
## 3      H
## 4      H
## 5      H
# Extract only the rows with the given row numbers.
r4.5 <- Cars93[4:5, ]
r4.5
##   Manufacturer Model    Type Min.Price Price Max.Price MPG.city MPG.highway
## 4         Audi   100 Midsize      30.8  37.7      44.6       19          26
## 5          BMW  535i Midsize      23.7  30.0      36.2       22          30
##              AirBags DriveTrain Cylinders EngineSize Horsepower  RPM
## 4 Driver & Passenger      Front         6        2.8        172 5500
## 5        Driver only       Rear         4        3.5        208 5700
##   Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length Wheelbase
## 4         2535             Yes               21.1          6    193       106
## 5         2545             Yes               21.1          4    186       109
##   Width Turn.circle Rear.seat.room Luggage.room Weight  Origin     Make cate.L
## 4    70          37             31           17   3405 non-USA Audi 100      H
## 5    69          39             27           13   3640 non-USA BMW 535i      H
# Drop the rows with the given row numbers.
c93.r <- Cars93[-c(4, 5), ]
head(c93.r)
##   Manufacturer      Model    Type Min.Price Price Max.Price MPG.city
## 1        Acura    Integra   Small      12.9  15.9      18.8       25
## 2        Acura     Legend Midsize      29.2  33.9      38.7       18
## 3         Audi         90 Compact      25.9  29.1      32.3       20
## 6        Buick    Century Midsize      14.2  15.7      17.3       22
## 7        Buick    LeSabre   Large      19.9  20.8      21.7       19
## 8        Buick Roadmaster   Large      22.6  23.7      24.9       16
##   MPG.highway            AirBags DriveTrain Cylinders EngineSize Horsepower
## 1          31               None      Front         4        1.8        140
## 2          25 Driver & Passenger      Front         6        3.2        200
## 3          26        Driver only      Front         6        2.8        172
## 6          31        Driver only      Front         4        2.2        110
## 7          28        Driver only      Front         6        3.8        170
## 8          25        Driver only       Rear         6        5.7        180
##    RPM Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length
## 1 6300         2890             Yes               13.2          5    177
## 2 5500         2335             Yes               18.0          5    195
## 3 5500         2280             Yes               16.9          5    180
## 6 5200         2565              No               16.4          6    189
## 7 4800         1570              No               18.0          6    200
## 8 4000         1320              No               23.0          6    216
##   Wheelbase Width Turn.circle Rear.seat.room Luggage.room Weight  Origin
## 1       102    68          37           26.5           11   2705 non-USA
## 2       115    71          38           30.0           15   3560 non-USA
## 3       102    67          37           28.0           14   3375 non-USA
## 6       105    69          41           28.0           16   2880     USA
## 7       111    74          42           30.5           17   3470     USA
## 8       116    78          45           30.5           21   4105     USA
##               Make cate.L
## 1    Acura Integra     LM
## 2     Acura Legend      H
## 3          Audi 90      H
## 6    Buick Century     LM
## 7    Buick LeSabre      M
## 8 Buick Roadmaster      H
# Extract the rows whose value satisfies a condition.
# Comparison operators: == equal, != not equal, >= at least, > greater than,
# <= at most, < less than.
# Equivalent with bracket indexing: Cars93[Cars93$Type == "Small", ]
ds <- subset(Cars93, Type == "Small") # rows whose Type is Small
table(ds[["Type"]]) # confirm that only Small rows remain
## 
## Compact   Large Midsize   Small  Sporty     Van 
##       0       0       0      21       0       0
dp <- subset(Cars93, Price <= 20) # rows whose Price is at most 20
range(dp[["Price"]]) # confirm that the Price range stays at or below 20
## [1]  7.4 20.0
length(dp[["Price"]]) # length(): number of observations kept
## [1] 62
# The dplyr equivalent: filter().
# With the pipe operator: Cars93 %>% filter(Type == "Small")
d.s <- filter(Cars93, Type == "Small")
table(d.s[["Type"]])
## 
## Compact   Large Midsize   Small  Sporty     Van 
##       0       0       0      21       0       0

10.2 列(column)操作

# Extract a run of consecutive columns.
c1_5 <- Cars93[, 1:5]
head(c1_5)
##   Manufacturer   Model    Type Min.Price Price
## 1        Acura Integra   Small      12.9  15.9
## 2        Acura  Legend Midsize      29.2  33.9
## 3         Audi      90 Compact      25.9  29.1
## 4         Audi     100 Midsize      30.8  37.7
## 5          BMW    535i Midsize      23.7  30.0
## 6        Buick Century Midsize      14.2  15.7
# Extract only the columns with the given column numbers.
c4.5 <- Cars93[, 4:5]
head(c4.5)
##   Min.Price Price
## 1      12.9  15.9
## 2      29.2  33.9
## 3      25.9  29.1
## 4      30.8  37.7
## 5      23.7  30.0
## 6      14.2  15.7
# Drop the columns with the given column numbers.
c93.r <- Cars93[, c(-4, -5)]
head(c93.r)
##   Manufacturer   Model    Type Max.Price MPG.city MPG.highway
## 1        Acura Integra   Small      18.8       25          31
## 2        Acura  Legend Midsize      38.7       18          25
## 3         Audi      90 Compact      32.3       20          26
## 4         Audi     100 Midsize      44.6       19          26
## 5          BMW    535i Midsize      36.2       22          30
## 6        Buick Century Midsize      17.3       22          31
##              AirBags DriveTrain Cylinders EngineSize Horsepower  RPM
## 1               None      Front         4        1.8        140 6300
## 2 Driver & Passenger      Front         6        3.2        200 5500
## 3        Driver only      Front         6        2.8        172 5500
## 4 Driver & Passenger      Front         6        2.8        172 5500
## 5        Driver only       Rear         4        3.5        208 5700
## 6        Driver only      Front         4        2.2        110 5200
##   Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length Wheelbase
## 1         2890             Yes               13.2          5    177       102
## 2         2335             Yes               18.0          5    195       115
## 3         2280             Yes               16.9          5    180       102
## 4         2535             Yes               21.1          6    193       106
## 5         2545             Yes               21.1          4    186       109
## 6         2565              No               16.4          6    189       105
##   Width Turn.circle Rear.seat.room Luggage.room Weight  Origin          Make
## 1    68          37           26.5           11   2705 non-USA Acura Integra
## 2    71          38           30.0           15   3560 non-USA  Acura Legend
## 3    67          37           28.0           14   3375 non-USA       Audi 90
## 4    70          37           31.0           17   3405 non-USA      Audi 100
## 5    69          39           27.0           13   3640 non-USA      BMW 535i
## 6    69          41           28.0           16   2880     USA Buick Century
##   cate.L
## 1     LM
## 2      H
## 3      H
## 4      H
## 5      H
## 6     LM
# The dplyr equivalent: select().
# While the MASS package is attached its own select() clashes with the dplyr
# one and a bare select() call fails, so qualify the call as dplyr::select.
r.s <- Cars93 %>% dplyr::select(c(4, 5))
head(r.s)
##   Min.Price Price
## 1      12.9  15.9
## 2      29.2  33.9
## 3      25.9  29.1
## 4      30.8  37.7
## 5      23.7  30.0
## 6      14.2  15.7
detach("package:MASS") # detach the MASS package
# Select by column position; column names (Min.Price, Max.Price) work as well.
r4_5 <- select(Cars93, c(4, 6))
# NOTE(review): the original remark about ignore.case presumably refers to
# selection helpers such as contains(), which ignore case by default; pass
# ignore.case = FALSE to make them case-sensitive.
r4_5 <- select(Cars93, 4, 6) # positions need not be wrapped in a vector
head(r4_5)
##   Min.Price Max.Price
## 1      12.9      18.8
## 2      29.2      38.7
## 3      25.9      32.3
## 4      30.8      44.6
## 5      23.7      36.2
## 6      14.2      17.3
# Extract a run of consecutive columns.
r4.10 <- select(Cars93, c(4:10))
head(r4.10)
##   Min.Price Price Max.Price MPG.city MPG.highway            AirBags DriveTrain
## 1      12.9  15.9      18.8       25          31               None      Front
## 2      29.2  33.9      38.7       18          25 Driver & Passenger      Front
## 3      25.9  29.1      32.3       20          26        Driver only      Front
## 4      30.8  37.7      44.6       19          26 Driver & Passenger      Front
## 5      23.7  30.0      36.2       22          30        Driver only       Rear
## 6      14.2  15.7      17.3       22          31        Driver only      Front
# Exclude the given columns.
r_4_20 <- select(Cars93, -c(4:20))
head(r_4_20)
##   Manufacturer   Model    Type Width Turn.circle Rear.seat.room Luggage.room
## 1        Acura Integra   Small    68          37           26.5           11
## 2        Acura  Legend Midsize    71          38           30.0           15
## 3         Audi      90 Compact    67          37           28.0           14
## 4         Audi     100 Midsize    70          37           31.0           17
## 5          BMW    535i Midsize    69          39           27.0           13
## 6        Buick Century Midsize    69          41           28.0           16
##   Weight  Origin          Make cate.L
## 1   2705 non-USA Acura Integra     LM
## 2   3560 non-USA  Acura Legend      H
## 3   3375 non-USA       Audi 90      H
## 4   3405 non-USA      Audi 100      H
## 5   3640 non-USA      BMW 535i      H
## 6   2880     USA Buick Century     LM
r_4.6.9.10 <- select(Cars93, -c(4, 6, 9, 10)) # exclusions given as one vector
r_4.6.9.10 <- select(Cars93, -4, -6, -9, -10) # exclusions given one by one
head(r_4.6.9.10)
##   Manufacturer   Model    Type Price MPG.city MPG.highway Cylinders EngineSize
## 1        Acura Integra   Small  15.9       25          31         4        1.8
## 2        Acura  Legend Midsize  33.9       18          25         6        3.2
## 3         Audi      90 Compact  29.1       20          26         6        2.8
## 4         Audi     100 Midsize  37.7       19          26         6        2.8
## 5          BMW    535i Midsize  30.0       22          30         4        3.5
## 6        Buick Century Midsize  15.7       22          31         4        2.2
##   Horsepower  RPM Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers
## 1        140 6300         2890             Yes               13.2          5
## 2        200 5500         2335             Yes               18.0          5
## 3        172 5500         2280             Yes               16.9          5
## 4        172 5500         2535             Yes               21.1          6
## 5        208 5700         2545             Yes               21.1          4
## 6        110 5200         2565              No               16.4          6
##   Length Wheelbase Width Turn.circle Rear.seat.room Luggage.room Weight  Origin
## 1    177       102    68          37           26.5           11   2705 non-USA
## 2    195       115    71          38           30.0           15   3560 non-USA
## 3    180       102    67          37           28.0           14   3375 non-USA
## 4    193       106    70          37           31.0           17   3405 non-USA
## 5    186       109    69          39           27.0           13   3640 non-USA
## 6    189       105    69          41           28.0           16   2880     USA
##            Make cate.L
## 1 Acura Integra     LM
## 2  Acura Legend      H
## 3       Audi 90      H
## 4      Audi 100      H
## 5      BMW 535i      H
## 6 Buick Century     LM
# Extract two columns and rename them in the same call, written as
# new_name = column. The quotes around the new names are optional.
r.rname <- Cars93 %>%
  select("価格" = 5, "タイプ" = Type)
head(r.rname)
##   価格  タイプ
## 1 15.9   Small
## 2 33.9 Midsize
## 3 29.1 Compact
## 4 37.7 Midsize
## 5 30.0 Midsize
## 6 15.7 Midsize
# contains() selects every column whose name includes the given substring.
r_contain <- Cars93 %>%
  select(contains("Price"))
head(r_contain)
##   Min.Price Price Max.Price
## 1      12.9  15.9      18.8
## 2      29.2  33.9      38.7
## 3      25.9  29.1      32.3
## 4      30.8  37.7      44.6
## 5      23.7  30.0      36.2
## 6      14.2  15.7      17.3
# A shorter fragment works as well: "ice" matches the same three price
# columns as "Price" does.
r_contain_p <- Cars93 %>%
  select(contains("ice"))
head(r_contain_p)
##   Min.Price Price Max.Price
## 1      12.9  15.9      18.8
## 2      29.2  33.9      38.7
## 3      25.9  29.1      32.3
## 4      30.8  37.7      44.6
## 5      23.7  30.0      36.2
## 6      14.2  15.7      17.3

11 データを結合する

11.1 行に追加:列数と列名が同じ場合

# rbind() requires both data frames to have the same number of columns and
# the same column names; rename the columns first if the names differ.
d1.x <- data.frame(x12 = 1:10) # one column named x12
d1.x
##    x12
## 1    1
## 2    2
## 3    3
## 4    4
## 5    5
## 6    6
## 7    7
## 8    8
## 9    9
## 10  10
# Second one-column data frame with the same column name (x12).
d1.y <- data.frame(x12 = 11:20)
d1.y
##    x12
## 1   11
## 2   12
## 3   13
## 4   14
## 5   15
## 6   16
## 7   17
## 8   18
## 9   19
## 10  20
# Stack the rows of d1.y underneath d1.x.
rbind(d1.x, d1.y)
##    x12
## 1    1
## 2    2
## 3    3
## 4    4
## 5    5
## 6    6
## 7    7
## 8    8
## 9    9
## 10  10
## 11  11
## 12  12
## 13  13
## 14  14
## 15  15
## 16  16
## 17  17
## 18  18
## 19  19
## 20  20
# Two-column data frame with columns x12 and y12.
d2.x <- data.frame(x12 = 1:10, y12 = 1:10)
d2.x
##    x12 y12
## 1    1   1
## 2    2   2
## 3    3   3
## 4    4   4
## 5    5   5
## 6    6   6
## 7    7   7
## 8    8   8
## 9    9   9
## 10  10  10
# Second two-column data frame with the same column names (x12, y12).
d2.y <- data.frame(x12 = 11:20, y12 = 11:20)
d2.y
##    x12 y12
## 1   11  11
## 2   12  12
## 3   13  13
## 4   14  14
## 5   15  15
## 6   16  16
## 7   17  17
## 8   18  18
## 9   19  19
## 10  20  20
# Stack the rows of d2.y underneath d2.x; the column names match.
rbind(d2.x, d2.y)
##    x12 y12
## 1    1   1
## 2    2   2
## 3    3   3
## 4    4   4
## 5    5   5
## 6    6   6
## 7    7   7
## 8    8   8
## 9    9   9
## 10  10  10
## 11  11  11
## 12  12  12
## 13  13  13
## 14  14  14
## 15  15  15
## 16  16  16
## 17  17  17
## 18  18  18
## 19  19  19
## 20  20  20

11.2 行に追加:列数と列名が異なる場合

# bind_rows() can stack data frames even when the number of columns and the
# column names differ; cells with no source value are filled with NA.
d3.1 <- data.frame(xx12 = 1:10)                # xx12: 1 column, 10 rows
d3.2 <- data.frame(xx22 = 31:41, xy22 = 41:51) # xx22, xy22: 2 columns, 11 rows
bind_rows(d3.1, d3.2)
##    xx12 xx22 xy22
## 1     1   NA   NA
## 2     2   NA   NA
## 3     3   NA   NA
## 4     4   NA   NA
## 5     5   NA   NA
## 6     6   NA   NA
## 7     7   NA   NA
## 8     8   NA   NA
## 9     9   NA   NA
## 10   10   NA   NA
## 11   NA   31   41
## 12   NA   32   42
## 13   NA   33   43
## 14   NA   34   44
## 15   NA   35   45
## 16   NA   36   46
## 17   NA   37   47
## 18   NA   38   48
## 19   NA   39   49
## 20   NA   40   50
## 21   NA   41   51

11.3 列を追加(merge);by変数をキー変数とする結合

# Two tables that share the key column `id` (values 1 to 5).
# The ids are written as 1:5; seq(1:5) gives the same values only because
# seq() on a longer vector falls back to seq_along().
d4.1 <- data.frame(id = 1:5, d4.1 = 11:15)
d4.2 <- data.frame(id = 1:5, d4.2 = 21:25)
# Join on the key: `id` appears once and the value columns sit side by side.
merge(d4.1, d4.2, by = "id")
##   id d4.1 d4.2
## 1  1   11   21
## 2  2   12   22
## 3  3   13   23
## 4  4   14   24
## 5  5   15   25
# cbind() just pastes the tables side by side, so every column is kept and
# `id` appears twice.
cbind(d4.1, d4.2)
##   id d4.1 id d4.2
## 1  1   11  1   21
## 2  2   12  2   22
## 3  3   13  3   23
## 4  4   14  4   24
## 5  5   15  5   25
# The key column does not have to be sorted or sequential: merge() matches
# rows by key value, not by row position.
d7.1 <- data.frame(id = c(2, 4, 3, 1, 5), d4.1 = 11:15)
# The ids are written as 1:5 rather than seq(1:5), which gives the same
# values only through seq()'s seq_along() fallback.
d7.2 <- data.frame(id = 1:5, d4.2 = 21:25)
merge(d7.1, d7.2, by = "id")
##   id d4.1 d4.2
## 1  1   14   21
## 2  2   11   22
## 3  3   13   23
## 4  4   12   24
## 5  5   15   25

11.4 列を追加(by変数の値が一部しか対応しない場合)

# Two tables whose key values overlap only at id == 5.
d5.1 <- data.frame(id = 5:9, d5.1 = 11:15)
d5.2 <- data.frame(id = 1:5, d5.2 = 21:25)
# all = TRUE keeps rows whose key has no match in the other table and
# fills the missing side with NA. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
merge(d5.1, d5.2, by = "id", all = TRUE)
##   id d5.1 d5.2
## 1  1   NA   21
## 2  2   NA   22
## 3  3   NA   23
## 4  4   NA   24
## 5  5   11   25
## 6  6   12   NA
## 7  7   13   NA
## 8  8   14   NA
## 9  9   15   NA
# all = FALSE keeps only rows whose key is present in both tables.
# FALSE is spelled out because F is an ordinary variable that can be
# reassigned.
merge(d5.1, d5.2, by = "id", all = FALSE)
##   id d5.1 d5.2
## 1  5   11   25

11.5 列を追加(by変数の列名が異なる場合)

# Left table: the key column is named `id` (lower case).
# The ids are written as 1:5 rather than seq(1:5), which gives the same
# values only through seq()'s seq_along() fallback.
d6.1 <- data.frame(id = 1:5, d6.1 = 11:15)
d6.1
##   id d6.1
## 1  1   11
## 2  2   12
## 3  3   13
## 4  4   14
## 5  5   15
# Right table: the key column is named `ID` (upper case).
# The ids are written as 1:5 rather than seq(1:5), which gives the same
# values only through seq()'s seq_along() fallback.
d6.2 <- data.frame(ID = 1:5, d6.2 = 21:25)
d6.2
##   ID d6.2
## 1  1   21
## 2  2   22
## 3  3   23
## 4  4   24
## 5  5   25
# When the key column is named differently in each table, give the name for
# the left table in by.x and the name for the right table in by.y.
merge(d6.1, d6.2, by.x = "id", by.y = "ID")
##   id d6.1 d6.2
## 1  1   11   21
## 2  2   12   22
## 3  3   13   23
## 4  4   14   24
## 5  5   15   25

12 データをグループ化

# Without grouping, n() counts every row of the data set.
Cars93 %>%
  summarise(n = n())
##    n
## 1 93
# Group the rows by Type. The data look unchanged, but the printed header
# now reports "Groups: Type [6]".
gp <- Cars93 %>%
  group_by(Type)
gp
## # A tibble: 93 × 28
## # Groups:   Type [6]
##    Manufacturer Model      Type   Min.Price Price Max.Price MPG.city MPG.highway
##    <fct>        <fct>      <fct>      <dbl> <dbl>     <dbl>    <int>       <int>
##  1 Acura        Integra    Small       12.9  15.9      18.8       25          31
##  2 Acura        Legend     Midsi…      29.2  33.9      38.7       18          25
##  3 Audi         90         Compa…      25.9  29.1      32.3       20          26
##  4 Audi         100        Midsi…      30.8  37.7      44.6       19          26
##  5 BMW          535i       Midsi…      23.7  30        36.2       22          30
##  6 Buick        Century    Midsi…      14.2  15.7      17.3       22          31
##  7 Buick        LeSabre    Large       19.9  20.8      21.7       19          28
##  8 Buick        Roadmaster Large       22.6  23.7      24.9       16          25
##  9 Buick        Riviera    Midsi…      26.3  26.3      26.3       19          27
## 10 Cadillac     DeVille    Large       33    34.7      36.3       16          25
## # ℹ 83 more rows
## # ℹ 20 more variables: AirBags <fct>, DriveTrain <fct>, Cylinders <fct>,
## #   EngineSize <dbl>, Horsepower <int>, RPM <int>, Rev.per.mile <int>,
## #   Man.trans.avail <fct>, Fuel.tank.capacity <dbl>, Passengers <int>,
## #   Length <int>, Wheelbase <int>, Width <int>, Turn.circle <int>,
## #   Rear.seat.room <dbl>, Luggage.room <int>, Weight <int>, Origin <fct>,
## #   Make <fct>, cate.L <ord>
# gp is grouped by Type, so n() is evaluated once per Type.
gp %>%
  summarise(n = n())
## # A tibble: 6 × 2
##   Type        n
##   <fct>   <int>
## 1 Compact    16
## 2 Large      11
## 3 Midsize    22
## 4 Small      21
## 5 Sporty     14
## 6 Van         9
# Remove the grouping. (Assigning the result to a new name is usually
# clearer than overwriting gp.)
gp <- gp %>%
  ungroup()
gp
## # A tibble: 93 × 28
##    Manufacturer Model      Type   Min.Price Price Max.Price MPG.city MPG.highway
##    <fct>        <fct>      <fct>      <dbl> <dbl>     <dbl>    <int>       <int>
##  1 Acura        Integra    Small       12.9  15.9      18.8       25          31
##  2 Acura        Legend     Midsi…      29.2  33.9      38.7       18          25
##  3 Audi         90         Compa…      25.9  29.1      32.3       20          26
##  4 Audi         100        Midsi…      30.8  37.7      44.6       19          26
##  5 BMW          535i       Midsi…      23.7  30        36.2       22          30
##  6 Buick        Century    Midsi…      14.2  15.7      17.3       22          31
##  7 Buick        LeSabre    Large       19.9  20.8      21.7       19          28
##  8 Buick        Roadmaster Large       22.6  23.7      24.9       16          25
##  9 Buick        Riviera    Midsi…      26.3  26.3      26.3       19          27
## 10 Cadillac     DeVille    Large       33    34.7      36.3       16          25
## # ℹ 83 more rows
## # ℹ 20 more variables: AirBags <fct>, DriveTrain <fct>, Cylinders <fct>,
## #   EngineSize <dbl>, Horsepower <int>, RPM <int>, Rev.per.mile <int>,
## #   Man.trans.avail <fct>, Fuel.tank.capacity <dbl>, Passengers <int>,
## #   Length <int>, Wheelbase <int>, Width <int>, Turn.circle <int>,
## #   Rear.seat.room <dbl>, Luggage.room <int>, Weight <int>, Origin <fct>,
## #   Make <fct>, cate.L <ord>
# After ungroup() the count covers the whole table again (n = 93).
gp %>%
  summarise(n = n())
## # A tibble: 1 × 1
##       n
##   <int>
## 1    93
# Per-group statistics: mean and standard deviation of Price for each Type.
Cars93 %>%
  group_by(Type) %>%
  summarise(
    Mean = mean(Price),
    SD = sd(Price)
  )
## # A tibble: 6 × 3
##   Type     Mean    SD
##   <fct>   <dbl> <dbl>
## 1 Compact  18.2  6.69
## 2 Large    24.3  6.34
## 3 Midsize  27.2 12.3 
## 4 Small    10.2  1.95
## 5 Sporty   19.4  7.97
## 6 Van      19.1  1.88
# Mean price for each Type x Origin combination.
g2 <- Cars93 %>%
  group_by(Type, Origin)
# summarise() peels off the last grouping level (Origin). Stating
# .groups = "drop_last" makes that explicit and avoids the regrouping
# message; the result is still grouped by Type.
g2m <- g2 %>%
  summarise(平均価格 = mean(Price), .groups = "drop_last")
g2m
## # A tibble: 11 × 3
## # Groups:   Type [6]
##    Type    Origin  平均価格
##    <fct>   <fct>      <dbl>
##  1 Compact USA         12.8
##  2 Compact non-USA     22.4
##  3 Large   USA         24.3
##  4 Midsize USA         21.8
##  5 Midsize non-USA     31.8
##  6 Small   USA         10.0
##  7 Small   non-USA     10.2
##  8 Sporty  USA         19.4
##  9 Sporty  non-USA     19.4
## 10 Van     USA         18.3
## 11 Van     non-USA     20.2
# Remove the grouping from g2 now that the summary has been computed.
g2 <- ungroup(g2)
# The same summary written as a single pipeline.
# .groups = "drop_last" states explicitly that only the last grouping level
# (Origin) is dropped, so the result stays grouped by Type.
Cars93 %>%
  group_by(Type, Origin) %>%
  summarise(平均価格 = mean(Price), .groups = "drop_last")
## # A tibble: 11 × 3
## # Groups:   Type [6]
##    Type    Origin  平均価格
##    <fct>   <fct>      <dbl>
##  1 Compact USA         12.8
##  2 Compact non-USA     22.4
##  3 Large   USA         24.3
##  4 Midsize USA         21.8
##  5 Midsize non-USA     31.8
##  6 Small   USA         10.0
##  7 Small   non-USA     10.2
##  8 Sporty  USA         19.4
##  9 Sporty  non-USA     19.4
## 10 Van     USA         18.3
## 11 Van     non-USA     20.2

13 標準化(Z)

# Numeric columns to standardise. They are columns 5, 7, 8, 12, 13 and 14 of
# Cars93, selected by name so the choice does not silently shift if columns
# are added or reordered.
z <- Cars93[, c("Price", "MPG.city", "MPG.highway",
                "EngineSize", "Horsepower", "RPM")]
# scale() centres each column on its mean and divides by its standard
# deviation. The result has class "matrix" "array", not data.frame.
std <- scale(z)
# std <- data.frame(scale(z)) # use this form to keep a data.frame instead
library(psych)
describe(std)
##             vars  n mean sd median trimmed  mad   min  max range  skew kurtosis
## Price          1 93    0  1  -0.19   -0.13 0.86 -1.25 4.39  5.64  1.48     3.05
## MPG.city       2 93    0  1  -0.24   -0.13 0.79 -1.31 4.21  5.52  1.65     3.58
## MPG.highway    3 93    0  1  -0.20   -0.09 0.83 -1.70 3.92  5.63  1.19     2.30
## EngineSize     4 93    0  1  -0.26   -0.10 0.86 -1.61 2.92  4.53  0.83     0.23
## Horsepower     5 93    0  1  -0.07   -0.09 0.85 -1.70 2.98  4.68  0.92     0.90
## RPM            6 93    0  1  -0.14    0.05 0.99 -2.48 2.04  4.52 -0.25    -0.51
##              se
## Price       0.1
## MPG.city    0.1
## MPG.highway 0.1
## EngineSize  0.1
## Horsepower  0.1
## RPM         0.1

14 偏差値化

# Deviation score (hensachi): rescale the z-scores to mean 50 and SD 10.
h <- 50 + 10 * scale(z)
describe(h)
##             vars  n mean sd median trimmed  mad   min   max range  skew
## Price          1 93   50 10  48.13   48.74 8.60 37.46 93.88 56.42  1.48
## MPG.city       2 93   50 10  47.57   48.66 7.91 36.89 92.06 55.16  1.65
## MPG.highway    3 93   50 10  47.96   49.09 8.34 32.96 89.23 56.27  1.19
## EngineSize     4 93   50 10  47.42   48.97 8.58 33.92 79.23 45.31  0.83
## Horsepower     5 93   50 10  49.27   49.07 8.49 33.04 79.82 46.78  0.92
## RPM            6 93   50 10  48.65   50.48 9.94 25.19 70.43 45.25 -0.25
##             kurtosis   se
## Price           3.05 1.04
## MPG.city        3.58 1.04
## MPG.highway     2.30 1.04
## EngineSize      0.23 1.04
## Horsepower      0.90 1.04
## RPM            -0.51 1.04

15 中心化(centering):変数の値から定数を引く

# Centring with scale(): subtract each column's mean but leave the spread
# unchanged. scale = FALSE switches off the division by the standard
# deviation; FALSE is spelled out because F can be reassigned.
C <- scale(z, scale = FALSE)
# C <- data.frame(scale(z, scale = FALSE)) # use this form to keep a data.frame
describe(C)
##             vars  n mean     sd median trimmed    mad      min     max  range
## Price          1 93    0   9.66  -1.81   -1.22   8.30   -12.11   42.39   54.5
## MPG.city       2 93    0   5.62  -1.37   -0.75   4.45    -7.37   23.63   31.0
## MPG.highway    3 93    0   5.33  -1.09   -0.49   4.45    -9.09   20.91   30.0
## EngineSize     4 93    0   1.04  -0.27   -0.11   0.89    -1.67    3.03    4.7
## Horsepower     5 93    0  52.37  -3.83   -4.88  44.48   -88.83  156.17  245.0
## RPM            6 93    0 596.73 -80.65   28.69 593.04 -1480.65 1219.35 2700.0
##              skew kurtosis    se
## Price        1.48     3.05  1.00
## MPG.city     1.65     3.58  0.58
## MPG.highway  1.19     2.30  0.55
## EngineSize   0.83     0.23  0.11
## Horsepower   0.92     0.90  5.43
## RPM         -0.25    -0.51 61.88