Rにおけるデータハンドリング (データ操作)は{dplyr}と{tidyr}が便利である.
パイプ演算子(%>%)を使うとコードの可読性が高まる. パイプ演算子%>%は左辺の出力を右辺の関数の第1引数に渡す.ショートカットはWin:Shift+Ctrl+M,Mac:Shift+command+M
library(tidyverse)
library(magrittr)
library(dplyr)
library(psych)
# 処理1
# x=1:10 , y=10:19 , g=as.factor(rep(1:2 , 5)) というdata.frameを作成
# Build the example data: x holds 1..10 and g is a two-level grouping factor.
x <- c(1:10)
g <- as.factor(rep(1:2, 5))
d <- data.frame(x, g)
d
## x g
## 1 1 1
## 2 2 2
## 3 3 1
## 4 4 2
## 5 5 1
## 6 6 2
## 7 7 1
## 8 8 2
## 9 9 1
## 10 10 2
#gでグループ化
# Split d into one data frame per level of g.
g1 <- subset(d, subset = g == "1")
g2 <- subset(d, subset = g == "2")
# Mean of x within each group.
mean(g1$x); mean(g2$x)
## [1] 5
## [1] 6
# 処理2:lapply関数を使い1行でコーディング(gが2なので2行になる)
lapply(subset(data.frame(x = c(1:10) ,
g = as.factor(rep(1:2 , 5))),subset = g == "1"),summary)
## $x
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 3 5 5 7 9
##
## $g
## 1 2
## 5 0
lapply(subset(data.frame(x = c(1:10) ,
g = as.factor(rep(1:2 , 5))),subset = g == "2"),summary)
## $x
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 4 6 6 8 10
##
## $g
## 1 2
## 0 5
# 処理3:pipe演算子を使う
data.frame(x = c(1:10) , y = c(10:19) , g = as.factor(rep(1:2 , 5)))%>% # data.frame
subset( subset = g == "1") %>% # subset
summary() # summary
## x y g
## Min. :1 Min. :10 1:5
## 1st Qu.:3 1st Qu.:12 2:0
## Median :5 Median :14
## Mean :5 Mean :14
## 3rd Qu.:7 3rd Qu.:16
## Max. :9 Max. :18
処理1は1stepごとにオブジェクトに代入しながら処理する:何をしているかわかりやすいが冗長である. 処理2はlapply関数で1行にコーディングする:極めて可読性が悪い. 処理3は処理1と同様1stepごとにコーディングしているが,代入せず左辺を右辺に渡している:可読性が高い.
# 簡単な例
data("iris")
colnames(iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
# Count the rows of iris within each Species.
iris %>%
  group_by(Species) %>%
  summarise(n = n())
## # A tibble: 3 × 2
## Species n
## <fct> <int>
## 1 setosa 50
## 2 versicolor 50
## 3 virginica 50
# Per-species means of sepal length, sepal width and petal width.
iris %>%
  group_by(Species) %>%
  summarise(ave.sl = mean(Sepal.Length),
            ave.sw = mean(Sepal.Width),
            ave.pw = mean(Petal.Width))
## # A tibble: 3 × 4
## Species ave.sl ave.sw ave.pw
## <fct> <dbl> <dbl> <dbl>
## 1 setosa 5.01 3.43 0.246
## 2 versicolor 5.94 2.77 1.33
## 3 virginica 6.59 2.97 2.03
#library(MASS) # library:読み込み
#detach("package:MASS") # library:取り外し
#ls() # メモリ上にあるオブジェクト名を確認
#rm(list=ls()) # メモリ上にあるすべてのオブジェクトを消去
## Environmentで確認、「箒」アイコンで消去でも可
## Consoleへの出力結果の外部保存;sink関数
# 最初に以下のコードを実行してから分析をはじめ
#sink("out.txt" , append = T) # append = Tで上書きせず追加
#sink("data/out.txt" , append =T) # dataフォルダにout.txtで保存
#sink() # 出力終了
# 最後に上記のコードを実行して分析を終了
# data入力時欠測値をNAと入力していれば問題なし
# blank:読み込み時NAになる.ピリオドや記号,数値で入力されている場合は
# NA指定する必要がある.指定しないとNA扱いにならない,
# data読込時,引数na.strings=()で指定する.
# read.csv(file, na.strings = (c("99", "."))) # "99"と"."をNAに指定
#CSVデータでは,欠測値はブランクにしておく.Rで読み込むと自動的にNA(Not Available)となる.
#ピリオドや記号,数値で入力されている場合は指定する.指定しないとNAにならない
#文字列データのブランクを欠測値とする時:na.strings=""
# data読込時,引数na.strings=()で指定する.
# read.csv(file名, na.strings = (c("99", "."))) # "99"と"."をNAに指定
library(MASS)
data(Cars93)
#欠測値の確認
#is.na(Cars93) # dataが全て出力される
anyNA(Cars93) # TRUEでNAあり
## [1] TRUE
#どの行に欠測値があるかを確認する
complete.cases(Cars93)# FALSE;欠測値有り
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE FALSE FALSE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE
## [73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [85] TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE
#欠測値を含む行を表示
Cars93[!complete.cases(Cars93), ] # rows with at least one NA; ! is the logical NOT operator
## Manufacturer Model Type Min.Price Price Max.Price MPG.city
## 16 Chevrolet Lumina_APV Van 14.7 16.3 18.0 18
## 17 Chevrolet Astro Van 14.7 16.6 18.6 15
## 19 Chevrolet Corvette Sporty 34.6 38.0 41.5 17
## 26 Dodge Caravan Van 13.6 19.0 24.4 17
## 36 Ford Aerostar Van 14.5 19.9 25.3 15
## 56 Mazda MPV Van 16.6 19.1 21.7 18
## 57 Mazda RX-7 Sporty 32.5 32.5 32.5 17
## 66 Nissan Quest Van 16.7 19.1 21.5 17
## 70 Oldsmobile Silhouette Van 19.5 19.5 19.5 18
## 87 Toyota Previa Van 18.9 22.7 26.6 18
## 89 Volkswagen Eurovan Van 16.6 19.7 22.7 17
## MPG.highway AirBags DriveTrain Cylinders EngineSize Horsepower RPM
## 16 23 None Front 6 3.8 170 4800
## 17 20 None 4WD 6 4.3 165 4000
## 19 25 Driver only Rear 8 5.7 300 5000
## 26 21 Driver only 4WD 6 3.0 142 5000
## 36 20 Driver only 4WD 6 3.0 145 4800
## 56 24 None 4WD 6 3.0 155 5000
## 57 25 Driver only Rear rotary 1.3 255 6500
## 66 23 None Front 6 3.0 151 4800
## 70 23 None Front 6 3.8 170 4800
## 87 22 Driver only 4WD 4 2.4 138 5000
## 89 21 None Front 5 2.5 109 4500
## Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length Wheelbase
## 16 1690 No 20.0 7 178 110
## 17 1790 No 27.0 8 194 111
## 19 1450 Yes 20.0 2 179 96
## 26 1970 No 20.0 7 175 112
## 36 2080 Yes 21.0 7 176 119
## 56 2240 No 19.6 7 190 110
## 57 2325 Yes 20.0 2 169 96
## 66 2065 No 20.0 7 190 112
## 70 1690 No 20.0 7 194 110
## 87 2515 Yes 19.8 7 187 113
## 89 2915 Yes 21.1 7 187 115
## Width Turn.circle Rear.seat.room Luggage.room Weight Origin
## 16 74 44 30.5 NA 3715 USA
## 17 78 42 33.5 NA 4025 USA
## 19 74 43 NA NA 3380 USA
## 26 72 42 26.5 NA 3705 USA
## 36 72 45 30.0 NA 3735 USA
## 56 72 39 27.5 NA 3735 non-USA
## 57 69 37 NA NA 2895 non-USA
## 66 74 41 27.0 NA 4100 non-USA
## 70 74 44 30.5 NA 3715 USA
## 87 71 41 35.0 NA 3785 non-USA
## 89 72 38 34.0 NA 3960 non-USA
## Make
## 16 Chevrolet Lumina_APV
## 17 Chevrolet Astro
## 19 Chevrolet Corvette
## 26 Dodge Caravan
## 36 Ford Aerostar
## 56 Mazda MPV
## 57 Mazda RX-7
## 66 Nissan Quest
## 70 Oldsmobile Silhouette
## 87 Toyota Previa
## 89 Volkswagen Eurovan
# The original passed `df`, which this script never defines, so nrow() returned
# NULL. Cars93 is the data set used by the surrounding missing-value examples.
df.omit <- na.omit(Cars93) # na.omit(): build a copy of the data with NA rows dropped
# df.omit <- Cars93[complete.cases(Cars93), ] # equivalent to na.omit()
nrow(df.omit) # sample size after listwise deletion
## [1] 82
na.dat <- c(1:10, NA)
na.dat
## [1] 1 2 3 4 5 6 7 8 9 10 NA
na.dat[is.na(na.dat)] <- 0 # replace every NA with 0
na.dat
## [1] 1 2 3 4 5 6 7 8 9 10 0
na.dat[na.dat == 0] <- NA # turn the 0 back into NA
na.dat
## [1] 1 2 3 4 5 6 7 8 9 10 NA
# データフレーム全体を処理
data(Cars93)
Cars93[is.na(Cars93)] <- 99999 # replace missing values with 99999
Cars93[Cars93 == 99999] <- NA # turn 99999 back into NA
data(Cars93)
#欠測値有りを許容しない関数を使う場合;欠測値を除外したdataを作成
Cars93.naomit <- na.omit(Cars93)
nrow(Cars93.naomit) # 82 samples remain
## [1] 82
nrow(Cars93) # 93サンプル
## [1] 93
#欠測値の除外を引数で指定できる関数の場合
mean(Cars93$Luggage.room) # returns NA: mean() does not compute a value unless missing values are excluded
## [1] NA
mean(Cars93$Luggage.room, na.rm=TRUE)
## [1] 13.89024
cor(Cars93$Length , Cars93$Luggage.room) # returns NA: cor() does not compute a value unless missing values are excluded
## [1] NA
cor(Cars93$Length , Cars93$Luggage.room, use="pairwise.complete.obs") # request pairwise deletion
## [1] 0.7129622
summary(Cars93$Price) # 5数要約
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.40 12.20 17.70 19.51 23.30 61.90
# Bin Price into four ordered classes using the five-number summary as breaks.
Cars93$cate.L <- cut(Cars93$Price, # cut(): convert a numeric vector into classes
  breaks = c(7.4, 12.2, 17.7, 23.3, 61.9), # class boundaries
  ## classes: [7.4, 12.2), [12.2, 17.7), [17.7, 23.3), [23.3, 61.9]
  labels = c("L", "LM", "M", "H"), # class labels
  right = FALSE, # TRUE would give (7.4, 12.2], (12.2, 17.7], ...
  include.lowest = TRUE, # with right = FALSE this keeps the maximum (61.9) in "H" instead of NA
  ordered_result = TRUE) # return an ordered factor
table(Cars93$cate.L) # check the distribution
##
## L LM M H
## 23 23 23 24
names(Cars93) # 変数の確認
## [1] "Manufacturer" "Model" "Type"
## [4] "Min.Price" "Price" "Max.Price"
## [7] "MPG.city" "MPG.highway" "AirBags"
## [10] "DriveTrain" "Cylinders" "EngineSize"
## [13] "Horsepower" "RPM" "Rev.per.mile"
## [16] "Man.trans.avail" "Fuel.tank.capacity" "Passengers"
## [19] "Length" "Wheelbase" "Width"
## [22] "Turn.circle" "Rear.seat.room" "Luggage.room"
## [25] "Weight" "Origin" "Make"
## [28] "cate.L"
#各データ値に新しくデータ値を対応させる
x1 <- c(1, 2, 1, 2)
x2 <- c(3, 4, 3, 4)
d <- data.frame(x1, x2) # combine the vectors into a data frame
d
## x1 x2
## 1 1 3
## 2 2 4
## 3 1 3
## 4 2 4
names(d)[1] <- "xx"
# 1列目の列名変更 全て変更の時:colnames(d) <- c("yy","xx")
d
## xx x2
## 1 1 3
## 2 2 4
## 3 1 3
## 4 2 4
# Add xx1: the values of xx recoded as "F" (for 1) and "M" (for 2).
d$xx1 <- factor(d$xx, levels = c(1, 2), labels = c("F", "M"))
d
## xx x2 xx1
## 1 1 3 F
## 2 2 4 M
## 3 1 3 F
## 4 2 4 M
# Add xx2: the values of xx1 recoded as 5 (for "F") and 6 (for "M").
d$xx2 <- factor(d$xx1, levels = c("F", "M"), labels = c(5, 6))
d
## xx x2 xx1 xx2
## 1 1 3 F 5
## 2 2 4 M 6
## 3 1 3 F 5
## 4 2 4 M 6
P <- mutate(Cars93, price.2 = Price * 1000) # add a column holding Price multiplied by 1000
head(P)
## Manufacturer Model Type Min.Price Price Max.Price MPG.city MPG.highway
## 1 Acura Integra Small 12.9 15.9 18.8 25 31
## 2 Acura Legend Midsize 29.2 33.9 38.7 18 25
## 3 Audi 90 Compact 25.9 29.1 32.3 20 26
## 4 Audi 100 Midsize 30.8 37.7 44.6 19 26
## 5 BMW 535i Midsize 23.7 30.0 36.2 22 30
## 6 Buick Century Midsize 14.2 15.7 17.3 22 31
## AirBags DriveTrain Cylinders EngineSize Horsepower RPM
## 1 None Front 4 1.8 140 6300
## 2 Driver & Passenger Front 6 3.2 200 5500
## 3 Driver only Front 6 2.8 172 5500
## 4 Driver & Passenger Front 6 2.8 172 5500
## 5 Driver only Rear 4 3.5 208 5700
## 6 Driver only Front 4 2.2 110 5200
## Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length Wheelbase
## 1 2890 Yes 13.2 5 177 102
## 2 2335 Yes 18.0 5 195 115
## 3 2280 Yes 16.9 5 180 102
## 4 2535 Yes 21.1 6 193 106
## 5 2545 Yes 21.1 4 186 109
## 6 2565 No 16.4 6 189 105
## Width Turn.circle Rear.seat.room Luggage.room Weight Origin Make
## 1 68 37 26.5 11 2705 non-USA Acura Integra
## 2 71 38 30.0 15 3560 non-USA Acura Legend
## 3 67 37 28.0 14 3375 non-USA Audi 90
## 4 70 37 31.0 17 3405 non-USA Audi 100
## 5 69 39 27.0 13 3640 non-USA BMW 535i
## 6 69 41 28.0 16 2880 USA Buick Century
## cate.L price.2
## 1 LM 15900
## 2 H 33900
## 3 H 29100
## 4 H 37700
## 5 H 30000
## 6 LM 15700
# Priceの3変数の行平均を追加
P.m <- Cars93 %>%
  mutate(price.ave = (Price + Max.Price + Min.Price) / 3 * 1000)
head(P.m)
## Manufacturer Model Type Min.Price Price Max.Price MPG.city MPG.highway
## 1 Acura Integra Small 12.9 15.9 18.8 25 31
## 2 Acura Legend Midsize 29.2 33.9 38.7 18 25
## 3 Audi 90 Compact 25.9 29.1 32.3 20 26
## 4 Audi 100 Midsize 30.8 37.7 44.6 19 26
## 5 BMW 535i Midsize 23.7 30.0 36.2 22 30
## 6 Buick Century Midsize 14.2 15.7 17.3 22 31
## AirBags DriveTrain Cylinders EngineSize Horsepower RPM
## 1 None Front 4 1.8 140 6300
## 2 Driver & Passenger Front 6 3.2 200 5500
## 3 Driver only Front 6 2.8 172 5500
## 4 Driver & Passenger Front 6 2.8 172 5500
## 5 Driver only Rear 4 3.5 208 5700
## 6 Driver only Front 4 2.2 110 5200
## Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length Wheelbase
## 1 2890 Yes 13.2 5 177 102
## 2 2335 Yes 18.0 5 195 115
## 3 2280 Yes 16.9 5 180 102
## 4 2535 Yes 21.1 6 193 106
## 5 2545 Yes 21.1 4 186 109
## 6 2565 No 16.4 6 189 105
## Width Turn.circle Rear.seat.room Luggage.room Weight Origin Make
## 1 68 37 26.5 11 2705 non-USA Acura Integra
## 2 71 38 30.0 15 3560 non-USA Acura Legend
## 3 67 37 28.0 14 3375 non-USA Audi 90
## 4 70 37 31.0 17 3405 non-USA Audi 100
## 5 69 39 27.0 13 3640 non-USA BMW 535i
## 6 69 41 28.0 16 2880 USA Buick Century
## cate.L price.ave
## 1 LM 15866.67
## 2 H 33933.33
## 3 H 29100.00
## 4 H 37700.00
## 5 H 29966.67
## 6 LM 15733.33
x11 <- c(1, 2, 3, 4, 5)
x22 <- c(1, 2, 3, 4, 5)
dd <- data.frame(x11, x22)
dd
## x11 x22
## 1 1 1
## 2 2 2
## 3 3 3
## 4 4 4
## 5 5 5
#
cbind(dd , (apply(dd , 2 , function(x){(1+5)-x})))
## x11 x22 x11 x22
## 1 1 1 5 5
## 2 2 2 4 4
## 3 3 3 3 3
## 4 4 4 2 2
## 5 5 5 1 1
dd
## x11 x22
## 1 1 1
## 2 2 2
## 3 3 3
## 4 4 4
## 5 5 5
cbind(dd , (apply(dd , 2 , function(x){(1+5)-x})))[c(3,4,1,2)]
## x11 x22 x11.1 x22.1
## 1 5 5 1 1
## 2 4 4 2 2
## 3 3 3 3 3
## 4 2 2 4 4
## 5 1 1 5 5
library(dplyr)
mutate(dd , x15 = (1+5) - x11 , x25 = (1+5) - x22 )
## x11 x22 x15 x25
## 1 1 1 5 5
## 2 2 2 4 4
## 3 3 3 3 3
## 4 4 4 2 2
## 5 5 5 1 1
#指定した列,行を参照する
Cars93$Price # access the Price variable
## [1] 15.9 33.9 29.1 37.7 30.0 15.7 20.8 23.7 26.3 34.7 40.1 13.4 11.4 15.1 15.9
## [16] 16.3 16.6 18.8 38.0 18.4 15.8 29.5 9.2 11.3 13.3 19.0 15.6 25.8 12.2 19.3
## [31] 7.4 10.1 11.3 15.9 14.0 19.9 20.2 20.9 8.4 12.5 19.8 12.1 17.5 8.0 10.0
## [46] 10.0 13.9 47.9 28.0 35.2 34.3 36.1 8.3 11.6 16.5 19.1 32.5 31.9 61.9 14.1
## [61] 14.9 10.3 26.1 11.8 15.7 19.1 21.5 13.5 16.3 19.5 20.7 14.4 9.0 11.1 17.7
## [76] 18.5 24.4 28.7 11.1 8.4 10.9 19.5 8.6 9.8 18.4 18.2 22.7 9.1 19.7 20.0
## [91] 23.3 22.7 26.7
Cars93[, 1] # access column 1; an index after the comma selects columns
## [1] Acura Acura Audi Audi BMW
## [6] Buick Buick Buick Buick Cadillac
## [11] Cadillac Chevrolet Chevrolet Chevrolet Chevrolet
## [16] Chevrolet Chevrolet Chevrolet Chevrolet Chrylser
## [21] Chrysler Chrysler Dodge Dodge Dodge
## [26] Dodge Dodge Dodge Eagle Eagle
## [31] Ford Ford Ford Ford Ford
## [36] Ford Ford Ford Geo Geo
## [41] Honda Honda Honda Hyundai Hyundai
## [46] Hyundai Hyundai Infiniti Lexus Lexus
## [51] Lincoln Lincoln Mazda Mazda Mazda
## [56] Mazda Mazda Mercedes-Benz Mercedes-Benz Mercury
## [61] Mercury Mitsubishi Mitsubishi Nissan Nissan
## [66] Nissan Nissan Oldsmobile Oldsmobile Oldsmobile
## [71] Oldsmobile Plymouth Pontiac Pontiac Pontiac
## [76] Pontiac Pontiac Saab Saturn Subaru
## [81] Subaru Subaru Suzuki Toyota Toyota
## [86] Toyota Toyota Volkswagen Volkswagen Volkswagen
## [91] Volkswagen Volvo Volvo
## 32 Levels: Acura Audi BMW Buick Cadillac Chevrolet Chrylser Chrysler ... Volvo
Cars93[1, ] # access row 1; an index before the comma selects rows
## Manufacturer Model Type Min.Price Price Max.Price MPG.city MPG.highway
## 1 Acura Integra Small 12.9 15.9 18.8 25 31
## AirBags DriveTrain Cylinders EngineSize Horsepower RPM Rev.per.mile
## 1 None Front 4 1.8 140 6300 2890
## Man.trans.avail Fuel.tank.capacity Passengers Length Wheelbase Width
## 1 Yes 13.2 5 177 102 68
## Turn.circle Rear.seat.room Luggage.room Weight Origin Make cate.L
## 1 37 26.5 11 2705 non-USA Acura Integra LM
head(Cars93[,c(1,2)]) # 1,2列を参照
## Manufacturer Model
## 1 Acura Integra
## 2 Acura Legend
## 3 Audi 90
## 4 Audi 100
## 5 BMW 535i
## 6 Buick Century
# 連続した行を抽出
r1_5 <- Cars93[c(1:5), ] # equivalent: subset(Cars93[c(1:5), ]); an index before the comma selects rows
r1_5
## Manufacturer Model Type Min.Price Price Max.Price MPG.city MPG.highway
## 1 Acura Integra Small 12.9 15.9 18.8 25 31
## 2 Acura Legend Midsize 29.2 33.9 38.7 18 25
## 3 Audi 90 Compact 25.9 29.1 32.3 20 26
## 4 Audi 100 Midsize 30.8 37.7 44.6 19 26
## 5 BMW 535i Midsize 23.7 30.0 36.2 22 30
## AirBags DriveTrain Cylinders EngineSize Horsepower RPM
## 1 None Front 4 1.8 140 6300
## 2 Driver & Passenger Front 6 3.2 200 5500
## 3 Driver only Front 6 2.8 172 5500
## 4 Driver & Passenger Front 6 2.8 172 5500
## 5 Driver only Rear 4 3.5 208 5700
## Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length Wheelbase
## 1 2890 Yes 13.2 5 177 102
## 2 2335 Yes 18.0 5 195 115
## 3 2280 Yes 16.9 5 180 102
## 4 2535 Yes 21.1 6 193 106
## 5 2545 Yes 21.1 4 186 109
## Width Turn.circle Rear.seat.room Luggage.room Weight Origin Make
## 1 68 37 26.5 11 2705 non-USA Acura Integra
## 2 71 38 30.0 15 3560 non-USA Acura Legend
## 3 67 37 28.0 14 3375 non-USA Audi 90
## 4 70 37 31.0 17 3405 non-USA Audi 100
## 5 69 39 27.0 13 3640 non-USA BMW 535i
## cate.L
## 1 LM
## 2 H
## 3 H
## 4 H
## 5 H
r4.5 <- Cars93[c(4, 5), ] # extract only the rows given by row number
r4.5
## Manufacturer Model Type Min.Price Price Max.Price MPG.city MPG.highway
## 4 Audi 100 Midsize 30.8 37.7 44.6 19 26
## 5 BMW 535i Midsize 23.7 30.0 36.2 22 30
## AirBags DriveTrain Cylinders EngineSize Horsepower RPM
## 4 Driver & Passenger Front 6 2.8 172 5500
## 5 Driver only Rear 4 3.5 208 5700
## Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length Wheelbase
## 4 2535 Yes 21.1 6 193 106
## 5 2545 Yes 21.1 4 186 109
## Width Turn.circle Rear.seat.room Luggage.room Weight Origin Make cate.L
## 4 70 37 31 17 3405 non-USA Audi 100 H
## 5 69 39 27 13 3640 non-USA BMW 535i H
c93.r <- Cars93[c(-4, -5), ] # drop the listed rows
head(c93.r)
## Manufacturer Model Type Min.Price Price Max.Price MPG.city
## 1 Acura Integra Small 12.9 15.9 18.8 25
## 2 Acura Legend Midsize 29.2 33.9 38.7 18
## 3 Audi 90 Compact 25.9 29.1 32.3 20
## 6 Buick Century Midsize 14.2 15.7 17.3 22
## 7 Buick LeSabre Large 19.9 20.8 21.7 19
## 8 Buick Roadmaster Large 22.6 23.7 24.9 16
## MPG.highway AirBags DriveTrain Cylinders EngineSize Horsepower
## 1 31 None Front 4 1.8 140
## 2 25 Driver & Passenger Front 6 3.2 200
## 3 26 Driver only Front 6 2.8 172
## 6 31 Driver only Front 4 2.2 110
## 7 28 Driver only Front 6 3.8 170
## 8 25 Driver only Rear 6 5.7 180
## RPM Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length
## 1 6300 2890 Yes 13.2 5 177
## 2 5500 2335 Yes 18.0 5 195
## 3 5500 2280 Yes 16.9 5 180
## 6 5200 2565 No 16.4 6 189
## 7 4800 1570 No 18.0 6 200
## 8 4000 1320 No 23.0 6 216
## Wheelbase Width Turn.circle Rear.seat.room Luggage.room Weight Origin
## 1 102 68 37 26.5 11 2705 non-USA
## 2 115 71 38 30.0 15 3560 non-USA
## 3 102 67 37 28.0 14 3375 non-USA
## 6 105 69 41 28.0 16 2880 USA
## 7 111 74 42 30.5 17 3470 USA
## 8 116 78 45 30.5 21 4105 USA
## Make cate.L
## 1 Acura Integra LM
## 2 Acura Legend H
## 3 Audi 90 H
## 6 Buick Century LM
## 7 Buick LeSabre M
## 8 Buick Roadmaster H
# 値が特定の行を抽出
# 条件式:== 等号,!= 否定等号,>= 以上,>より大,<=以下,<より小
# ds <- Cars93[Cars93$Type == "Small",]
ds <- subset(Cars93, subset = Type == "Small") # keep the rows whose Type is "Small"
table(ds$Type) # confirm that only Small was extracted
##
## Compact Large Midsize Small Sporty Van
## 0 0 0 21 0 0
dp <- subset(Cars93, subset = Price <= 20) # keep the rows whose Price is at most 20
range(dp$Price) # confirm that the price range does not exceed 20
## [1] 7.4 20.0
length(dp$Price) #length()関数:サンプル数を確認
## [1] 62
# dplyr
# filter(Cars93 , Type == "Small")
d.s <- Cars93 %>% filter(Type == "Small") # the same filter written with the pipe operator (%>%)
table(d.s$Type)
##
## Compact Large Midsize Small Sporty Van
## 0 0 0 21 0 0
# 連続した列を抽出
c1_5 <- Cars93[, c(1:5)]
head(c1_5)
## Manufacturer Model Type Min.Price Price
## 1 Acura Integra Small 12.9 15.9
## 2 Acura Legend Midsize 29.2 33.9
## 3 Audi 90 Compact 25.9 29.1
## 4 Audi 100 Midsize 30.8 37.7
## 5 BMW 535i Midsize 23.7 30.0
## 6 Buick Century Midsize 14.2 15.7
c4.5 <- Cars93[, c(4, 5)] # extract only the columns given by column number
head(c4.5)
## Min.Price Price
## 1 12.9 15.9
## 2 29.2 33.9
## 3 25.9 29.1
## 4 30.8 37.7
## 5 23.7 30.0
## 6 14.2 15.7
c93.r <- Cars93[, -c(4, 5)] # drop the listed columns
head(c93.r)
## Manufacturer Model Type Max.Price MPG.city MPG.highway
## 1 Acura Integra Small 18.8 25 31
## 2 Acura Legend Midsize 38.7 18 25
## 3 Audi 90 Compact 32.3 20 26
## 4 Audi 100 Midsize 44.6 19 26
## 5 BMW 535i Midsize 36.2 22 30
## 6 Buick Century Midsize 17.3 22 31
## AirBags DriveTrain Cylinders EngineSize Horsepower RPM
## 1 None Front 4 1.8 140 6300
## 2 Driver & Passenger Front 6 3.2 200 5500
## 3 Driver only Front 6 2.8 172 5500
## 4 Driver & Passenger Front 6 2.8 172 5500
## 5 Driver only Rear 4 3.5 208 5700
## 6 Driver only Front 4 2.2 110 5200
## Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length Wheelbase
## 1 2890 Yes 13.2 5 177 102
## 2 2335 Yes 18.0 5 195 115
## 3 2280 Yes 16.9 5 180 102
## 4 2535 Yes 21.1 6 193 106
## 5 2545 Yes 21.1 4 186 109
## 6 2565 No 16.4 6 189 105
## Width Turn.circle Rear.seat.room Luggage.room Weight Origin Make
## 1 68 37 26.5 11 2705 non-USA Acura Integra
## 2 71 38 30.0 15 3560 non-USA Acura Legend
## 3 67 37 28.0 14 3375 non-USA Audi 90
## 4 70 37 31.0 17 3405 non-USA Audi 100
## 5 69 39 27.0 13 3640 non-USA BMW 535i
## 6 69 41 28.0 16 2880 USA Buick Century
## cate.L
## 1 LM
## 2 H
## 3 H
## 4 H
## 5 H
## 6 LM
# dplyr
# MASSライブラリーを呼び出しているときはselect関数が重複するのでエラーになる.
# そのときはdplyr::selectとdplyrを指定する
r.s <- dplyr::select(Cars93, c(4, 5))
head(r.s)
## Min.Price Price
## 1 12.9 15.9
## 2 29.2 33.9
## 3 25.9 29.1
## 4 30.8 37.7
## 5 23.7 30.0
## 6 14.2 15.7
detach("package:MASS") # MASSライブラリーを外す
r4_5 <- Cars93 %>% select(c(4, 6)) # extract by column number; names also work: Min.Price, Max.Price
# Matching ignores case by default; pass ignore.case = FALSE to make it
# case-sensitive (this applies to selection helpers such as contains()).
r4_5 <- Cars93 %>% select(4, 6) # the positions need not be wrapped in a vector
head(r4_5)
## Min.Price Max.Price
## 1 12.9 18.8
## 2 29.2 38.7
## 3 25.9 32.3
## 4 30.8 44.6
## 5 23.7 36.2
## 6 14.2 17.3
r4.10 <- Cars93 %>% select(c(4:10)) # extract a run of consecutive columns
head(r4.10)
## Min.Price Price Max.Price MPG.city MPG.highway AirBags DriveTrain
## 1 12.9 15.9 18.8 25 31 None Front
## 2 29.2 33.9 38.7 18 25 Driver & Passenger Front
## 3 25.9 29.1 32.3 20 26 Driver only Front
## 4 30.8 37.7 44.6 19 26 Driver & Passenger Front
## 5 23.7 30.0 36.2 22 30 Driver only Rear
## 6 14.2 15.7 17.3 22 31 Driver only Front
r_4_20 <- Cars93 %>% select(-c(4:20)) # exclude the listed columns
head(r_4_20)
## Manufacturer Model Type Width Turn.circle Rear.seat.room Luggage.room
## 1 Acura Integra Small 68 37 26.5 11
## 2 Acura Legend Midsize 71 38 30.0 15
## 3 Audi 90 Compact 67 37 28.0 14
## 4 Audi 100 Midsize 70 37 31.0 17
## 5 BMW 535i Midsize 69 39 27.0 13
## 6 Buick Century Midsize 69 41 28.0 16
## Weight Origin Make cate.L
## 1 2705 non-USA Acura Integra LM
## 2 3560 non-USA Acura Legend H
## 3 3375 non-USA Audi 90 H
## 4 3405 non-USA Audi 100 H
## 5 3640 non-USA BMW 535i H
## 6 2880 USA Buick Century LM
r_4.6.9.10 <- Cars93 %>% select(-c(4, 6, 9, 10)) # exclusions given as one vector
r_4.6.9.10 <- Cars93 %>% select(-4, -6, -9, -10) # exclusions given as separate arguments
head(r_4.6.9.10)
## Manufacturer Model Type Price MPG.city MPG.highway Cylinders EngineSize
## 1 Acura Integra Small 15.9 25 31 4 1.8
## 2 Acura Legend Midsize 33.9 18 25 6 3.2
## 3 Audi 90 Compact 29.1 20 26 6 2.8
## 4 Audi 100 Midsize 37.7 19 26 6 2.8
## 5 BMW 535i Midsize 30.0 22 30 4 3.5
## 6 Buick Century Midsize 15.7 22 31 4 2.2
## Horsepower RPM Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers
## 1 140 6300 2890 Yes 13.2 5
## 2 200 5500 2335 Yes 18.0 5
## 3 172 5500 2280 Yes 16.9 5
## 4 172 5500 2535 Yes 21.1 6
## 5 208 5700 2545 Yes 21.1 4
## 6 110 5200 2565 No 16.4 6
## Length Wheelbase Width Turn.circle Rear.seat.room Luggage.room Weight Origin
## 1 177 102 68 37 26.5 11 2705 non-USA
## 2 195 115 71 38 30.0 15 3560 non-USA
## 3 180 102 67 37 28.0 14 3375 non-USA
## 4 193 106 70 37 31.0 17 3405 non-USA
## 5 186 109 69 39 27.0 13 3640 non-USA
## 6 189 105 69 41 28.0 16 2880 USA
## Make cate.L
## 1 Acura Integra LM
## 2 Acura Legend H
## 3 Audi 90 H
## 4 Audi 100 H
## 5 BMW 535i H
## 6 Buick Century LM
r.rname <- Cars93 %>%
  select("価格" = 5, "タイプ" = Type) # extract columns and rename them; the quotes are optional
head(r.rname)
## 価格 タイプ
## 1 15.9 Small
## 2 33.9 Midsize
## 3 29.1 Compact
## 4 37.7 Midsize
## 5 30.0 Midsize
## 6 15.7 Midsize
r_contain <- Cars93 %>% select(contains("Price")) # contains(): select by part of the column name
head(r_contain)
## Min.Price Price Max.Price
## 1 12.9 15.9 18.8
## 2 29.2 33.9 38.7
## 3 25.9 29.1 32.3
## 4 30.8 37.7 44.6
## 5 23.7 30.0 36.2
## 6 14.2 15.7 17.3
r_contain_p <- Cars93 %>% select(contains("ice")) # contains(): select by part of the column name
head(r_contain_p)
## Min.Price Price Max.Price
## 1 12.9 15.9 18.8
## 2 29.2 33.9 38.7
## 3 25.9 29.1 32.3
## 4 30.8 37.7 44.6
## 5 23.7 30.0 36.2
## 6 14.2 15.7 17.3
# rbind;列数と列名が同じ必要がある/列名が違っている場合列名をリネーム
d1.x <- data.frame(x12 = c(1:10)) # one column named x12
d1.x
## x12
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## 7 7
## 8 8
## 9 9
## 10 10
d1.y <- data.frame(x12 = c(11:20)) # one column named x12
d1.y
## x12
## 1 11
## 2 12
## 3 13
## 4 14
## 5 15
## 6 16
## 7 17
## 8 18
## 9 19
## 10 20
rbind(d1.x,d1.y)
## x12
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## 7 7
## 8 8
## 9 9
## 10 10
## 11 11
## 12 12
## 13 13
## 14 14
## 15 15
## 16 16
## 17 17
## 18 18
## 19 19
## 20 20
d2.x <- data.frame(x12 = c(1:10), y12 = c(1:10)) # two columns named x12 and y12
d2.x
## x12 y12
## 1 1 1
## 2 2 2
## 3 3 3
## 4 4 4
## 5 5 5
## 6 6 6
## 7 7 7
## 8 8 8
## 9 9 9
## 10 10 10
d2.y <- data.frame(x12 = c(11:20), y12 = c(11:20)) # two columns named x12 and y12
d2.y
## x12 y12
## 1 11 11
## 2 12 12
## 3 13 13
## 4 14 14
## 5 15 15
## 6 16 16
## 7 17 17
## 8 18 18
## 9 19 19
## 10 20 20
rbind(d2.x,d2.y)
## x12 y12
## 1 1 1
## 2 2 2
## 3 3 3
## 4 4 4
## 5 5 5
## 6 6 6
## 7 7 7
## 8 8 8
## 9 9 9
## 10 10 10
## 11 11 11
## 12 12 12
## 13 13 13
## 14 14 14
## 15 15 15
## 16 16 16
## 17 17 17
## 18 18 18
## 19 19 19
## 20 20 20
# bind_rows 列数と列名が異なっていても結合できる(NAで処理)
d3.1 <- data.frame(xx12 = c(1:10)) # xx12: 1 column, 10 rows
d3.2 <- data.frame(xx22 = c(31:41), xy22 = c(41:51)) # xx22, xy22: 2 columns, 11 rows
bind_rows(d3.1, d3.2)
## xx12 xx22 xy22
## 1 1 NA NA
## 2 2 NA NA
## 3 3 NA NA
## 4 4 NA NA
## 5 5 NA NA
## 6 6 NA NA
## 7 7 NA NA
## 8 8 NA NA
## 9 9 NA NA
## 10 10 NA NA
## 11 NA 31 41
## 12 NA 32 42
## 13 NA 33 43
## 14 NA 34 44
## 15 NA 35 45
## 16 NA 36 46
## 17 NA 37 47
## 18 NA 38 48
## 19 NA 39 49
## 20 NA 40 50
## 21 NA 41 51
# Two tables sharing the key column id, joined on that key.
d4.1 <- data.frame(id = 1:5, d4.1 = c(11:15))
d4.2 <- data.frame(id = 1:5, d4.2 = c(21:25))
merge(d4.1, d4.2, by = "id")
## id d4.1 d4.2
## 1 1 11 21
## 2 2 12 22
## 3 3 13 23
## 4 4 14 24
## 5 5 15 25
cbind(d4.1,d4.2) # 全ての変数が結合される
## id d4.1 id d4.2
## 1 1 11 1 21
## 2 2 12 2 22
## 3 3 13 3 23
## 4 4 14 4 24
## 5 5 15 5 25
# by変数は連番でなくてもOK
d7.1 <- data.frame(id = c(2, 4, 3, 1, 5), d4.1 = c(11:15)) # key values in arbitrary order
d7.2 <- data.frame(id = 1:5, d4.2 = c(21:25))
merge(d7.1, d7.2, by = "id")
## id d4.1 d4.2
## 1 1 14 21
## 2 2 11 22
## 3 3 13 23
## 4 4 12 24
## 5 5 15 25
d5.1 <- data.frame(id = c(5:9), d5.1 = c(11:15))
d5.2 <- data.frame(id = 1:5, d5.2 = c(21:25))
merge(d5.1, d5.2, by = "id", all = TRUE) # also keep rows whose id has no match in the other table
## id d5.1 d5.2
## 1 1 NA 21
## 2 2 NA 22
## 3 3 NA 23
## 4 4 NA 24
## 5 5 11 25
## 6 6 12 NA
## 7 7 13 NA
## 8 8 14 NA
## 9 9 15 NA
merge( d5.1 , d5.2 , by = "id" , all = F) # byで指定した変数が対応するデータのみ結合
## id d5.1 d5.2
## 1 5 11 25
d6.1 <- data.frame(id = 1:5, d6.1 = c(11:15)) # key column is named id
d6.1
## id d6.1
## 1 1 11
## 2 2 12
## 3 3 13
## 4 4 14
## 5 5 15
d6.2 <- data.frame(ID = 1:5, d6.2 = c(21:25)) # key column is named ID (upper case)
d6.2
## ID d6.2
## 1 1 21
## 2 2 22
## 3 3 23
## 4 4 24
## 5 5 25
merge( d6.1 , d6.2 , by.x = "id" , by.y = "ID")
## id d6.1 d6.2
## 1 1 11 21
## 2 2 12 22
## 3 3 13 23
## 4 4 14 24
## 5 5 15 25
Cars93 %>% summarise(n = n()) # row count of the ungrouped data
## n
## 1 93
gp <- Cars93 %>% group_by(Type) # group by Type
# The data look unchanged, but the printed header now shows "Groups: Type [6]".
gp
## # A tibble: 93 × 28
## # Groups: Type [6]
## Manufacturer Model Type Min.Price Price Max.Price MPG.city MPG.highway
## <fct> <fct> <fct> <dbl> <dbl> <dbl> <int> <int>
## 1 Acura Integra Small 12.9 15.9 18.8 25 31
## 2 Acura Legend Midsi… 29.2 33.9 38.7 18 25
## 3 Audi 90 Compa… 25.9 29.1 32.3 20 26
## 4 Audi 100 Midsi… 30.8 37.7 44.6 19 26
## 5 BMW 535i Midsi… 23.7 30 36.2 22 30
## 6 Buick Century Midsi… 14.2 15.7 17.3 22 31
## 7 Buick LeSabre Large 19.9 20.8 21.7 19 28
## 8 Buick Roadmaster Large 22.6 23.7 24.9 16 25
## 9 Buick Riviera Midsi… 26.3 26.3 26.3 19 27
## 10 Cadillac DeVille Large 33 34.7 36.3 16 25
## # ℹ 83 more rows
## # ℹ 20 more variables: AirBags <fct>, DriveTrain <fct>, Cylinders <fct>,
## # EngineSize <dbl>, Horsepower <int>, RPM <int>, Rev.per.mile <int>,
## # Man.trans.avail <fct>, Fuel.tank.capacity <dbl>, Passengers <int>,
## # Length <int>, Wheelbase <int>, Width <int>, Turn.circle <int>,
## # Rear.seat.room <dbl>, Luggage.room <int>, Weight <int>, Origin <fct>,
## # Make <fct>, cate.L <ord>
gp %>% summarise(n = n()) # counts are per Type because gp is grouped
## # A tibble: 6 × 2
## Type n
## <fct> <int>
## 1 Compact 16
## 2 Large 11
## 3 Midsize 22
## 4 Small 21
## 5 Sporty 14
## 6 Van 9
gp <- ungroup(gp) # remove the grouping; a different object name would be clearer
gp
## # A tibble: 93 × 28
## Manufacturer Model Type Min.Price Price Max.Price MPG.city MPG.highway
## <fct> <fct> <fct> <dbl> <dbl> <dbl> <int> <int>
## 1 Acura Integra Small 12.9 15.9 18.8 25 31
## 2 Acura Legend Midsi… 29.2 33.9 38.7 18 25
## 3 Audi 90 Compa… 25.9 29.1 32.3 20 26
## 4 Audi 100 Midsi… 30.8 37.7 44.6 19 26
## 5 BMW 535i Midsi… 23.7 30 36.2 22 30
## 6 Buick Century Midsi… 14.2 15.7 17.3 22 31
## 7 Buick LeSabre Large 19.9 20.8 21.7 19 28
## 8 Buick Roadmaster Large 22.6 23.7 24.9 16 25
## 9 Buick Riviera Midsi… 26.3 26.3 26.3 19 27
## 10 Cadillac DeVille Large 33 34.7 36.3 16 25
## # ℹ 83 more rows
## # ℹ 20 more variables: AirBags <fct>, DriveTrain <fct>, Cylinders <fct>,
## # EngineSize <dbl>, Horsepower <int>, RPM <int>, Rev.per.mile <int>,
## # Man.trans.avail <fct>, Fuel.tank.capacity <dbl>, Passengers <int>,
## # Length <int>, Wheelbase <int>, Width <int>, Turn.circle <int>,
## # Rear.seat.room <dbl>, Luggage.room <int>, Weight <int>, Origin <fct>,
## # Make <fct>, cate.L <ord>
gp %>% summarise(n = n()) # n = 93
## # A tibble: 1 × 1
## n
## <int>
## 1 93
# グループ別統計量
Cars93 %>%
  group_by(Type) %>%
  summarise(Mean = mean(Price),
            SD = sd(Price))
## # A tibble: 6 × 3
## Type Mean SD
## <fct> <dbl> <dbl>
## 1 Compact 18.2 6.69
## 2 Large 24.3 6.34
## 3 Midsize 27.2 12.3
## 4 Small 10.2 1.95
## 5 Sporty 19.4 7.97
## 6 Van 19.1 1.88
# Type別Origin別
g2 <- Cars93 %>% group_by(Type, Origin)
g2m <- g2 %>% summarise(平均価格 = mean(Price)) # mean price per Type/Origin pair
g2m
## # A tibble: 11 × 3
## # Groups: Type [6]
## Type Origin 平均価格
## <fct> <fct> <dbl>
## 1 Compact USA 12.8
## 2 Compact non-USA 22.4
## 3 Large USA 24.3
## 4 Midsize USA 21.8
## 5 Midsize non-USA 31.8
## 6 Small USA 10.0
## 7 Small non-USA 10.2
## 8 Sporty USA 19.4
## 9 Sporty non-USA 19.4
## 10 Van USA 18.3
## 11 Van non-USA 20.2
g2 <- ungroup(g2)
# The same summary written as a single pipeline
Cars93 %>% group_by(Type, Origin) %>% summarise(平均価格 = mean(Price))
## # A tibble: 11 × 3
## # Groups: Type [6]
## Type Origin 平均価格
## <fct> <fct> <dbl>
## 1 Compact USA 12.8
## 2 Compact non-USA 22.4
## 3 Large USA 24.3
## 4 Midsize USA 21.8
## 5 Midsize non-USA 31.8
## 6 Small USA 10.0
## 7 Small non-USA 10.2
## 8 Sporty USA 19.4
## 9 Sporty non-USA 19.4
## 10 Van USA 18.3
## 11 Van non-USA 20.2
z <- Cars93[, c(5, 7, 8, 12, 13, 14)] # extract columns 5, 7, 8, 12, 13 and 14
std <- scale(z) # standardising with scale() yields an object of class "matrix" "array"
# std <- data.frame(scale(z)) # use this to keep the result as a data.frame
library(psych)
describe(std)
## vars n mean sd median trimmed mad min max range skew kurtosis
## Price 1 93 0 1 -0.19 -0.13 0.86 -1.25 4.39 5.64 1.48 3.05
## MPG.city 2 93 0 1 -0.24 -0.13 0.79 -1.31 4.21 5.52 1.65 3.58
## MPG.highway 3 93 0 1 -0.20 -0.09 0.83 -1.70 3.92 5.63 1.19 2.30
## EngineSize 4 93 0 1 -0.26 -0.10 0.86 -1.61 2.92 4.53 0.83 0.23
## Horsepower 5 93 0 1 -0.07 -0.09 0.85 -1.70 2.98 4.68 0.92 0.90
## RPM 6 93 0 1 -0.14 0.05 0.99 -2.48 2.04 4.52 -0.25 -0.51
## se
## Price 0.1
## MPG.city 0.1
## MPG.highway 0.1
## EngineSize 0.1
## Horsepower 0.1
## RPM 0.1
h <- scale(z) * 10 + 50 # rescale the standardised scores to mean 50 and sd 10
describe(h)
## vars n mean sd median trimmed mad min max range skew
## Price 1 93 50 10 48.13 48.74 8.60 37.46 93.88 56.42 1.48
## MPG.city 2 93 50 10 47.57 48.66 7.91 36.89 92.06 55.16 1.65
## MPG.highway 3 93 50 10 47.96 49.09 8.34 32.96 89.23 56.27 1.19
## EngineSize 4 93 50 10 47.42 48.97 8.58 33.92 79.23 45.31 0.83
## Horsepower 5 93 50 10 49.27 49.07 8.49 33.04 79.82 46.78 0.92
## RPM 6 93 50 10 48.65 50.48 9.94 25.19 70.43 45.25 -0.25
## kurtosis se
## Price 3.05 1.04
## MPG.city 3.58 1.04
## MPG.highway 2.30 1.04
## EngineSize 0.23 1.04
## Horsepower 0.90 1.04
## RPM -0.51 1.04
# scale関数の中心化は平均値を引いている
C <- scale(z, scale = FALSE) # centre only: subtract the column means without dividing by the sd
# C <- data.frame(scale(z, scale = FALSE))
describe(C)
## vars n mean sd median trimmed mad min max range
## Price 1 93 0 9.66 -1.81 -1.22 8.30 -12.11 42.39 54.5
## MPG.city 2 93 0 5.62 -1.37 -0.75 4.45 -7.37 23.63 31.0
## MPG.highway 3 93 0 5.33 -1.09 -0.49 4.45 -9.09 20.91 30.0
## EngineSize 4 93 0 1.04 -0.27 -0.11 0.89 -1.67 3.03 4.7
## Horsepower 5 93 0 52.37 -3.83 -4.88 44.48 -88.83 156.17 245.0
## RPM 6 93 0 596.73 -80.65 28.69 593.04 -1480.65 1219.35 2700.0
## skew kurtosis se
## Price 1.48 3.05 1.00
## MPG.city 1.65 3.58 0.58
## MPG.highway 1.19 2.30 0.55
## EngineSize 0.83 0.23 0.11
## Horsepower 0.92 0.90 5.43
## RPM -0.25 -0.51 61.88