Rで自動的に名義変数らしきものを判別して、自動的にt検定とかfisher検定をする
Rで自動的に名義変数らしきものを判別して、自動的にt検定とかfisher検定をする
自動的に、お手軽にt検定やFisher exact testなどの結果を返す関数を作った。
いちいちfactor設定するのも面倒なので、upper.col.no以下の水準数要素数ならば 自動的にfactor扱いとしてFisher検定を出力する。 もちろんfactor指定があれば、それはFisher検定を出力する。 それ以外の数値ならば、t検定やU検定やlevene検定のP値を返す。
結果はlistなので、あとは適当にいじればよいだろう。 こういった簡単な事を聞かれることも多いので、今後は捗るはず。
適当に作った関数なのでエラーが出なけりゃラッキー程度に考えていただきたい。
一気に見栄えの良い結果を出力できるように将来改造したい。
作ってる過程でformulaの構造とか、勉強になった。
なお、時間がかかりすぎるのでexact U testは、オフにしているので普通のUtestの結果になっている。必要ならばdistribution="exact"にしてもらうと良いが、死ぬほど時間がかかる。
# fisherテストを自動に出力する
auto.fisher.out <- function(data = data, group.name = "group", upper.col.no = 5) {
## 列のパーセンテージを出す関数
colPercents <- function(x) {
round(t(t(x)/colSums(x)) * 100, digits = 1)
}
table1 <- apply(data, 2, table)
lapply(table1, length)
ss <- sapply(table1, length)
factor.yes <- as.numeric(ss <= upper.col.no)
ss2 <- apply(data, 2, class)
ss3 <- as.logical(ss2 == "character" | ss2 == "factor" | ss2 == "logical")
factor.yes <- as.logical(factor.yes)
factor.yes <- (factor.yes | ss3)
factor.yes <- as.numeric(factor.yes) - as.numeric(colnames(data) == group.name)
factor.yes <- as.logical(factor.yes)
group.yes <- (colnames(data) == group.name)
factor.names <- colnames(data)[as.logical(factor.yes)]
scale.yes <- !as.logical(factor.yes)
scale.names <- colnames(data)[as.logical(scale.yes)]
scale.yes <- as.numeric(scale.yes) - as.numeric(colnames(data) == group.name)
scale.yes <- as.logical(scale.yes)
scale.names <- colnames(data)[as.logical(scale.yes)]
res.fisher <- list()
for (i in factor.names) {
form.temp <<- as.formula(paste("~", as.character(i), "+", as.character(group.name),
collapse = " "))
table.temp <- as.table(xtabs(form.temp, data = data))
res.fisher[[i]]["table"] <- list(xtabs(form.temp, data = data))
res.fisher[[i]]["col.percents"] <- list(colPercents(xtabs(form.temp,
data = data)))
res.fisher[[i]]["p.value"] <- fisher.test(table.temp)$p.value
}
return(res.fisher)
}
auto.ttest.out <- function(data = data, group.name = "group", upper.col.no = 5) {
library(car)
library(coin)
table1 <- apply(data, 2, table)
lapply(table1, length)
ss <- sapply(table1, length)
factor.yes <- as.numeric(ss <= upper.col.no)
ss2 <- apply(data, 2, class)
ss3 <- as.logical(ss2 == "character" | ss2 == "factor" | ss2 == "logical")
factor.yes <- as.logical(factor.yes)
factor.yes <- (factor.yes | ss3)
factor.yes <- as.numeric(factor.yes) - as.numeric(colnames(data) == group.name)
factor.yes <- as.logical(factor.yes)
group.yes <- (colnames(data) == group.name)
factor.names <- colnames(data)[as.logical(factor.yes)]
scale.yes <- !as.logical(factor.yes)
scale.names <- colnames(data)[as.logical(scale.yes)]
scale.yes <- as.numeric(scale.yes) - as.numeric(colnames(data) == group.name)
scale.yes <- as.logical(scale.yes)
scale.names <- colnames(data)[as.logical(scale.yes)]
res.ttest <- list()
for (i in scale.names) {
form.temp <<- as.formula(paste(as.character(i), "~", as.character(group.name),
collapse = " "))
form.temp2 <<- as.formula(paste(as.character(i), "~", "as.factor(",
as.character(group.name), ")", collapse = " "))
temp.welch <- t.test(form.temp, var.equal = FALSE, data = data, na.action = "na.omit")
temp.t.test <- t.test(form.temp, var.equal = TRUE, data = data, na.action = "na.omit")
res.ttest[[i]]["Welch.t.test_p.value"] <- temp.welch$p.value
res.ttest[[i]]["t.test_p.value"] <- temp.t.test$p.value
res.ttest[[i]]["levene.test_p.value"] <- leveneTest(form.temp2, data = data)["Pr(>F)"][1,
1]
res.ttest[[i]]["U.test_p.value"] <- res.ttest[[i]]["U.test_p.value"] <- wilcox.test(form.temp,
alternative = "two.sided", exact = FALSE, correct = FALSE, data = data)$p.value
wt <- wilcox_test(form.temp2, data = data)
res.ttest[[i]]["exact.U.test_p.value"] <- pvalue(wt)
res.ttest[[i]]["sd"] <- list(tapply(unlist(data[i]), factor(unlist(data[group.name])),
FUN = sd, na.rm = TRUE))
}
res.ttest[["summary"]] <- by(data[scale.names], data[group.name], summary)
return(res.ttest)
}
library(Matching)
## Loading required package: MASS
## ##
## ## Matching (Version 4.8-3.4, Build Date: 2013/10/28)
## ## See http://sekhon.berkeley.edu/matching for additional documentation.
## ## Please cite software as:
## ## Jasjeet S. Sekhon. 2011. ``Multivariate and Propensity Score Matching
## ## Software with Automated Balance Optimization: The Matching package for R.''
## ## Journal of Statistical Software, 42(7): 1-52.
## ##
data(lalonde)
auto.fisher.out(data = lalonde, group.name = "treat", upper.col.no = 5)
## $black
## $black$table
## treat
## black 0 1
## 0 45 29
## 1 215 156
##
## $black$col.percents
## treat
## black 0 1
## 0 17.3 15.7
## 1 82.7 84.3
##
## $black$p.value
## [1] 0.6994
##
##
## $hisp
## $hisp$table
## treat
## hisp 0 1
## 0 232 174
## 1 28 11
##
## $hisp$col.percents
## treat
## hisp 0 1
## 0 89.2 94.1
## 1 10.8 5.9
##
## $hisp$p.value
## [1] 0.08938
##
##
## $married
## $married$table
## treat
## married 0 1
## 0 220 150
## 1 40 35
##
## $married$col.percents
## treat
## married 0 1
## 0 84.6 81.1
## 1 15.4 18.9
##
## $married$p.value
## [1] 0.3688
##
##
## $nodegr
## $nodegr$table
## treat
## nodegr 0 1
## 0 43 54
## 1 217 131
##
## $nodegr$col.percents
## treat
## nodegr 0 1
## 0 16.5 29.2
## 1 83.5 70.8
##
## $nodegr$p.value
## [1] 0.001661
##
##
## $u74
## $u74$table
## treat
## u74 0 1
## 0 65 54
## 1 195 131
##
## $u74$col.percents
## treat
## u74 0 1
## 0 25.0 29.2
## 1 75.0 70.8
##
## $u74$p.value
## [1] 0.3304
##
##
## $u75
## $u75$table
## treat
## u75 0 1
## 0 82 74
## 1 178 111
##
## $u75$col.percents
## treat
## u75 0 1
## 0 31.5 40.0
## 1 68.5 60.0
##
## $u75$p.value
## [1] 0.07018
auto.ttest.out(data = lalonde, group.name = "treat", upper.col.no = 5)
## Loading required package: survival
## Loading required package: splines
## $age
## $age$Welch.t.test_p.value
## [1] 0.2659
##
## $age$t.test_p.value
## [1] 0.2648
##
## $age$levene.test_p.value
## [1] 0.9219
##
## $age$U.test_p.value
## [1] 0.2148
##
## $age$exact.U.test_p.value
## [1] 0.2148
##
## $age$sd
## 0 1
## 7.058 7.155
##
##
## $educ
## $educ$Welch.t.test_p.value
## [1] 0.1502
##
## $educ$t.test_p.value
## [1] 0.1354
##
## $educ$levene.test_p.value
## [1] 0.007737
##
## $educ$U.test_p.value
## [1] 0.05568
##
## $educ$exact.U.test_p.value
## [1] 0.05568
##
## $educ$sd
## 0 1
## 1.614 2.011
##
##
## $re74
## $re74$Welch.t.test_p.value
## [1] 0.9819
##
## $re74$t.test_p.value
## [1] 0.9823
##
## $re74$levene.test_p.value
## [1] 0.9823
##
## $re74$U.test_p.value
## [1] 0.3602
##
## $re74$exact.U.test_p.value
## [1] 0.3602
##
## $re74$sd
## 0 1
## 5688 4887
##
##
## $re75
## $re75$Welch.t.test_p.value
## [1] 0.3853
##
## $re75$t.test_p.value
## [1] 0.3823
##
## $re75$levene.test_p.value
## [1] 0.3823
##
## $re75$U.test_p.value
## [1] 0.06076
##
## $re75$exact.U.test_p.value
## [1] 0.06076
##
## $re75$sd
## 0 1
## 3103 3219
##
##
## $re78
## $re78$Welch.t.test_p.value
## [1] 0.007893
##
## $re78$t.test_p.value
## [1] 0.004788
##
## $re78$levene.test_p.value
## [1] 0.01387
##
## $re78$U.test_p.value
## [1] 0.01093
##
## $re78$exact.U.test_p.value
## [1] 0.01093
##
## $re78$sd
## 0 1
## 5484 7867
##
##
## $summary
## treat: 0
## age educ re74 re75
## Min. :17.0 Min. : 3.0 Min. : 0 Min. : 0
## 1st Qu.:19.0 1st Qu.: 9.0 1st Qu.: 0 1st Qu.: 0
## Median :24.0 Median :10.0 Median : 0 Median : 0
## Mean :25.1 Mean :10.1 Mean : 2107 Mean : 1267
## 3rd Qu.:28.0 3rd Qu.:11.0 3rd Qu.: 139 3rd Qu.: 650
## Max. :55.0 Max. :14.0 Max. :39571 Max. :23032
## re78
## Min. : 0
## 1st Qu.: 0
## Median : 3139
## Mean : 4555
## 3rd Qu.: 7288
## Max. :39484
## --------------------------------------------------------
## treat: 1
## age educ re74 re75
## Min. :17.0 Min. : 4.0 Min. : 0 Min. : 0
## 1st Qu.:20.0 1st Qu.: 9.0 1st Qu.: 0 1st Qu.: 0
## Median :25.0 Median :11.0 Median : 0 Median : 0
## Mean :25.8 Mean :10.3 Mean : 2096 Mean : 1532
## 3rd Qu.:29.0 3rd Qu.:12.0 3rd Qu.: 1291 3rd Qu.: 1817
## Max. :48.0 Max. :16.0 Max. :35040 Max. :25142
## re78
## Min. : 0
## 1st Qu.: 485
## Median : 4232
## Mean : 6349
## 3rd Qu.: 9643
## Max. :60308