如果你会 R 语言,可以使用下面的代码按比例随机划分样本:
使用的 R 包:caret
library(caret)
# Load the phenotype table (tab-separated, first row is the header).
phenotypes <- read.table("phenotypes.txt", sep = "\t", header = TRUE)
head(phenotypes)
# Example of the expected layout:
# sampleID weight gender
# ID1 150 F
# ID2 160 F
# ID3 290 M
# ID4 155 M

# Option 1 - ignore groups: draw a simple random 80% of the samples.
# createDataPartition() stratifies on `y`; with a unique ID per sample every
# stratum holds a single record and all of them end up in the training set,
# so a plain sample() over the row numbers is used here instead.
# Call set.seed() beforehand if the split has to be reproducible.
n_samples <- nrow(phenotypes)
train_indices <- sort(
  sample(seq_len(n_samples), size = ceiling(0.8 * n_samples))
)
train_set <- phenotypes[train_indices, ]
# setdiff() instead of -train_indices: a negative empty index selects no rows.
test_set <- phenotypes[setdiff(seq_len(n_samples), train_indices), ]

# Option 2 - respect groups, e.g. a random 80% within each gender.
# Uncomment to use this instead of option 1 (requires caret):
# train_indices <- createDataPartition(
#   y = phenotypes$gender, p = 0.8, list = FALSE
# )
# train_set <- phenotypes[train_indices, ]
# test_set <- phenotypes[-train_indices, ]