函数内部的列操作将数据帧变为空或意外的值
问题描述:
我有一个运行良好的函数,我想用下面两条命令删除其中一列,并把另一列中的 NA 值替换为 1,以清理输出:
df$PlateIden <- NULL
df$PlateNum[is.na(df$PlateNum)] <- 1
在函数外部直接对数据框执行这两条命令时一切正常;但在函数内部使用它们时,我得到的数据框要么变成 NULL(“空”),要么变成 1(取决于使用的是哪一条命令)。这些命令以前是可以工作的。
下面是我的完整函数:
# Parse a one-column data frame of file names into run metadata columns
# (RunDate, PlateNum, User, Project, ID) and drop duplicate plates.
#
# df:         data frame whose single column holds the file-name strings;
#             the column is renamed to "FileName".
# addproject: not referenced in the body (Project is set to the literal
#             string "addproject").
# adduser:    not referenced in the body (User is set to the literal
#             string "adduser").
#
# str_match() and ymd() must already be in scope (presumably from stringr
# and lubridate -- confirm against the calling script).
#
# NOTE(review): an R function returns the value of its last expression.
# Here the last expression is an assignment, whose value is the assigned
# right-hand side, so the function yields 1 (or NULL when the
# `df$PlateIden <- NULL` line is last) instead of the modified `df`.
cleanup_safe <- function(df,addproject,adduser){
colnames(df) <- "FileName"
# Leading run of digits (after optional letters/spaces) is taken as the date.
df$RunDate <- str_match(df$FileName, "^[a-zA-Z ]*(\\d+)")[,2]
df$RunDate <- ymd(df$RunDate)
# Single digit following "Plate"/"plate"; NA when no such digit is found.
df$PlateNum <- str_match(df$FileName, "(?<=Plate|plate)[_ ]?(\\d)")[,2]
# Single letter following the plate digits, if any.
df$PlateIden <- str_match(df$FileName, "(?<=Plate|plate)[_ ]?\\d*[_ ]?([a-zA-Z])")[,2]
df$User <- "adduser"
df$Project <- "addproject"
# Keep the first row of every User/Project/RunDate/PlateNum combination.
df <- df[!duplicated(df[,c("User","Project","RunDate","PlateNum")]),]
# ID increments each time a new User/Project/RunDate combination appears.
df <- within(df, ID <- cumsum(!duplicated(df[c("User","Project","RunDate")])))
df$PlateIden <- NULL
# Last expression: its value (the right-hand side, 1) is what the function returns.
df$PlateNum[is.na(df$PlateNum)] <- 1
}
下面是测试数据集
# Sample file names used to exercise cleanup_safe(). Each entry has the form
# "<yyyymmdd>[_a], Optimization, <description>, Plate <n>[a|b], IgG-Biot[, MAF|, SAF].srbx".
test <- c("20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1a, IgG-Biot, MAF.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1a, IgG-Biot, SAF.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1a, IgG-Biot.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1b, IgG-Biot, MAF.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1b, IgG-Biot, SAF.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1b, IgG-Biot.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 2, IgG-Biot, MAF.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 2, IgG-Biot, SAF.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 2, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1a, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1a, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1b, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1b, IgG-Biot, SAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1b, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2a, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2a, IgG-Biot, SAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2a, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2b, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2b, IgG-Biot, SAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2b, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3a, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3a, IgG-Biot, SAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3a, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3b, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3b, IgG-Biot, SAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3b, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4a, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4a, IgG-Biot, SAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4a, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4b, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4b, IgG-Biot, SAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4b, IgG-Biot.srbx",
"20160812, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, MAF.srbx",
"20160812, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, SAF.srbx",
"20160812, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot.srbx",
"20160812, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, MAF.srbx",
"20160812, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, SAF.srbx",
"20160812, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot.srbx",
"20160812_a, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, MAF.srbx",
"20160812_a, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, SAF.srbx",
"20160812_a, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot.srbx",
"20160812_a, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, MAF.srbx",
"20160812_a, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, SAF.srbx",
"20160812_a, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot.srbx")
# Wrap the character vector in a data frame with a single column.
dataframe <- as.data.frame(test)
# Pass the project/user labels as strings: the bare symbols `testproject` and
# `testuser` are not defined anywhere, and the call only succeeded because R
# evaluates arguments lazily and cleanup_safe() never touches them.
dataframe <- cleanup_safe(dataframe, "testproject", "testuser")
答
改用下面两行:
setDT(df)
df[is.na(PlateNum), PlateNum := 1].
也就是说,用上面的两行替换 df$PlateNum[is.na(df$PlateNum)] <- 1
。使用 data.table 之后,你的完整代码会是这样的:
library(data.table)
# Sample file names used to exercise cleanup_safe(). Each entry has the form
# "<yyyymmdd>[_a], Optimization, <description>, Plate <n>[a|b], IgG-Biot[, MAF|, SAF].srbx".
test <- c("20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1a, IgG-Biot, MAF.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1a, IgG-Biot, SAF.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1a, IgG-Biot.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1b, IgG-Biot, MAF.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1b, IgG-Biot, SAF.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1b, IgG-Biot.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 2, IgG-Biot, MAF.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 2, IgG-Biot, SAF.srbx",
"20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 2, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1a, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1a, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1b, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1b, IgG-Biot, SAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1b, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2a, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2a, IgG-Biot, SAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2a, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2b, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2b, IgG-Biot, SAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2b, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3a, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3a, IgG-Biot, SAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3a, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3b, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3b, IgG-Biot, SAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3b, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4a, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4a, IgG-Biot, SAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4a, IgG-Biot.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4b, IgG-Biot, MAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4b, IgG-Biot, SAF.srbx",
"20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4b, IgG-Biot.srbx",
"20160812, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, MAF.srbx",
"20160812, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, SAF.srbx",
"20160812, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot.srbx",
"20160812, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, MAF.srbx",
"20160812, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, SAF.srbx",
"20160812, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot.srbx",
"20160812_a, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, MAF.srbx",
"20160812_a, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, SAF.srbx",
"20160812_a, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot.srbx",
"20160812_a, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, MAF.srbx",
"20160812_a, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, SAF.srbx",
"20160812_a, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot.srbx")
# Wrap the character vector in a data frame with a single column.
dataframe <- as.data.frame(test)
#' Parse file names into run metadata and keep one row per plate
#'
#' Extracts the run date and plate number from each file name, labels every
#' row with a user and project, removes rows that repeat the same
#' User/Project/RunDate/PlateNum combination, numbers each distinct
#' User/Project/RunDate combination with a running `ID`, and replaces a
#' missing plate number with "1".
#'
#' @param df A data frame whose single column holds the file-name strings.
#' @param addproject Currently unused; `Project` is filled with the literal
#'   string "addproject".
#' @param adduser Currently unused; `User` is filled with the literal string
#'   "adduser".
#' @return A data.table with columns FileName, RunDate, PlateNum, User,
#'   Project and ID.
cleanup_safe <- function(df, addproject, adduser) {
  colnames(df) <- "FileName"

  # Namespace-qualified calls so the function does not depend on stringr and
  # lubridate having been attached by the calling script.
  run_date <- stringr::str_match(df$FileName, "^[a-zA-Z ]*(\\d+)")[, 2]
  df$RunDate <- lubridate::ymd(run_date)
  df$PlateNum <- stringr::str_match(
    df$FileName, "(?<=Plate|plate)[_ ]?(\\d)"
  )[, 2]

  # NOTE(review): the literals are kept on purpose -- the existing call passes
  # undefined symbols, so reading `adduser`/`addproject` here would make that
  # call fail. Confirm whether the arguments were meant to be used.
  df$User <- "adduser"
  df$Project <- "addproject"

  # Keep the first row of each User/Project/RunDate/PlateNum combination.
  key_cols <- c("User", "Project", "RunDate", "PlateNum")
  df <- df[!duplicated(df[, key_cols]), ]

  # ID increments each time a new User/Project/RunDate combination appears.
  df$ID <- cumsum(!duplicated(df[c("User", "Project", "RunDate")]))

  setDT(df)
  # PlateNum is a character column, so fill with "1" rather than numeric 1
  # to avoid a type coercion inside `:=`.
  df[is.na(PlateNum), PlateNum := "1"]

  # Return the table explicitly. A function returns the value of its last
  # expression, and an assignment as the last expression would hand back the
  # assigned value (or an invisible result) instead of the table itself.
  df[]
}
# Pass the project/user labels as strings: the bare symbols `testproject` and
# `testuser` are not defined anywhere, and the call only succeeded because R
# evaluates arguments lazily and cleanup_safe() never touches them.
dataframe <- cleanup_safe(dataframe, "testproject", "testuser")
这样会得到如下输出:
FileName RunDate PlateNum User Project ID
1: 20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1a, IgG-Biot, MAF.srbx 2016-08-01 1 adduser addproject 1
2: 20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 2, IgG-Biot, MAF.srbx 2016-08-01 2 adduser addproject 1
3: 20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1a, IgG-Biot, MAF.srbx 2016-08-02 1 adduser addproject 2
4: 20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2a, IgG-Biot, MAF.srbx 2016-08-02 2 adduser addproject 2
5: 20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3a, IgG-Biot, MAF.srbx 2016-08-02 3 adduser addproject 2
6: 20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4a, IgG-Biot, MAF.srbx 2016-08-02 4 adduser addproject 2
7: 20160812, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, MAF.srbx 2016-08-12 1 adduser addproject 3
8: 20160812, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, MAF.srbx 2016-08-12 2 adduser addproject 3
+1
谢谢 Suchait!真希望我能弄明白原来的写法为什么不起作用,不过算了。 – AwesomeeExpress
+0
不客气。感谢你指出这一点,这确实很奇怪。我不知道为什么最后一行代码在函数内部不起作用。 – suchait
你能把数据分享给我们吗? – suchait
添加了测试数据集 – AwesomeeExpress
您的测试数据是只有单个列的 'data.frame' 吗?是这样吗?如果您想分享样本数据,最好使用 'dput'。 –