3 回答
TA贡献1865条经验 获得超7个赞
我不知道是否已有现成的函数可以自动完成这件事,不过现在有了 ;)
## Anonymise the columns of a data.frame selected by 'colIDs'.
##
## Arguments:
##   df     - a data.frame
##   colIDs - the columns to anonymise, given either as column names
##            (character) or as integer indices
##
## Each selected column has its values replaced by one randomly drawn
## capital letter followed by the value's level code from as.factor(), so
## equal values within a column map to equal labels. Every selected column
## is also renamed to "V<column index>".
##
## Returns the modified copy of 'df'; unselected columns are left as they
## were. Stops with an error if 'colIDs' refers to a column that cannot be
## resolved.
anonymiseColumns <- function(df, colIDs) {
  ## Resolve names to positions once, so that the loop and the renaming
  ## below both work on integer indices.
  ids <- if (is.character(colIDs)) match(colIDs, names(df)) else colIDs
  if (anyNA(ids)) {
    stop("unknown column(s) in 'colIDs': ",
         paste(colIDs[is.na(ids)], collapse = ", "), call. = FALSE)
  }
  ## Nothing selected: return early (paste0("V", integer(0)) would give "V").
  if (length(ids) == 0L) {
    return(df)
  }
  for (id in ids) {
    prefix <- sample(LETTERS, 1)
    suffix <- as.character(as.numeric(as.factor(df[[id]])))
    df[[id]] <- paste0(prefix, suffix)
  }
  ## Rename every anonymised column, not only the last one visited.
  names(df)[ids] <- paste0("V", ids)
  df
}
## Build an example data.frame holding sensitive information:
## five author names (each repeated twice), a random score and a random
## nine-character password per row.
df <- data.frame(
  name = rep(readLines(file.path(R.home("doc"), "AUTHORS"))[9:13], each = 2),
  hiscore = runif(10, min = 99, max = 100),
  passwd = vapply(
    seq_len(10),
    function(i) paste0(sample(c(LETTERS, letters), 9), collapse = ""),
    character(1)
  )
)
## Anonymise columns 1 and 3, keeping the original in 'df'
df2 <- anonymiseColumns(df, c(1, 3))
## Check that it worked
> head(df, 3)
name hiscore passwd
1 Douglas Bates 99.96714 ROELIAncz
2 Douglas Bates 99.07243 gDOLNMyVe
3 John Chambers 99.55322 xIVPHDuEW
> head(df2, 3)
name hiscore V3
1 Q1 99.96714 V8
2 Q1 99.07243 V2
3 Q2 99.55322 V9
- 3 回答
- 0 关注
- 569 浏览
添加回答
举报