# Reassign the detection-method code of every entry of a parsed,
# tab-separated file by means of a conversion table, then write the converted
# entries, an optional QC log and a one-line summary.
#
# Arguments:
#   file.name  Name of the input file; it is read from <cwd>/Parsed-QCed.
#              Column 7 holds the method field. The third ":"-separated piece
#              of that field is parsed as "<code>(<description>)".
#   wd         Directory holding "method.codes_STRINGENT.txt" and
#              "method.codes_LENIENT.txt" (tab-separated; column 1 is the
#              converted ID, column 2 is the method code it is matched on,
#              written as "MI:<code>").
#   cwd        Directory containing the sub-directories Parsed-QCed,
#              Method-Reassigned, LogFiles and temp.
#   FILTER     Either "stringent" or "lenient"; selects the conversion table.
#
# Files written:
#   <cwd>/Method-Reassigned/<file.name>.method.txt
#       The retained entries with three columns (conv.meth, meth, meth2)
#       inserted before the original column 7. Written when at least one
#       entry is retained.
#   <cwd>/LogFiles/<file.name>.QCmethod-NOTpassed.txt
#       Written when no entry is retained or when every entry was rejected
#       by the single-method check.
#   <cwd>/temp/<file.name>
#       One row: file name, number of entries whose code is not in the
#       conversion table, number of entries rejected (not exactly one method,
#       or converted ID equal to "UNSPM").
#
# Side effect: the working directory is changed and left at
# <cwd>/Parsed-QCed when the function returns.
#
# Returns: the (invisible NULL) value of the last write.table() call.
Reassignment.Method.FILTER <- function(file.name, wd, cwd, FILTER) {
  # Fail early with a readable message instead of "object not found" later.
  if (length(FILTER) != 1L || is.na(FILTER) ||
      !(FILTER %in% c("stringent", "lenient"))) {
    stop("FILTER must be either \"stringent\" or \"lenient\"", call. = FALSE)
  }
  table.file <- switch(as.character(FILTER),
    stringent = "method.codes_STRINGENT.txt",
    lenient = "method.codes_LENIENT.txt"
  )

  # Read the conversion table for the methods.
  # The setwd() calls are kept on purpose: relative `file.name` lookups and
  # the working directory seen by the caller afterwards stay as they were.
  setwd(wd)
  re.assignment <- read.csv(table.file, sep = "\t")
  ID <- as.character(re.assignment[, 1])
  code <- as.character(re.assignment[, 2])

  # Read the file to convert.
  setwd(paste(cwd, "/Parsed-QCed", sep = ""))
  file <- read.csv(file.name, sep = "\t")
  if (ncol(file) < 7L) {
    stop("'", file.name, "' must have at least 7 columns", call. = FALSE)
  }
  # as.character() also copes with a method column that was read as
  # factor / logical (e.g. all NA), which strsplit() would reject.
  pieces <- strsplit(as.character(file[, 7]), ":")
  n.entries <- length(pieces)

  # An entry passes only when exactly one ":"-separated piece contains "MI";
  # entries with no method or with several methods are flagged.
  n.mi <- vapply(pieces, function(p) sum(grepl("MI", p)), integer(1))
  duplicates_meth <- which(n.mi != 1L)

  # When every entry is flagged the rows are NOT dropped; the file is
  # processed as-is and additionally reported in LogFiles (see below).
  REMOVE_FILE <- n.entries > 0L && length(duplicates_meth) == n.entries
  if (length(duplicates_meth) > 0L && !REMOVE_FILE) {
    file <- file[-duplicates_meth, , drop = FALSE]
    pieces <- pieces[-duplicates_meth]
  }

  # Parse each entry on its own so that entries with an unexpected number of
  # pieces cannot shift the values of other rows.
  third <- vapply(
    pieces,
    function(p) if (length(p) >= 3L) p[3L] else NA_character_,
    character(1),
    USE.NAMES = FALSE
  )
  has.code <- !is.na(third)

  # Method code: text before the first "(", prefixed with "MI:".
  meth <- rep(NA_character_, length(third))
  meth[has.code] <- paste0("MI:", sub("\\(.*$", "", third[has.code]))

  # Method description: text after the first "(" up to the next "(" or ")".
  has.desc <- has.code & grepl("(", third, fixed = TRUE)
  meth2 <- rep(NA_character_, length(third))
  meth2[has.desc] <- sub("^[^(]*\\(([^()]*).*$", "\\1", third[has.desc])

  # Convert through the reference table; codes that are missing from it are
  # kept and marked with the literal string "NULL".
  pos <- match(meth, code)
  pos[is.na(meth)] <- NA_integer_
  not.converted <- is.na(pos)
  conv.meth <- ID[pos]
  conv.meth[not.converted] <- "NULL"
  Non.Converted <- sum(not.converted)

  # Insert the new columns before the original column 7. drop = FALSE keeps
  # the original header even when the file has exactly 7 columns.
  result1 <- cbind(
    file[, 1:6, drop = FALSE],
    conv.meth = conv.meth,
    meth = meth,
    meth2 = meth2,
    file[, 7:ncol(file), drop = FALSE]
  )

  # Drop the entries whose converted method is "UNSPM".
  # which() ignores NA, so a missing ID in the table cannot raise an error.
  unspecified <- which(conv.meth == "UNSPM")
  count2 <- length(unspecified)
  if (count2 > 0L) {
    result1 <- result1[-unspecified, , drop = FALSE]
  }

  final <- nrow(result1)
  if (final > 0L) {
    no <- paste0(cwd, "/Method-Reassigned/", file.name, ".method.txt")
    write.table(result1, no, quote = FALSE, row.names = FALSE, sep = "\t")
  }
  # NOTE(review): when every entry was flagged and rows remain, the file is
  # written to BOTH Method-Reassigned and LogFiles. This mirrors the original
  # rule; confirm whether such files should reach Method-Reassigned at all.
  if (final == 0L || REMOVE_FILE) {
    no <- paste0(cwd, "/LogFiles/", file.name, ".QCmethod-NOTpassed.txt")
    write.table(result1, no, quote = FALSE, row.names = FALSE, sep = "\t")
  }

  # One-line summary for this file.
  result2 <- cbind(
    file.name = file.name,
    Non.Converted = Non.Converted,
    Unspecified_OR_MultipleMethods = count2 + length(duplicates_meth)
  )
  no2 <- paste0(cwd, "/temp/", file.name)
  write.table(result2, no2, quote = FALSE, row.names = FALSE, sep = "\t")
}