# libRK.r # Type: R-Program # Title: Library of Basic Functions used in R-Programs # written by Reinhard Klenke # Version: 0.1 # Author/s: Dr. Reinhard Klenke # Institution: Society for Nature Conservation and Landscape Ecology # Street: Dorfstr. 31 # Town: 17237 Kratzeburg # Country: G E R M A N Y # URL: http://www.gnl-kratzeburg.de # Email: klenke [at] gnl-kratzeburg.de, # reinhard.klenke [at] ufz.de # Abstract: # This library is a compilation of basic functions which were used in # some other of my R-programs by the inclusion command # # source ("/BasicFunctions.r") # # but each of the functions can be used also stand alone in other # R-programs by copy and paste if you accept the GPL and refer to the # authorship and copyright. # Keywords: Basic Functions # Copyright (C) 2007 by Reinhard Klenke # License: # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . # # Please inform me if you find bugs and mistakes. # History: # -------------------------------------------------------------------- # cat ("\ndiscriminant.r\n Copyright (C) 2007 by Reinhard Klenke This program comes with ABSOLUTELY NO WARRANTY; for details see the source code.\n\n This is free software, and you are welcome to redistribute it under certain conditions; see the source code for details.\n\n") # Define basic functions, e.g. for user interactions # If you will use the routines you should create a file or an object # with variable names and descriptions with the following structure: # # "VARNAMES";"DESCRIPTION" # "Variable1";"Description of variable 1" # "Variable2";"Description of variable 2" # ... # The names and descriptions will be used for labeling of axes and # datapoints in graphs # "NextStep" # Should be used together with a switch called "RunningFast" # at the beginning of the program. # If # RunningFast <- TRUE # the program stops at NextStep () and will be continued after typing # any key. Otherway it will jump over each line with NextStep (). # This is especially usefull for the interactive use of programs. NextStep <- function() { ifelse (exists ("RunningFast"),"", RunningFast <- FALSE) ok <- "n" while (substr (ok, 1, 1) == "n") { ifelse (RunningFast == TRUE, break, "") ok <-readline("Please type any key: ") } message ("ok\n") ok <- "y" return (cat ("I will continue!\n")) } # "myHistogram" # Draws a histogram in grey with black lines, a title and # labels. For the labels the information contained in the variable # description file will be used. myHistogram <- function (VarName, VarData, VarDescription) { myTitle <- paste ("Histogram of" , VarName) hist (VarData, col="grey60", border="black", xlab=VarDescription, main=myTitle) } # "saveWMF" # Saves graphics in windows metafile format. saveWMF <- function (Var) { savePlot (filename=Var, type="wmf", device=dev.cur()) } # "savePNG" # Saves graphics in PNG format. savePNG <- function (Var) { savePlot (filename=Var, type="png", device=dev.cur()) } # "saveGraphs" # Saves graphics in both formats mentioned above in two # separate directories (outWMF and outHTML) which have to be declared # in the main program! saveGraphs <- function (Var) { savePlot (filename = file.path (outWMF, Var), type="wmf", device=dev.cur()) savePlot (filename =file.path (outHTML, Var), type="png", device=dev.cur()) } # "RemoveBlankVars" # The following function removes variables from a data set wich cases # do have only blank values (zeros). RemoveBlankVars <- function (DataVar) { if (min (abs (mean (DataVar), na.rm = TRUE)) == 0) { VarNamesNew <- "" DataNew <- 0 NumVars <- ncol (DataVar) for (i in 1:NumVars) { if (mean (DataVar [i]) > 0) { VarNamesNew <- c (VarNamesNew, VarNames [i]) if (i == 1) {DataNew <- DataVar [i]} else { DataNew <- cbind (DataNew, DataVar [i])} } } cat ("\nRemoveBlankVars has removed some variables!\n") return (DataNew) } else { cat ("\nRemoveBlankVars has not removed any variables!\n") return (DataVar) } } # "DistributionCharacter" # Creates a table with quantiles. DistributionCharacter <- function (SampleGroup_01, SampleGroup_02) { probabilities <- c (0,5,25,50,75,95,100) / 100 NumVars <- length (colnames (SampleGroup_01)) for (i in 1:(NumVars-1)) { cat ( paste ("Variable:", colnames (SampleGroup_01) [i], "\n", "Sample 1", "\n") ) print ( quantile (SampleGroup_01 [ ,i], probabilities, type=7) ) cat (paste ("Sample 2", "\n")) print ( quantile (SampleGroup_02 [ ,i], probabilities, type=7) ) cat ("\n") } } # "panel.cor" # Put (absolute) correlations on the upper panels, # with size proportional to the correlations. panel.cor <- function (x, y, digits=2, prefix="", cex.cor) { usr <- par ("usr"); on.exit (par (usr)) par(usr = c (0, 1, 0, 1)) r <- abs (cor (x, y)) txt <- format( c (r, 0.123456789), digits=digits)[1] txt <- paste (prefix, txt, sep="") if(missing (cex.cor)) cex <- 0.8/strwidth(txt) text(0.5, 0.5, txt, cex = cex * r) } # "PlotLogRegFit" # Plot the logistic (binomial) regression function build on basis of # the name of the response variable and one of the regression variables PlotLogRegFit <- function (PlotResponseName, PlotVarName) { PlotModelName <- paste (PlotResponseName, "~", PlotVarName) PlotResponse <- get (PlotResponseName) PlotVar <- get (PlotVarName) PlotModel <- glm (PlotResponse~PlotVar,binomial) xv <- seq (min (PlotVar), max (PlotVar), (max (PlotVar) - min (PlotVar))/ 100) yv <- predict (PlotModel,list (PlotVar = xv), type = "response") PlotVarCut <- cut (PlotVar, 3) PlotVar3rds <- tapply (PlotResponse, PlotVarCut, sum) ClassFreq3rds <- table (PlotVarCut) central3rd <- PlotVar3rds [2] / ClassFreq3rds [2] se.central3rd <- sqrt (central3rd*(1-central3rd)/ClassFreq3rds [2]) plot (PlotVar, PlotResponse, main = "Logistic Regression", xlab = PlotVarName, ylab = PlotResponseName, font.lab = 2) lines (xv,yv) points (median (PlotVar), central3rd, pch=16) lines (c (median (PlotVar), median (PlotVar)), c (central3rd - se.central3rd, central3rd + se.central3rd)) } # "LogisticRegression" # Performs a logistic regression for several different models described # in the ModelListVar(iable) on basis of the data contained in the # DataVar. The function prints statistics and gives back a list # containing RegOutput with the results of the regression, # RegOutputBackward with the results of the models selected in a # backward variable reduction process and AnovaOutput with the results # of an ANOVA of each of the models. LogisticRegression <- function (ModelListVar, DataVar) { NumRegModels <- length (ModelListVar$Structure) LogReg <- list () Model <- list () LogRegResults <- list () LogRegResultsBackward <- list () LogRegAnova <- list () par (mfrow=c (2,2)) for (i in 1:NumRegModels) { Model <- eval (parse (file = "", text = ModelListVar$Structure [i])) LogRegResults [[i]] <- glm (Model, data = DataVar, family = binomial (link=logit)) # Print statistics print (summary (LogRegResults [[i]])) print (summary (LogRegResultsBackward [[i]] <- step (LogRegResults [[i]]))) print (resid (LogRegResults [[i]])) print (fitted (LogRegResults [[i]])) print (predict (LogRegResults [[i]], DataVar, type = "response")) LogRegAnova [[i]] <- (anova (LogRegResults [[i]], test = "Chisq")) print (LogRegAnova [[i]]) # Make plots plot (LogRegResults [[i]], ask = FALSE) CurrentFileName <- paste ("LogReg", "Model", i, sep = "_") saveGraphs (CurrentFileName) } dev.off () LogReg <- list (RegOutput = LogRegResults, RegOutputBackward = LogRegResultsBackward, AnovaOutput = LogRegAnova) return (LogReg) } # "ShowPredictorFit" # This function shows the fit of several models with binomial link # function contained in the ModelListVar on basis of the data submitted # in the DataVar ShowPredictorFit <- function (ModelListVar, DataVar) { PredictorData <- DataVar attach (PredictorData) NumRegModels <- length (ModelListVar$Structure) NumPredictors <- 0 # The following two lines are only necessary to avoid errors # if it is a model with interactions Components <- gsub ("[[:space:]]","",ModelListVar$Structure) Components <- gsub ("\\*","\\+",ModelListVar$Structure) ScratchVar <- strsplit (Components,"~") for (i in 1:NumRegModels) { Components$Response [i] <- gsub ("[[:space:]]", "", ScratchVar [[i]] [1]) Components$Predictors [i] <- gsub ("[[:space:]]", "", ScratchVar [[i]] [2]) } Predictors <- strsplit (Components$Predictors,"\\+") par (mfrow=c (1,1), oma = c(1,1,4,1), cex.main = 0.8) for (i in 1:NumRegModels) { NumPredictors <- length (Predictors [[i]]) for (j in 1:NumPredictors) { isColon <- regexpr (":", Predictors [[i]] [j]) if ( isColon < 1) { # remove interaction terms PlotLogRegFit (Components$Response [i], Predictors [[i]] [j]) MyTitle <- paste ("Prediction\n glm (", Components$Response [i], "~", Predictors [[i]] [j], ", binomial)") title (main = MyTitle, outer = TRUE) CurrentFileName <- paste ("Fit", Components$Response [i], Predictors [[i]] [j], sep = "_") # Name the graphics and save each plot as a *.wmf file saveGraphs (CurrentFileName) } else { cat ("test") } } } detach (PredictorData) rm (PredictorData) } # "TransformVars" # Transformation of Variables in following transformation types: # log, sqrt, arcsin # VarList has to be a dataframe or a set of variables # combined with rbind TransformVars <- function (VarList, TransfType) { # VarNames <- colnames (VarList) if (TransfType == "log" | TransfType == "sqrt" | TransfType == "arcsin" | TransfType == "exp") { if (is.matrix (VarList)) { NumVars <- ncol (VarList) NumCases <- nrow (VarList) ScratchTransfVar <- matrix (ncol = NumVars, nrow = NumCases) TransfVar <- matrix (ncol = NumVars, nrow = NumCases) } else { NumVars <- 1 NumCases <- length (VarList) ScratchTransfVar <- vector (mode = "numeric", length = NumCases) TransfVar <- vector (mode = "numeric", length = NumCases) } TransfAddVal <- vector (mode = "numeric", length = NumVars) ScratchTransfVar <- cbind (VarList) for (i in 1:NumVars) { TransfAddVal [i] <- min (subset (ScratchTransfVar [i], ScratchTransfVar [i] > 0)/2) } if (TransfType == "log") { TransfVar <- log (ScratchTransfVar + TransfAddVal) colnames (TransfVar) <- paste ("log", colnames (TransfVar), sep = "") } if (TransfType == "sqrt") { TransfVar <- sqrt (ScratchTransfVar + TransfAddVal) colnames (TransfVar) <- paste ("sqrt", colnames (TransfVar), sep = "") } if (TransfType == "arcsin") { TransfVar <- round ( asin ( sqrt ((ScratchTransfVar)) ) * 180 / pi, 2) colnames (TransfVar) <- paste ("asin", colnames (TransfVar), sep = "") } if (TransfType == "exp") { TransfVar <- exp (ScratchTransfVar) colnames (TransfVar) <- paste ("exp", colnames (TransfVar), sep = "") } TransformVars <- data.frame (TransfVar) attach (TransformVars) return (TransformVars) } else { cat ("Transformation type hast to be a string of: \"log\", \"sqrt\", \"arcsin\", \"exp\"\n") cat ("Example: a <- TransformVars (b, \"log\")\n") return () } } # "tryPCA" # Tries to perform a PCA on basis of several data sets. If the data sets # are not classified but can be splitted in distinct classes by a factor # variable ClassFactor tryPCA will perform also a classified PCA and # Discrinant Analysis. # DataSetsVar is a vector of DataSet names like # x <- c ("name1","name2", ...) # DataSetsUnclassifiedVar is a vector of logical values like # y <- c (TRUE, FALSE, ...) # ClassVar is a list of vectors containing the classes or a NA value if # the DataSetsUnclassifiedVar is FALSE for an given position like # z <- # $ 1 1 2 2 3 1 3 ... # $ NA # $ ... tryPCA <- function (DataSetsVar, DataSetsUnclassifiedVar, ClassVar) { # load library library (ade4) par (mfrow = c(2,2), ask=FALSE, col.lab = "blue") # Start >> Axes for which results should be drawn xAxis <- 1 yAxis <- 2 # Stop >> Axes for which results should be drawn DataSets <- DataSetsVar NumDataSets <- length (DataSets) DataSetsUnclassified <- DataSetsUnclassifiedVar for (i in 1:NumDataSets) { CurrentData <- get (DataSets [i]) SetName <- DataSets [i] # unscaled PCA pca_results.dudi <- dudi.pca (CurrentData, center=FALSE, scale=FALSE, scan=FALSE,nf=4) cat ("Data from unscaled PCA\n ") print (pca_results.dudi) par (mfrow = c(1,1), ask=FALSE, col.lab = "blue") s.label (pca_results.dudi$li, clabel = 0.6, label = case.names (CurrentData), sub = "Factorial map from", csub = 1.0, possub = "bottomleft") CurrentFileName <- paste (SetName,"label",sep = "_") saveGraphs (CurrentFileName) s.arrow (pca_results.dudi$c1, lab = names (CurrentData), clabel = 0.6, sub = "Projected Factorial Map from", csub = 1.0, possub = "bottomleft") CurrentFileName <- paste (SetName,"arrow",sep = "_") saveGraphs (CurrentFileName) s.corcircle (pca_results.dudi$co, lab = names (CurrentData), clabel = 0.6, full=FALSE, box=TRUE, sub = "Scatter Correlation Circle from", csub = 1.0, possub = "bottomleft") CurrentFileName <- paste (SetName,"corcircle",sep = "_") saveGraphs (CurrentFileName) dev.off () par (mfrow = c(1,1), ask = FALSE, col = "blue", cex = 0.8) scatter (pca_results.dudi, sub = paste ("PCA from",SpeciesName), clabel = 0.6, fg = "blue", xax = xAxis, yax = yAxis) CurrentFileName <- paste (SetName, "scatter", paste (xAxis, yAxis, sep = ""), sep = "_") saveGraphs (CurrentFileName) dev.off () # for interpretations TabCols <- ncol (pca_results.dudi$tab) GraphDim <- trunc (sqrt (TabCols)) + 1 par(mfrow = c (GraphDim,GraphDim)) par(mar = c (2.1,2.1,2.1,1.1)) for (j in 1:TabCols) { hist (pca_results.dudi$tab[,j], xlim = c (-50,50), breaks = seq (-100, 100, by = 1), prob = TRUE, right = FALSE, main = names (pca_results.dudi$tab)[j], xlab = "", ylim = c(0,1)) abline(v = 0, lwd = 3) } CurrentFileName <- paste (SetName,"quality",sep = "_") saveGraphs (CurrentFileName) dev.off () if (DataSetsUnclassified [i] == TRUE) { pca_results.dudi.classified <- dudi.pca (CurrentData, center=FALSE, scale=FALSE, scan=FALSE, nf=4) s.class (pca_results.dudi.classified$li, factor (ClassVar [i]), sub = "Scatter for Point Classes from", csub = 1.0, possub = "bottomleft") CurrentFileName <- paste (SetName, "classified", paste (xAxis,yAxis, sep = ""), sep = "_") saveGraphs (CurrentFileName) } pca_results.dudi.sc <- dudi.pca (CurrentData, center=FALSE, scale=FALSE, scan=FALSE, nf=2) pca_results.pcaiv <- pcaiv (pca_results.dudi.sc, CurrentData, scan = FALSE, nf = 2) plot (pca_results.pcaiv, xax = xAxis, yax = yAxis) CurrentFileName <- paste (SetName, "pcaiv", paste (xAxis, yAxis, sep = ""), sep = "_") saveGraphs (CurrentFileName) dev.off () } cat ("\ntryPCA has been finished successful\n") detach("package:ade4") } # -------------------------------------------------------------------- # # End of define basic functions, e.g. for user interactions