Tutorial 13.3 - Transformations and standardizations
12 Mar 2015
library(vegan)
library(ggplot2)
library(grid)

# Define my common ggplot options.
# opts(), theme_blank(), theme_text() and theme_segment() are defunct
# (last usable in ggplot2 0.9.1); theme() with element_blank(),
# element_text() and element_line() is the replacement API.
murray_opts <- theme(
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  axis.title.y = element_text(size = 15, vjust = 0, angle = 90),
  axis.text.y = element_text(size = 12),
  axis.title.x = element_text(size = 15, vjust = -1),
  axis.text.x = element_text(size = 12),
  axis.line = element_line(),
  # unit() comes from the grid package loaded above
  plot.margin = unit(c(0.5, 0.5, 1, 2), "lines")
)
# Simulate the abundance of a single species along an environmental
# gradient (a coenocline).
#
# Arguments:
#   x     - the environmental range (gradient values at which to evaluate)
#   A0    - the maximum abundance of the species at the optimum
#           environmental conditions
#   m     - the value of the environmental gradient that represents the
#           optimum conditions for the species
#   r     - the species range over the environmental gradient (niche width)
#   a, g  - shape parameters representing the skewness and kurtosis
#             a == g : the distribution is symmetrical
#             a > g  : negative skew (large left tail)
#             a < g  : positive skew (large right tail)
#   int   - should the responses be rounded to integers?
#   noise - should random noise be added (reflecting random sampling)?
#
# Returns the simulated abundances, one per element of x when the other
# arguments are scalars. Negative and NA/NaN responses are converted to 0.
coenocline <- function(x, A0, m, r, a, g, int = TRUE, noise = TRUE) {
  b <- a / (a + g)
  d <- (b^a) * (1 - b)^g
  # position of each x relative to the optimum, in units of niche width
  z <- ((x - m) / r) + b
  cc <- (A0 / d) * (z^a) * ((1 - z)^g)
  if (noise) {
    # noise standard deviation is a tenth of A0, floored at 0
    n <- A0 / 10
    n[n < 0] <- 0
    cc <- cc + rnorm(length(cc), 0, n)
  }
  cc[cc < 0] <- 0
  # a negative base raised to a fractional power yields NaN; treat as absent
  cc[is.na(cc)] <- 0
  if (int) cc <- round(cc, 0)
  cc
}
# plot(coenocline(0:100, 40, 40, 20, 1, 1, int = TRUE, noise = TRUE), ylim = c(0, 100))
Tutorial 13.1 discussed the idea of amalgamating variables together so as to create new, condensed insights into the composition of communities (objects). However, the success and appropriateness of the amalgamations is very much dependent on the characteristics of the original variables (e.g. species).
Some of the techniques are parametric and assume that the variables are:
- normally distributed
- linearly related
- measured on the same scale
Furthermore, the amalgams of many of the techniques can be overly biased towards the patterns of highly abundant species or variables whose scales yield values of large magnitude (such as units of force or pressure). Rare species, or variables whose values are small in magnitude, tend to have almost no influence at all.
Ecological/biological multivariate data typically comprise the following:
- species abundances (counts). Whilst species abundances are usually expressed in the same units and on the same scale, they tend to be positively skewed (since they are truncated at zero).
- environmental data (measurements). Environmental data tend to be measured on disparate scales (pH, degrees C, mm, kg, etc) and thus can present issues of non-normality and non-linearity
- morphometric data (counts, measurements). Morphometric data as used in taxonomic studies often represent a mixture of measurements (lengths, masses) in addition to binary (feature present/absent) and counts and thus normality and linearity can also pose issues.
Hence, the first step of most multivariate analyses is to transform the data so that the data used to create the amalgamations (and thus the amalgamations themselves) best represents the broad characteristics of the communities.
Again we will use a simulated data set introduced in the previous Tutorial. This multivariate dataset
comprises the abundances of 10 species from each of 10 sites located throughout a landscape.
# Simulate the abundances of 10 species at 10 points along an
# environmental gradient running from 0 to 50.
# The order of the coenocline() calls is kept as-is: each call draws
# random noise, so reordering them would change the simulated values.
set.seed(1)
x <- seq(0, 50, length.out = 10)
n <- 10
sp1 <- coenocline(x = x, A0 = 5, m = 0, r = 2, a = 1, g = 1, int = TRUE, noise = TRUE)
sp2 <- coenocline(x = x, A0 = 70, m = 7, r = 30, a = 1, g = 1, int = TRUE, noise = TRUE)
sp3 <- coenocline(x = x, A0 = 50, m = 15, r = 30, a = 1, g = 1, int = TRUE, noise = TRUE)
sp4 <- coenocline(x = x, A0 = 7, m = 25, r = 20, a = 0.4, g = 0.1, int = TRUE, noise = TRUE)
sp5 <- coenocline(x = x, A0 = 40, m = 30, r = 30, a = 0.6, g = 0.5, int = TRUE, noise = TRUE)
sp6 <- coenocline(x = x, A0 = 15, m = 35, r = 15, a = 0.2, g = 0.3, int = TRUE, noise = TRUE)
sp7 <- coenocline(x = x, A0 = 20, m = 45, r = 25, a = 0.5, g = 0.9, int = TRUE, noise = TRUE)
sp8 <- coenocline(x = x, A0 = 5, m = 45, r = 5, a = 1, g = 1, int = TRUE, noise = TRUE)
sp9 <- coenocline(x = x, A0 = 20, m = 45, r = 15, a = 1, g = 1, int = TRUE, noise = TRUE)
sp10 <- coenocline(x = x, A0 = 30, m = 50, r = 5, a = 1, g = 1, int = TRUE, noise = TRUE)

# Bind the species in a shuffled order and relabel the columns Sp1..Sp10,
# so the column labels do not follow the order of the simulated optima.
X <- cbind(sp1, sp10, sp9, sp2, sp3, sp8, sp4, sp5, sp7, sp6)
# X <- X[c(1, 10, 9, 2, 3, 8, 4, 5, 7, 6), ]
colnames(X) <- paste0("Sp", 1:10)
# Label the rows with shuffled site numbers, then reorder the rows so the
# labels run Site1..Site10.
rownames(X) <- paste0("Site", c(1, 10, 9, 2, 3, 8, 4, 5, 7, 6))
X <- X[c(1, 4, 5, 7, 8, 10, 9, 6, 3, 2), ]
# Sites is a factor whose levels keep the row order of X
data <- data.frame(Sites = factor(rownames(X), levels = rownames(X)), X)
Sites | Sp1 | Sp2 | Sp3 | Sp4 | Sp5 | Sp6 | Sp7 | Sp8 | Sp9 | Sp10 |
---|---|---|---|---|---|---|---|---|---|---|
Site1 | 5 | 0 | 0 | 65 | 5 | 0 | 0 | 0 | 0 | 0 |
Site2 | 0 | 0 | 0 | 25 | 39 | 0 | 6 | 23 | 0 | 0 |
Site3 | 0 | 0 | 0 | 6 | 42 | 0 | 6 | 31 | 0 | 0 |
Site4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 40 | 0 | 14 |
Site5 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 34 | 18 | 12 |
Site6 | 0 | 29 | 12 | 0 | 0 | 0 | 0 | 0 | 22 | 0 |
Site7 | 0 | 0 | 21 | 0 | 0 | 5 | 0 | 0 | 20 | 0 |
Site8 | 0 | 0 | 0 | 0 | 13 | 0 | 6 | 37 | 0 | 0 |
Site9 | 0 | 0 | 0 | 60 | 47 | 0 | 4 | 0 | 0 | 0 |
Site10 | 0 | 0 | 0 | 72 | 34 | 0 | 0 | 0 | 0 | 0 |
I will also introduce another simulated data set that comprises five biophysical measurements made from the 10 sites. These biophysical environmental data include pH (log scale), Pressure (Pa), Altitude (m), Slope (degrees) and Substrate (categorical: Quartz or Shale).
# Build the simulated environmental data set: four numeric measurements
# (pH, Slope, Pressure, Altitude) and one categorical variable (Substrate)
# for each of the 10 sites.
set.seed(1)
Site <- gl(10, 1, 10, labels = paste0("Site", 1:10))
# One row per site; values are entered row-wise in the column order
# pH, Slope, Pressure, Altitude.
Y <- matrix(
  c(
    6.1,  4.2, 101325,   2,
    6.7,  9.2, 101352, 510,
    6.8,  8.6, 101356, 546,
    7.0,  7.4, 101372, 758,
    7.2,  5.8, 101384, 813,
    7.5,  8.4, 101395, 856,
    7.5,  0.5, 101396, 854,
    7.0, 11.8, 101370, 734,
    8.4,  8.2, 101347, 360,
    6.2,  1.5, 101345, 356
  ),
  10, 4,
  byrow = TRUE
)
colnames(Y) <- c("pH", "Slope", "Pressure", "Altitude")
Substrate <- factor(c(
  "Quartz", "Shale", "Shale", "Shale", "Shale",
  "Quartz", "Quartz", "Shale", "Quartz", "Quartz"
))
enviro <- data.frame(Site, Y, Substrate)
Site | pH | Slope | Pressure | Altitude | Substrate |
---|---|---|---|---|---|
Site1 | 6.1 | 4.2 | 101325 | 2 | Quartz |
Site2 | 6.7 | 9.2 | 101352 | 510 | Shale |
Site3 | 6.8 | 8.6 | 101356 | 546 | Shale |
Site4 | 7.0 | 7.4 | 101372 | 758 | Shale |
Site5 | 7.2 | 5.8 | 101384 | 813 | Shale |
Site6 | 7.5 | 8.4 | 101395 | 856 | Quartz |
Site7 | 7.5 | 0.5 | 101396 | 854 | Quartz |
Site8 | 7.0 | 11.8 | 101370 | 734 | Shale |
Site9 | 8.4 | 8.2 | 101347 | 360 | Quartz |
Site10 | 6.2 | 1.5 | 101345 | 356 | Quartz |
Simple transformations
Simple transformations such as those outlined in Tutorial 4.1 can be used to help address issues of normality and non-linearity. Indeed species abundance data are routinely fourth-root ($\sqrt[4]{x}$) or $\log(x+1)$ transformed prior to multivariate analysis.
In addition, a rather drastic transformation is that which transforms count data into binary (presence/absence) data. When performed on the entire data frame, such a transformation removes the distinction between dominant and rare species. It can also be useful when applied to a single variable whose values are predominantly 0's and 1's.
Many of the common transformation/standardization routines used in ecology are supported in R by the decostand function within the vegan package.
> library(vegan) > decostand(data[,-1], method="pa")
Sp1 Sp2 Sp3 Sp4 Sp5 Sp6 Sp7 Sp8 Site1 1 0 0 1 1 0 0 0 Site2 0 0 0 1 1 0 1 1 Site3 0 0 0 1 1 0 1 1 Site4 0 0 0 0 0 0 0 1 Site5 0 0 1 0 0 0 0 1 Site6 0 1 1 0 0 0 0 0 Site7 0 0 1 0 0 1 0 0 Site8 0 0 0 0 1 0 1 1 Site9 0 0 0 1 1 0 1 0 Site10 0 0 0 1 1 0 0 0 Sp9 Sp10 Site1 0 0 Site2 0 0 Site3 0 0 Site4 0 1 Site5 1 1 Site6 1 0 Site7 1 0 Site8 0 0 Site9 0 0 Site10 0 0
Standardizations
Close inspection of the above species abundance data reveals that whilst some of the species are relatively abundant (Species 4, 5 and 8), other species (such as Species 1, 6 and 7) are relatively rare. As previously stated, most numerical methods of amalgamating will be more heavily influenced by the more abundant species. Sometimes this is a desired outcome - you might want your measures of the community to reflect this dominance. That is, you may want a numerical description of what is visibly obvious in the field. Yet for other purposes, you may want the patterns to be more representative of the subtleties and complexities of the communities.
The environmental data highlight a different set of common issues:
- some variables (particularly Pressure) have values of much greater magnitude than others
- despite having values of very high magnitude, the Pressure measurements have relatively little spread (variation).
- the altitude measurements have relatively high levels of spread
Rather than simply transforming the variables in isolation, standardizations transform the values relative to other variables, objects or both. There are numerous ways that multivariate data can be standardized in an attempt to alter the balance of weightings and inter-relationships. Most work by adjusting the values such that some property such as means, maximums, totals and/or spread are equivalent for each species.
- Total per row. Each value in a row is divided by the total for the row.
This is a simple standardization that can dampen the influence of objects (sites) that have very high abundances.
> library(vegan) > decostand(data[,-1], method="total")
Sp1 Sp2 Sp3 Sp4 Site1 0.06667 0.0000 0.00000 0.86667 Site2 0.00000 0.0000 0.00000 0.26882 Site3 0.00000 0.0000 0.00000 0.07059 Site4 0.00000 0.0000 0.00000 0.00000 Site5 0.00000 0.0000 0.08571 0.00000 Site6 0.00000 0.4603 0.19048 0.00000 Site7 0.00000 0.0000 0.45652 0.00000 Site8 0.00000 0.0000 0.00000 0.00000 Site9 0.00000 0.0000 0.00000 0.54054 Site10 0.00000 0.0000 0.00000 0.67925 Sp5 Sp6 Sp7 Sp8 Site1 0.06667 0.0000 0.00000 0.0000 Site2 0.41935 0.0000 0.06452 0.2473 Site3 0.49412 0.0000 0.07059 0.3647 Site4 0.00000 0.0000 0.00000 0.7407 Site5 0.00000 0.0000 0.00000 0.4857 Site6 0.00000 0.0000 0.00000 0.0000 Site7 0.00000 0.1087 0.00000 0.0000 Site8 0.23214 0.0000 0.10714 0.6607 Site9 0.42342 0.0000 0.03604 0.0000 Site10 0.32075 0.0000 0.00000 0.0000 Sp9 Sp10 Site1 0.0000 0.0000 Site2 0.0000 0.0000 Site3 0.0000 0.0000 Site4 0.0000 0.2593 Site5 0.2571 0.1714 Site6 0.3492 0.0000 Site7 0.4348 0.0000 Site8 0.0000 0.0000 Site9 0.0000 0.0000 Site10 0.0000 0.0000
- Maximum per column. Each value in a column is divided by the maximum of the column. This is a simple
standardization that can even up the influence of each of the columns.
This is effective for variables that are measured on the same scale and that have
similar spread (variance). Standardized values will range from 0 to 1.
Note this standardization will not address changes in the spread or variability of variables measured on different scales.
> library(vegan) > decostand(data[,-1], method="max")
Sp1 Sp2 Sp3 Sp4 Sp5 Site1 1 0 0.0000 0.90278 0.1064 Site2 0 0 0.0000 0.34722 0.8298 Site3 0 0 0.0000 0.08333 0.8936 Site4 0 0 0.0000 0.00000 0.0000 Site5 0 0 0.2857 0.00000 0.0000 Site6 0 1 0.5714 0.00000 0.0000 Site7 0 0 1.0000 0.00000 0.0000 Site8 0 0 0.0000 0.00000 0.2766 Site9 0 0 0.0000 0.83333 1.0000 Site10 0 0 0.0000 1.00000 0.7234 Sp6 Sp7 Sp8 Sp9 Sp10 Site1 0 0.0000 0.000 0.0000 0.0000 Site2 0 1.0000 0.575 0.0000 0.0000 Site3 0 1.0000 0.775 0.0000 0.0000 Site4 0 0.0000 1.000 0.0000 1.0000 Site5 0 0.0000 0.850 0.8182 0.8571 Site6 0 0.0000 0.000 1.0000 0.0000 Site7 1 0.0000 0.000 0.9091 0.0000 Site8 0 1.0000 0.925 0.0000 0.0000 Site9 0 0.6667 0.000 0.0000 0.0000 Site10 0 0.0000 0.000 0.0000 0.0000
- Wisconsin double standardization. Each value is first standardized by the column maximum before
being standardized by the row total. This standardization tends to enhance the patterns in the data and
therefore is a popular choice.
> library(vegan) > wisconsin(data[,-1])
Sp1 Sp2 Sp3 Sp4 Site1 0.4977 0.0000 0.0000 0.44933 Site2 0.0000 0.0000 0.0000 0.12617 Site3 0.0000 0.0000 0.0000 0.03028 Site4 0.0000 0.0000 0.0000 0.00000 Site5 0.0000 0.0000 0.1016 0.00000 Site6 0.0000 0.3889 0.2222 0.00000 Site7 0.0000 0.0000 0.3438 0.00000 Site8 0.0000 0.0000 0.0000 0.00000 Site9 0.0000 0.0000 0.0000 0.33333 Site10 0.0000 0.0000 0.0000 0.58025 Sp5 Sp6 Sp7 Sp8 Site1 0.05295 0.0000 0.0000 0.0000 Site2 0.30152 0.0000 0.3634 0.2089 Site3 0.32472 0.0000 0.3634 0.2816 Site4 0.00000 0.0000 0.0000 0.5000 Site5 0.00000 0.0000 0.0000 0.3024 Site6 0.00000 0.0000 0.0000 0.0000 Site7 0.00000 0.3438 0.0000 0.0000 Site8 0.12563 0.0000 0.4542 0.4201 Site9 0.40000 0.0000 0.2667 0.0000 Site10 0.41975 0.0000 0.0000 0.0000 Sp9 Sp10 Site1 0.0000 0.0000 Site2 0.0000 0.0000 Site3 0.0000 0.0000 Site4 0.0000 0.5000 Site5 0.2911 0.3049 Site6 0.3889 0.0000 Site7 0.3125 0.0000 Site8 0.0000 0.0000 Site9 0.0000 0.0000 Site10 0.0000 0.0000
- Range. Each value in a column is standardized into a range of 0 to 1.
This is one way to adjust for differences in the spread of values. For example, it could be used
to even up the spread of measurements in the simulated environmental data.
> library(vegan) > #we need to first convert the categorical variable (Substrate) into a numeric > enviro1 <- within(enviro, Substrate <- as.numeric(Substrate)) > decostand(enviro1[,-1], method="range")
pH Slope Pressure Altitude 1 0.00000 0.3274 0.0000 0.0000 2 0.26087 0.7699 0.3803 0.5948 3 0.30435 0.7168 0.4366 0.6370 4 0.39130 0.6106 0.6620 0.8852 5 0.47826 0.4690 0.8310 0.9496 6 0.60870 0.6991 0.9859 1.0000 7 0.60870 0.0000 1.0000 0.9977 8 0.39130 1.0000 0.6338 0.8571 9 1.00000 0.6814 0.3099 0.4192 10 0.04348 0.0885 0.2817 0.4145 Substrate 1 0 2 1 3 1 4 1 5 1 6 0 7 0 8 1 9 0 10 0
- Centre. Each value in a column is standardized to have a mean of 0 by subtracting the
column mean from each of the values in the column. This is useful when the variables all have
similar absolute spreads of values yet vastly different magnitudes.
> #we need to first convert the categorical variable (Substrate) into a numeric > enviro1 <- within(enviro, Substrate <- as.numeric(Substrate)) > apply(enviro1[,-1],2,scale, scale=FALSE)
pH Slope Pressure Altitude [1,] -0.94 -2.36 -39.2 -576.9 [2,] -0.34 2.64 -12.2 -68.9 [3,] -0.24 2.04 -8.2 -32.9 [4,] -0.04 0.84 7.8 179.1 [5,] 0.16 -0.76 19.8 234.1 [6,] 0.46 1.84 30.8 277.1 [7,] 0.46 -6.06 31.8 275.1 [8,] -0.04 5.24 5.8 155.1 [9,] 1.36 1.64 -17.2 -218.9 [10,] -0.84 -5.06 -19.2 -222.9 Substrate [1,] -0.5 [2,] 0.5 [3,] 0.5 [4,] 0.5 [5,] 0.5 [6,] -0.5 [7,] -0.5 [8,] 0.5 [9,] -0.5 [10,] -0.5
- Standardize. Each value in a column is standardized to a mean of 0 and standard deviation of 1.
That is, each of the variables is normalized (converted to z-scores).
This is another way to adjust for differences in the spread of values.
> #we need to first convert the categorical variable (Substrate) into a numeric > enviro1 <- within(enviro, Substrate <- as.numeric(Substrate)) > apply(enviro1[,-1],2,scale)
> #OR > library(vegan) > decostand(enviro1[,-1], method="standardize")
pH Slope Pressure Altitude 1 -1.39885 -0.6636 -1.6863 -2.0691 2 -0.50597 0.7423 -0.5248 -0.2471 3 -0.35715 0.5736 -0.3527 -0.1180 4 -0.05953 0.2362 0.3355 0.6424 5 0.23810 -0.2137 0.8517 0.8396 6 0.68455 0.5173 1.3249 0.9938 7 0.68455 -1.7039 1.3679 0.9867 8 -0.05953 1.4733 0.2495 0.5563 9 2.02387 0.4611 -0.7399 -0.7851 10 -1.25004 -1.4227 -0.8259 -0.7995 Substrate 1 -0.9487 2 0.9487 3 0.9487 4 0.9487 5 0.9487 6 -0.9487 7 -0.9487 8 0.9487 9 -0.9487 10 -0.9487
As with simple transformations in statistical models, it is usually advised that multivariate analyses be repeated with a range of transformation and standardization options so as to gain an appreciation of the influence of dominant/rare species, populous and sparse sites, large or varied measurements. If the various standardizations ultimately yield similar patterns amongst communities, then it suggests that the patterns are stable within the scale of your observations and that any one of the outcomes can be used to describe the patterns. If the patterns are substantially different, then it is likely that the different standardizations are drawing out different scales of community patterns.
Worked Examples
Basic statistics references
- Legendre and Legendre
- Quinn & Keough (2002) - Chpt 17
Standardizations
The following community data represent the abundances of three species of gastropods in five quadrats (ranging from high shore marsh - Quadrat 1, to low shore marsh - Quadrat 5) in a saltmarsh.
Download gastropod data set

Format of the gastropod data set (gastropod.csv): one row per quadrat (Quadrats 1-5), with the columns Salinator, Ophicardelus and Marinula giving the abundance (count) of each gastropod species in that quadrat.
> gastropod <- read.csv('../downloads/data/gastropod.csv') > gastropod
Salinator Ophicardelus Marinula 1 4 0 1 2 9 3 0 3 9 4 1 4 6 2 0 5 0 1 1
- Before proceeding with any multivariate analyses, it is a good idea to get a
'feel' for your data. The gastropod data set is intentionally very small so that we can help
relate various calculated properties to what we can see by simply inspecting the counts.
To build up a picture of these data, generate the following exploratory properties:
- Scale of each of the species (column maximums)
Show code
> apply(gastropod,2,max)
Salinator Ophicardelus Marinula 9 4 1
- Scale of each of the species (column means)
Show code
> apply(gastropod,2,mean)
Salinator Ophicardelus Marinula 5.6 2.0 0.6
- Variability of each of the species (column variance)
Show code
> apply(gastropod,2,var)
Salinator Ophicardelus Marinula 14.3 2.5 0.3
- Abundances in each quadrat (row totals)
Show code
> apply(gastropod,1,sum)
[1] 5 12 14 8 2
- Correlations between species
Show code
> cor(gastropod)
Salinator Ophicardelus Salinator 1.0000 0.7944 Ophicardelus 0.7944 1.0000 Marinula -0.4587 -0.2887 Marinula Salinator -0.4587 Ophicardelus -0.2887 Marinula 1.0000
- Scale of each of the species (column maximums)
- We intend to use these data in some sort of multivariate analysis. Typically, before doing so,
we standardize the data in order to ensure that certain features are honored in the analysis. Standardize the
gastropod data to achieve the following:
- ensure that the rare and abundant species alike have similar weighting and are constrained to the range of 0-1
Show code
> library(vegan) > gast1 <- decostand(gastropod,"max") > gast1
Salinator Ophicardelus Marinula 1 0.4444 0.00 1 2 1.0000 0.75 0 3 1.0000 1.00 1 4 0.6667 0.50 0 5 0.0000 0.25 1
> apply(gast1,2,max)
Salinator Ophicardelus Marinula 1 1 1
> apply(gast1,2,range)
Salinator Ophicardelus Marinula [1,] 0 0 0 [2,] 1 1 1
- ensure that all species have similar weighting yet maintain their variability. This could be important if
you want multivariate patterns to reflect heterogeneity (many analyses are drawn towards higher variability).
Show code
> #center the data > gast2<-apply(gastropod,2,scale,scale=FALSE) > gast2
Salinator Ophicardelus Marinula [1,] -1.6 -2 0.4 [2,] 3.4 1 -0.6 [3,] 3.4 2 0.4 [4,] 0.4 0 -0.6 [5,] -5.6 -1 0.4
> apply(gast2,2,mean)
Salinator Ophicardelus Marinula 3.554e-16 0.000e+00 2.220e-17
> apply(gast2,2,var)
Salinator Ophicardelus Marinula 14.3 2.5 0.3
- ensure that all species have similar weighting. The influences of highly abundant and/or variable
species are suppressed and those of rare species are enhanced so that all have similar influence.
Show code
> #scale data to mean=0 and variance of 1 > gast3<-apply(gastropod,2,scale) > #OR > library(vegan) > gast3<-decostand(gastropod,method="standardize") > gast3
Salinator Ophicardelus Marinula 1 -0.4231 -1.2649 0.7303 2 0.8991 0.6325 -1.0954 3 0.8991 1.2649 0.7303 4 0.1058 0.0000 -1.0954 5 -1.4809 -0.6325 0.7303
> apply(gast3,2,mean)
Salinator Ophicardelus Marinula 1.193e-16 0.000e+00 0.000e+00
> apply(gast3,2,var)
Salinator Ophicardelus Marinula 1 1 1
- ensure that all sites have similar weightings and are constrained to a range of 0-1.
Show code
> library(vegan) > gast4 <- decostand(gastropod,"total") > gast4
Salinator Ophicardelus Marinula 1 0.8000 0.0000 0.20000 2 0.7500 0.2500 0.00000 3 0.6429 0.2857 0.07143 4 0.7500 0.2500 0.00000 5 0.0000 0.5000 0.50000
> apply(gast4,1,sum)
[1] 1 1 1 1 1
> cor(gast4)
Salinator Ophicardelus Salinator 1.0000 -0.8353 Ophicardelus -0.8353 1.0000 Marinula -0.8852 0.4836 Marinula Salinator -0.8852 Ophicardelus 0.4836 Marinula 1.0000
- ensure that all species and sites have similar weightings and yet enhance any underlying patterns (increase species correlations for example).
This can improve the success of any resulting multivariate analyses.
Show code
> library(vegan) > # Wisconsin double standardization > gast5 <- wisconsin(gastropod) > gast5
Salinator Ophicardelus Marinula 1 0.3077 0.0000 0.6923 2 0.5714 0.4286 0.0000 3 0.3333 0.3333 0.3333 4 0.5714 0.4286 0.0000 5 0.0000 0.2000 0.8000
> cor(gast5)
Salinator Ophicardelus Salinator 1.0000 0.6123 Ophicardelus 0.6123 1.0000 Marinula -0.9241 -0.8680 Marinula Salinator -0.9241 Ophicardelus -0.8680 Marinula 1.0000
- ensure that the rare and abundant species alike have similar weighting and are constrained to the range of 0-1