#Partial-correlation Mantel test, using randomization (v1.00 18.11.05) #Author: Dr Mike Weale, Institute for Human Genetics and Health, UCL # #Takes 3 matrices A,B,C and permutes A to perform test for partial correlation of lower diag. matrices of A on B given C #Complete permutation of the A matrix is performed. # (In some cases, restricted permutations are required to perform certain tests # e.g. see Livshits et al. (1991), AJHG 49: 131-146) #Before running program, # (1) Replace the example values below for matrices A, B and C with your values. # (2) Replace current value for "npos" (currently 14) with your value for the number of rows/cols in your A,B,C matrices #To run this program, (1) Install R (http://www.r-project.org/), (2) Copy and paste this entire file into the R window #For general principles of Mantel testing, see Sokal & Rohlf 1995, p.813-819 # #To cite this program, please cite the following paper in which it was introduced: #Nadkarni NA, Weale ME, von Schantz M, Thomas MG #“Evolution of a Length Polymorphism in the Human PER3 Gene, a Component of the Circadian System.” #J Biol Rhythms (2005) Dec;20(6):490-9 # #To report bugs etc: m.weale@ucl.ac.uk #SECTION 1 #This first section sets up essential parameters. Edit these according to your needs. npops <- 14 #Number of populations (i.e. No. rows and cols in your matrices) runs <- 10000 #No. of permutations (randomizations) used to find P value #A contains the "Y variable" or Dependent Variable. The one that is being explained by B and C. Hence the one that is permuted #Note that in the 2-matrix case, it doesn't matter which one you permute, but in the 3-matrix case it DOES matter #Note upper triangle must contain zeros. #Here, A = Fst(hper3) A <- matrix(scan(""), ncol=npops, byrow=T) 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.05158 0 0 0 0 0 0 0 0 0 0 0 0 0 -0.00837 0.04203 0 0 0 0 0 0 0 0 0 0 0 0 -0.00507 0.05701 -0.00597 0 0 0 0 0 0 0 0 0 0 0 0.02518 -0.00235 0.0177 0.02963 0 0 0 0 0 0 0 0 0 0 -0.00175 0.08877 -0.00015 -0.00024 0.0551 0 0 0 0 0 0 0 0 0 0.05344 0.20609 0.06281 0.04922 0.15975 0.02407 0 0 0 0 0 0 0 0 0.06549 0.22099 0.0762 0.05975 0.17445 0.03341 -0.00502 0 0 0 0 0 0 0 -0.00692 0.06011 -0.00733 -0.00485 0.0318 -0.00399 0.04523 0.05669 0 0 0 0 0 0 -0.00337 0.07768 -0.0027 -0.00158 0.04593 -0.00461 0.03318 0.04278 -0.00469 0 0 0 0 0 -0.00742 0.03893 -0.00947 -0.00494 0.01555 0.00207 0.06724 0.08056 -0.00606 -0.00091 0 0 0 0 0.13272 0.29876 0.15062 0.1171 0.25138 0.09083 0.01851 0.0098 0.12178 0.09813 0.15382 0 0 0 0.35888 0.16872 0.35112 0.34492 0.21619 0.41461 0.54558 0.56697 0.37293 0.38425 0.34179 0.6443 0 0 0.00124 0.00724 -0.00395 0.00468 -0.00613 0.02322 0.11965 0.13697 0.00571 0.01609 -0.00499 0.22912 0.28529 0 #Here, B contains the design matrix specifying the hypothesis to be tested #Note again the upper triangle must contain zeros. #Here, Fst = insolation matrix - recalculated as squared diff in insolation by MEW B <- matrix(scan(""), ncol=npops, byrow=T) 0 0 0 0 0 0 0 0 0 0 0 0 0 0 760.160041 0 0 0 0 0 0 0 0 0 0 0 0 0 127.260961 265.3641 0 0 0 0 0 0 0 0 0 0 0 0 112.890625 1458.934416 479.872836 0 0 0 0 0 0 0 0 0 0 0 2173.983876 5505.194809 3353.220649 1296.072001 0 0 0 0 0 0 0 0 0 0 46.335249 1181.846884 327.175744 14.577124 1585.552761 0 0 0 0 0 0 0 0 0 8427.607204 14249.91313 10626.10489 6589.705329 2040.870976 7224.150025 0 0 0 0 0 0 0 0 8096.220441 13818.0025 10253.5876 6297.057316 1879.482609 6917.581584 3.323329 0 0 0 0 0 0 0 21766.28116 30661.76103 25222.20423 18744.07428 10182.42446 19804.08853 3106.055824 3312.578025 0 0 0 0 0 0 17885.85264 26020.59348 21030.51036 15156.81077 7588.500544 16111.47876 1758.628096 1914.850081 190.329616 0 0 0 0 0 5243.642569 9996.800256 7004.685636 3817.756944 664.969369 4304.147236 375.933321 308.564356 5643.164641 3760.755625 0 0 0 0 6062.335321 11115.90662 7946.296164 4520.679696 975.625225 5048.670916 194.351481 146.845924 4854.326929 3122.239129 29.680704 0 0 0 581.484996 2671.339225 1252.806025 181.953121 506.790144 299.532249 4581.665344 4338.198225 15232.4964 12017.42138 2332.793401 2888.740009 0 0 36.881329 1131.918736 301.161316 20.720704 1644.545809 0.538756 7349.461441 7040.216836 20011.21452 16298.35223 4400.9956 5153.516944 325.477681 0 #Here, C contains the third matrix specifying another independent variable #Note again the upper triangle must contain zeros. #here, C is Fst(CMP) matrix (CMP = Classical Marker Populations) C <- matrix(scan(""), ncol=npops, byrow=T) 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0658 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0118 0.0828 0 0 0 0 0 0 0 0 0 0 0 0 0.0188 0.0697 0.0196 0 0 0 0 0 0 0 0 0 0 0 0.0163 0.0534 0.2321 0.1642 0 0 0 0 0 0 0 0 0 0 0.2241 0.106 0.2077 0.1796 0.0408 0 0 0 0 0 0 0 0 0 0.2361 0.1345 0.2511 0.2252 0.1707 0.1059 0 0 0 0 0 0 0 0 0.2963 0.1664 0.2863 0.1958 0.1703 0.1092 0.0541 0 0 0 0 0 0 0 0.1708 0.0909 0.1723 0.1459 0.0313 0.0179 0.1176 0.1306 0 0 0 0 0 0 0.2288 0.1163 0.1767 0.1487 0.0273 0.0197 0.1244 0.1153 0.0021 0 0 0 0 0 0.1779 0.0709 0.1823 0.1454 0.0263 0.0158 0.1056 0.0983 0.0238 0.0236 0 0 0 0 0.2882 0.1423 0.2519 0.1733 0.136 0.0681 0.0218 0.0705 0.068 0.0896 0.1237 0 0 0 0.3372 0.2733 0.3324 0.2752 0.2324 0.1751 0.1241 0.1503 0.1639 0.1816 0.1918 0.1237 0 0 0.2202 0.1078 0.2073 0.1748 0.0497 0.0154 0.0718 0.0847 0.0293 0.028 0.0229 0.0509 0.146 0 #SECTION 2 #The following function calculates r between 2 matrices, based on the lower left off-diagonal values only. #Matrices MUST have zeros in all non-lower-left cells #A,B = matrices. ncells = number of lower-left off-diag values rmat <- function(A,B,ncells) { (sum(A*B) - sum(A)*sum(B)/ncells) / sqrt( (sum(A*A)-(sum(A)^2)/ncells) * (sum(B*B)-(sum(B)^2)/ncells) ) } #This section performs the Mantel test. #There should be no need to alter the code here #Sokal & Rohlf give this equation for the partial correlation of A on B given C # r_AB.C = (r_AB - r_AC*r_BC) / sqrt((1-r_AC^2)(1-r_BC^2)) nl <- npops*(npops-1)/2 #No. values in the lower diagonal rdist <- rep(0.0,runs) #stores the simulated partial-r statistics obsr <- (rmat(A,B,nl) - rmat(A,C,nl)*rmat(B,C,nl)) / sqrt( (1-rmat(A,C,nl)^2)*(1-rmat(B,C,nl)^2) ) A1 <- A+t(A) #create "full" distance matrix for A, the one to be permuted #This means the lower diag matrix of A1 is always correct, even under permutation screen <- matrix(rep(0,npops*npops),ncol=npops) for (i in 2:npops) {for (j in 1:(i-1)) {screen[i,j]<-1}} #"screen" is used to convert "full" matrix back in lower-left values only rBC <- rmat(B,C,nl) #this doesn't change, so can use same value throughout for (i in 1:runs) { rcols <- order(runif(npops)) #used to shuffle matrix A to become matrix Ar Ar <- A1[rcols,rcols]*screen rAC <- rmat(Ar,C,nl) rdist[i] <- (rmat(Ar,B,nl) - rAC*rBC) / sqrt( (1-rAC^2)*(1-rBC^2) ) } #SECTION 3: results obsr #This prints the observed partial correlation u <- sum(rdist>=obsr)/runs; u #This prints the upper-tail P value (1-tailed test) l <- sum(rdist<=obsr)/runs; l #This prints the lower-tail P value (1-tailed test) min(l,u)*2 #This prints the 2-tailed P value (2-tailed test) hist(rdist,50) #This produces a histogram of r under the null hypothesis #> #SECTION 3: results #> obsr #This prints the observed partial correlation #> u <- sum(rdist>=obsr)/runs; u #This prints the upper-tail P value (1-tailed test) #> l <- sum(rdist<=obsr)/runs; l #This prints the lower-tail P value (1-tailed test) #> min(l,u)*2 #This prints the 2-tailed P value (2-tailed test) - this is usually the one you want #> hist(rdist,50) #This produces a histogram of r under the null hypothesis