Today, a colleague asked me to write a UDF to calculate the Pearson correlation. Of course, this kind of assignment is fun for me. After all, I like statistics AND ColdFusion, so what could be better than combining these? OK, that sounds a bit dorky, but it’s always nice to write code for doing calculations that you would not usually do by hand. It makes it easier to understand what is going on when you use software like SPSS to do the calculations for you.
So here is my code for calculating the Pearson correlation. It can also be downloaded HERE, and I will submit this to CFLIB.org as well. Comments and criticism are very welcome!
UDF
<cfscript>
/**
* Returns the Pearson correlation between the values stored under two keys in an array of structures.
* Values must be numeric.
*
* @param arrayOfStructs - An array of structures containing the specified keys for every element.
* @param xKey - A string: the structKey containing the first variable.
* @param yKey - A string: the structKey containing the second variable.
* @return Struct with two keys:
* pearsonCorrelation: a number ranging from -1 to +1, or an empty string if inputValid is false.
* inputValid - boolean: indicates if the input was valid, so that the Pearson correlation could be computed.
* The correlation cannot be computed if the array holds fewer than two elements, or if at least one
* of the variables has zero variance.
* @author Martijn van der Woud (http://martijnvanderwoud.wordpress.com) at Orga-Toolkit (http://www.orga-toolkit.nl)
* @version 1, July 9, 2008
*/
function pearsonCorrelation (arrayOfStructs, xKey, yKey) {
	// numeric: just a loop index
	var i = 0;
	// numeric: the number of elements in arrayOfStructs
	var length = arrayLen(arguments.arrayOfStructs);
	// numeric: the sums of all values for the xKey and the yKey
	var xSum = 0;
	var ySum = 0;
	// numeric: the mean values for the xKey and the yKey
	var xMean = 0;
	var yMean = 0;
	// numeric: the deviations from the mean for the xKey and the yKey in a specific element
	var xDeviation = 0;
	var yDeviation = 0;
	// numeric: the sums of squared deviations for the xKey and the yKey
	var sqDevX = 0;
	var sqDevY = 0;
	// numeric: the sum of cross-products of the deviations
	var crossProductSum = 0;
	// numeric: the Pearson correlation (named r so it does not shadow this function's own name)
	var r = 0;
	// struct: the results to return
	var results = structNew();

	// start from the "cannot be computed" result; it is only overwritten on success
	results.inputValid = false;
	results.pearsonCorrelation = "";

	// With no elements the means below would divide by zero, and with a single element
	// both variances are zero, so fewer than two observations is always invalid input.
	if (length lt 2) {
		return results;
	}

	// first pass: sum the xKey and yKey values to obtain their means
	for (i = 1; i lte length; i = i + 1) {
		xSum = xSum + arguments.arrayOfStructs[i][arguments.xKey];
		ySum = ySum + arguments.arrayOfStructs[i][arguments.yKey];
	}
	xMean = xSum / length;
	yMean = ySum / length;

	// second pass: accumulate squared deviations and cross-products around the means
	for (i = 1; i lte length; i = i + 1) {
		xDeviation = arguments.arrayOfStructs[i][arguments.xKey] - xMean;
		yDeviation = arguments.arrayOfStructs[i][arguments.yKey] - yMean;
		sqDevX = sqDevX + xDeviation^2;
		sqDevY = sqDevY + yDeviation^2;
		crossProductSum = crossProductSum + xDeviation * yDeviation;
	}

	// if there is no variation in either xKey or yKey, the correlation is undefined
	if (sqDevX eq 0 or sqDevY eq 0) {
		return results;
	}

	// r = covariance / (sdX * sdY); the (length - 1) factors of the covariance and of the
	// two standard deviations cancel, so the raw sums can be used directly.
	r = crossProductSum / (sqr(sqDevX) * sqr(sqDevY));
	// floating-point rounding can push a perfect correlation marginally outside [-1, 1]
	r = max(-1, min(1, r));

	results.inputValid = true;
	results.pearsonCorrelation = r;
	return results;
} // end of function pearsonCorrelation()
</cfscript>
Example usage
<!--- example --->
<!--- The X and Y observations as two parallel lists; all values are numeric --->
<cfset variables.xValues = "3,6,7,4,5,3,7,6,5,2,3,5,6,7">
<cfset variables.yValues = "1,2,3,4,5,6,7,8,9,1,2,3,4,5">
<!--- Build an array of structs, one struct with keys "X" and "Y" per observation --->
<cfset variables.arrayOfStructs = arrayNew(1)>
<cfloop from="1" to="#listLen(variables.xValues)#" index="row">
<cfset variables.element = structNew()>
<cfset variables.element.X = listGetAt(variables.xValues, row)>
<cfset variables.element.Y = listGetAt(variables.yValues, row)>
<cfset arrayAppend(variables.arrayOfStructs, variables.element)>
</cfloop>
<!--- Compute the correlation between the "X" and "Y" keys and show the resulting struct --->
<cfset variables.pearsonCorrelation = pearsonCorrelation(
arrayOfStructs = variables.arrayOfStructs,
xKey = "X",
yKey = "Y")>
<cfdump var="#variables.pearsonCorrelation#">
Hi, I'm trying to work out the Pearson's correlation for my final year project. I don't understand how to write it out in the proper way. I don't know where to find the info from the correlation apart from
r = number of participants = Pearson's correlation;
but my book is showing that their correlation is significant even though it is higher than the usual sig value of 0.05; mine is 1.86 and I don't know whether to put that as significant or not. And I don't know where to find the r2 / r squared; is that the number of participants squared? I'm so confused. If anyone can help, I'll appreciate it. Or it might be too late, in case I burst a nerve. I hate SPSS.
Hi B,
I am not sure what you are trying to ask exactly. Can you describe more clearly what you are trying to do? And PLEASE also describe what you DO understand, so I can figure out where to start from when I try to explain this stuff to you.