llclassify--Computational program function, two versions This file provides the code and documentation for the computational routines (in S) used to obtain the unconditional matrices in App. A-D. Two versions are given differing only in the input required: individual test responses or the discrete pdf of the test responses. The Splus function listed, llclassify, was written as part of this project and it is available either from the Appendix or by a separate link on the main Accuracy Guide page. We should note that the 2003 ETS Technical report (p.128) refers to "the ETSproprietary computer program RELCLASS-COMP (Version 4.12)" for doing these kind of calculations to obtain the unconditional probability matrices of the form shown in App. A-D. The procedures used are based on Livingston and Lewis (1995); another good treatment of the computational issues and the estimation of false positive and false negative probabilities is Hanson and Brennan (1990). --------------------------------------- Copyright (C) 2004, D. Rogosa & M. Finkelman. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. ------------------------------------- The following SPlus function is called "llclassify". It is an implementation of Livingston and Lewis' (1995) method for estimating the accuracy of classifications based on test scores. We have made a slight modification whereby in estimating the joint distribution of true scores and observed scores, we integrate over the true score distribution rather than dividing it into bins. This modification leads to simpler code, faster output, and greater precision. The main output is a classification matrix, where a row denotes the true score category and a column represents the observed score category. The function is given below. Comments are denoted by #. The function call is of the form function(observedy, r, max, cut, min = 0, adjust = 1). # The arguments of this function are as follows: # First argument is the inputted data. Data should # be a vector of numeric scores (either scale or # number correct); that is, if 400,000 students are # examined, then "observedy" is a vector of 400,000 # numbers representing the score of each student. If # a pdf file is given instead of a vector, that pdf # file should be expanded into such a vector. Second # argument is inputted reliability coefficient. Third # argument is the maximum possible score on the test. # Fourth argument is vector of cut points. Cuts are # the lower bound of the "higher" category. For # instance, if Far Below Basic stops at a score of # 26, and Below Basic starts at 27, then put in 27 # for that cut. Fifth argument is minimum possible # score on the test (default=0). Sixth argument is # whether to adjust the results to match the # marginals of the observed score distribution; # default (1) is to adjust them. Put in a value other # than 1 if you do not want to adjust the values in # this way. { # Adjust the scores in case min is not 0, and do # appropriate calculations. observed <- observedy - min max <- max - min cut <- cut - min x <- round(observed) p <- (x)/(max) mup <- sum(p)/length(p) deviations <- (p - mup) squaredeviations <- deviations^2 sigma2p <- sum(squaredeviations) /(length(p)) # Find L&L's effective test length, and adjust # observed scores according to effective test # length. effective <- (mup * (1 - mup) - r * sigma2p)/(sigma2p * (1 - r)) n <- round(effective) # Fit 4-parameter beta distribution to true scores, # using method of moments. xprimenotrounded <- (n * (x))/(max) xprime <- round(xprimenotrounded) mprime1x <- sum(xprime)/length(xprime) mprime2x <- sum(xprime^2)/length(xprime) mprime3x <- sum(xprime^3)/length(xprime) mprime4x <- sum(xprime^4)/length(xprime) mprime1p <- mprime1x/n mprime2p <- (mprime2x - mprime1x)/ (n * (n - 1)) mprime3p <- (mprime3x - 3 * mprime2x + 2 * mprime1x)/(n * (n - 1) * (n - 2)) mprime4p <- (mprime4x - 6 * mprime3x + 11 * mprime2x - 6 * mprime1x)/ (n * (n - 1) * (n - 2) * (n - 3)) m2p <- (mprime2p - mprime1p^2) m3p <- (mprime3p - 3 * mprime1p * mprime2p + 2 * mprime1p^3) m4p <- (mprime4p - 4 * mprime1p * mprime3p + 6 * mprime1p^2 * mprime2p - 3 * mprime1p^4) k <- (16 * m2p^3)/m3p^2 l <- (m4p * m2p)/m3p^2 phi <- (3 * (k - 16 * (l - 1)))/ (16 * (l - 1) - 8 - 3 * k) sgnm3p <- m3p/abs(m3p) # Revert to 3-parameter beta distribution with # beta=1 if 4-parameter method of moments does not # give an appropriate solution. if(as.double(k * (phi + 1) + (phi + 2)^2) <= 0) { z <- (2 * (1 - mprime1p) * m2p)/m3p new <- m2p/(1 - mprime1p)^2 betahat <- (z * (1 - new) - 2)/(1 - new + 2 * new * z) alphahat <- (new * betahat * (1 + betahat)) /(1 - new * betahat) ahat <- ((alphahat + betahat) * mprime1p - alphahat)/betahat bhat <- 1 } # Fit the 4-parameter beta distribution if method of # moments gives an appropriate solution. alphahat, # betahat, ahat, and bhat are estimates of the 4 # respective parameters: alpha, beta, a, and b. # alpha and beta are the usual parameters of the # 2-parameter beta distribution. a is the lowest # value that the true score can take, while b is the # highest such value. else { theta <- (sgnm3p * phi * (phi + 2))/ sqrt(k * (phi + 1) + (phi + 2)^2) alphahat <- (phi - theta)/2 betahat <- (phi + theta)/2 ahat <- (mprime1p - sqrt((m2p * alphahat * (alphahat + betahat + 1))/betahat)) bhat <- (mprime1p + sqrt((m2p * betahat * (alphahat + betahat + 1))/alphahat)) } if((ahat < 0) || (alphahat < 0) || (betahat < 0)) { rrr <- m2p/mprime1p^2 w <- (2 * m2p^2)/(mprime1p * m3p) alphahat <- (w * (rrr - 1) - 2)/(1 - rrr - 2 * rrr * w) betahat <- (rrr * alphahat * (alphahat + 1)) /(1 - rrr * alphahat) bhat <- ((alphahat + betahat) * mprime1p) /alphahat ahat <- 0 } # Fit a simple beta model if the method of moments # continues not to give an appropriate solution. if((bhat > 1) || (alphahat < 0) || (betahat < 0)) { z <- (2 * (1 - mprime1p) * m2p)/m3p new <- m2p/(1 - mprime1p)^2 betahat <- (z * (1 - new) - 2)/(1 - new + 2 * new * z) alphahat <- (new * betahat * (1 + betahat)) /(1 - new * betahat) ahat <- ((alphahat + betahat) * mprime1p - alphahat)/betahat bhat <- 1 } # Figure out which values inside [ahat, bhat] belong # in which true score category. cutinp <- (cut - 0.5)/max cutinp2 <- c(ahat, cutinp, bhat) # Figure out which observed scores will map into # which observed classification. 1e-07 term # necessary because of unexpected results from # SPlus' "floor" function. cut2 <- floor(((cut - 0.5) * n)/max - 1e-07) cut3 <- c(0, cut2, n) # Figure out each entry in confusion matrix, which # gives the joint distribution of true score and # observed score. This program integrates over true # score, rather than L&L's suggestion of dividing # into bins. This modification is both more exact # and much faster. Note that the additional # function "ffunc" is required to run this program. # ffunc is added at the end of this text. finalmatrix <- matrix(0, ncol = length(cut) + 1, nrow = length(cut) + 1) for(i in 1:nrow(finalmatrix)) { for(j in 1:nrow(finalmatrix)) { finalmatrix[i, j] <- integrate(ffunc, cutinp2[i], cutinp2[i + 1], ahat1 = ahat, bhat1 = bhat, alphahat1 = alphahat, betahat1 = betahat, lowbinom = cut3[j], highbinom = cut3[j + 1], n1 = n)$integral } } # If adjust is set to 1, adjust the entries to match # the observed score distribution. if(adjust == 1) { proportions <- c(0, length(cut) + 1) xlowest <- x[x < cut[1]] proportions[1] <- length(xlowest)/length(x) if(length(cut) > 1) { for(i in 2:length(cut)) { temp <- x[x < cut[i]] proportions[i] <- (length(temp)/length(x) - sum(proportions[1:(i - 1)])) } } proportions[length(cut) + 1] <- 1 - sum(proportions[1:length(cut)]) for(i in 1:ncol(finalmatrix)) { finalmatrix[, i] <- (finalmatrix[, i] * proportions[i])/sum(finalmatrix[, i]) } } # Return the parameters of the fitted 4-parameter # beta distribution, the effective test length, and # the confusion matrix. list(alphahat = alphahat, betahat = betahat, ahat = ahat, bhat = bhat, n = n, finalmatrix = finalmatrix) } # This short function "ffunc" is needed to perform # the integral used in the above function. All # arguments are defined within the function that # calls it. > ffunc function(x, ahat1, bhat1, alphahat1, betahat1, lowbinom, highbinom, n1) { (dbeta(((x - ahat1))/(bhat1 - ahat1), alphahat1, betahat1) * (pbinom( highbinom, n1, x) - pbinom(lowbinom, n1, x))) /(bhat1 - ahat1) } # The following is an example of the function's # output. It is based on 2003 raw score data of # grade 4 math students. $alphahat is the # estimated value of alpha in the 4-parameter beta # distribution. $betahat is the estimated value of # beta in this 4-parameter beta distribution. # $ahat and $bhat are the estimated lower and upper # bounds of this true score distribution. $n is # the so-called "effective test length" in the # Livingston-Lewis paper. $finalmatrix is the final # confusion matrix outputted by the function. Its # entries have been rounded off for ease of viewing. $alphahat: [1] 1.377472 $betahat: [1] 0.783828 $ahat: [1] 0.2031712 $bhat: [1] 0.9505744 $n: [1] 68 $finalmatrix: [,1] [,2] [,3] [,4] [,5] [1,] 0.04458 0.01162 0.00000 0.00000 0.00000 [2,] 0.01921 0.16683 0.02461 0.00001 0.00000 [3,] 0.00000 0.03097 0.21290 0.03775 0.00004 [4,] 0.00000 0.00000 0.03094 0.19988 0.03115 [5,] 0.00000 0.00000 0.00001 0.03766 0.15181 ============================================================== The following SPlus function is called "llclassify". It is an implementation of Livingston and Lewis' (1995) method for estimating the accuracy of classifications based on test scores. We have made a slight modification whereby in estimating the joint distribution of true scores and observed scores, we integrate over the true score distribution rather than dividing it into bins. This modification leads to simpler code, faster output, and greater precision. The main output is a classification matrix, where a row denotes the true score category and a column represents the observed score category. The function is given below. Comments are denoted by #. The function call is of the form function(obstable, r, max, cut, min = 0, adjust = 1). # The arguments of this function are as follows: # First argument is the inputted data. Data should # be a table of numeric scores (either scale or # number correct). The first column of the table # should be possible scores, and the second column # should be the number of students who obtained each # corresponding score in the first column. Second # argument is inputted reliability coefficient. Third # argument is the maximum possible score on the test. # Fourth argument is vector of cut points. Cuts are # the lower bound of the "higher" category. For # instance, if Far Below Basic stops at a score of # 26, and Below Basic starts at 27, then put in 27 # for that cut. Fifth argument is minimum possible # score on the test (default=0). Sixth argument is # whether to adjust the results to match the # marginals of the observed score distribution; # default (1) is to adjust them. Put in a value other # than 1 if you do not want to adjust the values in # this way. { # Adjust the scores in case min is not 0, and do # appropriate calculations. observedy <- rep(obstable[,1], obstable[,2]) observed <- observedy - min max <- max - min cut <- cut - min x <- round(observed) p <- (x)/(max) mup <- sum(p)/length(p) deviations <- (p - mup) squaredeviations <- deviations^2 sigma2p <- sum(squaredeviations) /(length(p)) # Find L&L's effective test length, and adjust # observed scores according to effective test # length. effective <- (mup * (1 - mup) - r * sigma2p)/(sigma2p * (1 - r)) n <- round(effective) # Fit 4-parameter beta distribution to true scores, # using method of moments. xprimenotrounded <- (n * (x))/(max) xprime <- round(xprimenotrounded) mprime1x <- sum(xprime)/length(xprime) mprime2x <- sum(xprime^2)/length(xprime) mprime3x <- sum(xprime^3)/length(xprime) mprime4x <- sum(xprime^4)/length(xprime) mprime1p <- mprime1x/n mprime2p <- (mprime2x - mprime1x)/ (n * (n - 1)) mprime3p <- (mprime3x - 3 * mprime2x + 2 * mprime1x)/(n * (n - 1) * (n - 2)) mprime4p <- (mprime4x - 6 * mprime3x + 11 * mprime2x - 6 * mprime1x)/ (n * (n - 1) * (n - 2) * (n - 3)) m2p <- (mprime2p - mprime1p^2) m3p <- (mprime3p - 3 * mprime1p * mprime2p + 2 * mprime1p^3) m4p <- (mprime4p - 4 * mprime1p * mprime3p + 6 * mprime1p^2 * mprime2p - 3 * mprime1p^4) k <- (16 * m2p^3)/m3p^2 l <- (m4p * m2p)/m3p^2 phi <- (3 * (k - 16 * (l - 1)))/ (16 * (l - 1) - 8 - 3 * k) sgnm3p <- m3p/abs(m3p) # Revert to 3-parameter beta distribution with # beta=1 if 4-parameter method of moments does not # give an appropriate solution. if(as.double(k * (phi + 1) + (phi + 2)^2) <= 0) { z <- (2 * (1 - mprime1p) * m2p)/m3p new <- m2p/(1 - mprime1p)^2 betahat <- (z * (1 - new) - 2)/(1 - new + 2 * new * z) alphahat <- (new * betahat * (1 + betahat)) /(1 - new * betahat) ahat <- ((alphahat + betahat) * mprime1p - alphahat)/betahat bhat <- 1 } # Fit the 4-parameter beta distribution if method of # moments gives an appropriate solution. alphahat, # betahat, ahat, and bhat are estimates of the 4 # respective parameters: alpha, beta, a, and b. # alpha and beta are the usual parameters of the # 2-parameter beta distribution. a is the lowest # value that the true score can take, while b is the # highest such value. else { theta <- (sgnm3p * phi * (phi + 2))/ sqrt(k * (phi + 1) + (phi + 2)^2) alphahat <- (phi - theta)/2 betahat <- (phi + theta)/2 ahat <- (mprime1p - sqrt((m2p * alphahat * (alphahat + betahat + 1))/betahat)) bhat <- (mprime1p + sqrt((m2p * betahat * (alphahat + betahat + 1))/alphahat)) } if((ahat < 0) || (alphahat < 0) || (betahat < 0)) { rrr <- m2p/mprime1p^2 w <- (2 * m2p^2)/(mprime1p * m3p) alphahat <- (w * (rrr - 1) - 2)/(1 - rrr - 2 * rrr * w) betahat <- (rrr * alphahat * (alphahat + 1)) /(1 - rrr * alphahat) bhat <- ((alphahat + betahat) * mprime1p) /alphahat ahat <- 0 } # Fit a simple beta model if the method of moments # continues not to give an appropriate solution. if((bhat > 1) || (alphahat < 0) || (betahat < 0)) { z <- (2 * (1 - mprime1p) * m2p)/m3p new <- m2p/(1 - mprime1p)^2 betahat <- (z * (1 - new) - 2)/(1 - new + 2 * new * z) alphahat <- (new * betahat * (1 + betahat)) /(1 - new * betahat) ahat <- ((alphahat + betahat) * mprime1p - alphahat)/betahat bhat <- 1 } # Figure out which values inside [ahat, bhat] belong # in which true score category. cutinp <- (cut - 0.5)/max cutinp2 <- c(ahat, cutinp, bhat) # Figure out which observed scores will map into # which observed classification. 1e-07 term # necessary because of unexpected results from # SPlus' "floor" function. cut2 <- floor(((cut - 0.5) * n)/max - 1e-07) cut3 <- c(0, cut2, n) # Figure out each entry in confusion matrix, which # gives the joint distribution of true score and # observed score. This program integrates over true # score, rather than L&L's suggestion of dividing # into bins. This modification is both more exact # and much faster. Note that the additional # function "ffunc" is required to run this program. # ffunc is added at the end of this text. finalmatrix <- matrix(0, ncol = length(cut) + 1, nrow = length(cut) + 1) for(i in 1:nrow(finalmatrix)) { for(j in 1:nrow(finalmatrix)) { finalmatrix[i, j] <- integrate(ffunc, cutinp2[i], cutinp2[i + 1], ahat1 = ahat, bhat1 = bhat, alphahat1 = alphahat, betahat1 = betahat, lowbinom = cut3[j], highbinom = cut3[j + 1], n1 = n)$integral } } # If adjust is set to 1, adjust the entries to match # the observed score distribution. if(adjust == 1) { proportions <- c(0, length(cut) + 1) xlowest <- x[x < cut[1]] proportions[1] <- length(xlowest)/length(x) if(length(cut) > 1) { for(i in 2:length(cut)) { temp <- x[x < cut[i]] proportions[i] <- (length(temp)/length(x) - sum(proportions[1:(i - 1)])) } } proportions[length(cut) + 1] <- 1 - sum(proportions[1:length(cut)]) for(i in 1:ncol(finalmatrix)) { finalmatrix[, i] <- (finalmatrix[, i] * proportions[i])/sum(finalmatrix[, i]) } } # Return the parameters of the fitted 4-parameter # beta distribution, the effective test length, and # the confusion matrix. list(alphahat = alphahat, betahat = betahat, ahat = ahat, bhat = bhat, n = n, finalmatrix = finalmatrix) } # This short function "ffunc" is needed to perform # the integral used in the above function. All # arguments are defined within the function that # calls it. > ffunc function(x, ahat1, bhat1, alphahat1, betahat1, lowbinom, highbinom, n1) { (dbeta(((x - ahat1))/(bhat1 - ahat1), alphahat1, betahat1) * (pbinom( highbinom, n1, x) - pbinom(lowbinom, n1, x))) /(bhat1 - ahat1) } # The following is an example of the function's # output. It is based on 2003 raw score data of # grade 4 math students. $alphahat is the # estimated value of alpha in the 4-parameter beta # distribution. $betahat is the estimated value of # beta in this 4-parameter beta distribution. # $ahat and $bhat are the estimated lower and upper # bounds of this true score distribution. $n is # the so-called "effective test length" in the # Livingston-Lewis paper. $finalmatrix is the final # confusion matrix outputted by the function. Its # entries have been rounded off for ease of viewing. $alphahat: [1] 1.377472 $betahat: [1] 0.783828 $ahat: [1] 0.2031712 $bhat: [1] 0.9505744 $n: [1] 68 $finalmatrix: [,1] [,2] [,3] [,4] [,5] [1,] 0.04458 0.01162 0.00000 0.00000 0.00000 [2,] 0.01921 0.16683 0.02461 0.00001 0.00000 [3,] 0.00000 0.03097 0.21290 0.03775 0.00004 [4,] 0.00000 0.00000 0.03094 0.19988 0.03115 [5,] 0.00000 0.00000 0.00001 0.03766 0.15181