## Calculate expected percent correctly predicted, expected percent in modal
## category, and expected percent reduction in error for probit, logit, and
## related models.  Also optionally presents a classification table
## (cfr Stata's lstat).
##
## Implements algorithm suggested by Herron (2000) with extensions suggested
## by Harvey Palmer for polychotomous probit and logit and for an estimated
## "expected proportional reduction in error" score.
##
## Note that the "ePMC" presented here is mathematically the same as the
## "proportional chance criterion" presented in other works, apparently
## derived in Morrison (1969) and presented in Hair et al. (1998).
##
## Ported from a Stata module to R/S on 20030415

## Definitions of specificity and sensitivity borrowed from the Stata
## reference manual.

## Copyright (C) 2003, 2010, 2012 Christopher N. Lawrence
## Licensed under the terms of the GNU General Public License, version 2 or later

epcp <- function(fit, ...) {
    UseMethod("epcp")
}

## Handles binomial responses (probit, logit, cloglog)
## Doesn't handle the two-variable response case yet (should it?)
epcp.glm <- function (fit, data=NULL, ctable=TRUE, cutoff=0.5) {
    if (family(fit)$family != "binomial")
      stop("Not a binomial response model")

    pr <- predict(fit, type="response")
    count <- length(pr)
    ## Classify the response into 0/1
    pclass <- as.integer(pr >= cutoff)
    ## Get the response (dependent variable) from the fit
    if(!is.null(data)) {
      fr <- model.frame(fit, data=data)
    } else {
      fr <- model.frame(fit)
    }
    response <- names(fr)[1]
    rclass <- fr[,1]

    rclass <- rclass[!is.na(pclass)]
    pclass <- pclass[!is.na(pclass)]
    pr <- pr[!is.na(pclass)]

    ## same: TRUE if response is the same as pred.
    same <- rclass == pclass            # No. of correct predictions
    correct <- length(same[same])

    vec0 <- !(fit$y)
    vec1 <- !vec0                  # This should be the same as !vec0

    ## Count 1s and 0s in the response (d.v.)
    cat0 <- length(rclass[vec0])
    cat1 <- length(rclass[vec1])
    modal <- max(cat0, cat1)

    result <- list()
    result$sensitivity <- (length(rclass[vec1 & pclass]) / cat1)*100
    result$specificity <- (length(rclass[vec0 & !pclass]) / cat0)*100

    lrt <- fit$null.deviance-fit$deviance
    satlrt <- fit$null.deviance
    result$rsq.an <- lrt/(count+lrt)
    result$rsq.maddala <- 1-exp(-lrt/count)
    result$rsq.naglekerke <- result$rsq.maddala/(1-exp(-satlrt/count))
    result$rsq.mcfadden <- lrt/satlrt
    result$lambda.cramer <- mean(pr[vec1])-mean(pr[vec0])
    result$rsq.vz <- result$rsq.an/(satlrt/(count+satlrt))
    ##result$rsq.bl <- (rclass*pr+(1-rclass)*(1-pr))/count
    rmean <- mean(fit$y)
    result$rsq.efron <- 1-(sum((fit$y-pr)**2)/sum((fit$y-rmean)**2))

    bx <- predict(fit)
    meanbx <- mean(bx)
    ssr <- sum((bx-meanbx)**2)
    result$rsq.mz <- ssr/(count+ssr)

    result$pcp <- correct/count*100
    result$pmc <- modal/count*100
    result$pre <- 100*(result$pcp-result$pmc)/(100-result$pmc)

    result$epmc <- 100*((cat0*cat0)+(cat1*cat1))/(count*count)
    ## Same as Ben-Akiva and Lerman (1985), Kay and Little (1986)
    result$epcp <- 100*(sum(fit$y*pr) + sum((1-fit$y)*(1-pr)))/count
    result$epre <- 100*(result$epcp-result$epmc)/(100-result$epmc)

    if(ctable) {
      result$ctable <- table(rclass,pclass,dnn=c(response,
                                             paste('predicted values of',
                                                   response)))
    }
#     cat("Sensitivity:", formatC(sensitivity), "%\n")
#     cat("Specificity:", formatC(specificity), "%\n")
#     cat("\n")
#     cat("Percent correctly predicted (PCP):    ", formatC(pcp), "%\n")
#     cat("Percent in modal category (PMC):      ", formatC(pmc), "%\n")
#     cat("Proportional reduction in error (PRE):", formatC(pre), "%\n")
#     cat("\n")
#     cat("ePCP:", formatC(epcp), "%\n")
#     cat("ePMC:", formatC(epmc), "%\n")
#     cat("ePRE:", formatC(epre), "%\n")
    class(result) <- "epcp"
    return (result)
}

print.epcp <- function(ob) {
  if (!inherits(ob, "epcp")) stop("Not an epcp object.")

  return (print.default(ob))
}


## Ordinal response models
epcp.polr <- function (fit, data=NULL, ctable=TRUE, cutoff=0.5) {
    ## Ignore any cutoff argument.
    ##if(!inherits(fit, "polr")) stop("Not a ordinal fit")

    pr <- predict(fit, type="probs")
    count <- dim(pr)[1]
    nlev <- dim(pr)[2]
    rows <- as.integer(rownames(pr))
    ## Classify the response
    pclass <- predict(fit, type="class")
    ## Get the response (dependent variable) from the fit
    response <- attr(terms(fit), "variables")[[2]]
    if (is.null(data))
        data <- eval(fit$call$data)
    if (is.null(data))
        rclass <- eval(response)[rows]
    else
        rclass <- eval(response, data, parent.frame())[rows]

    ##pclass <- as.ordered(pclass)
    ##levels(pclass) <- levels(rclass)

    ## same: TRUE if the response is the same as the prediction
    same <- rclass == pclass
    ## No. of correct predictions
    correct <- length(same[same])

    lcounts <- numeric(nlev)
    levelnames <- levels(rclass)
    for(level in 1:nlev) {
        lcounts[level] <- length(rclass[rclass==levelnames[level]])
    }
    modal <- max(lcounts)

    result <- list()
    
    result$pcp <- correct/count*100
    result$pmc <- modal/count*100
    result$pre <- 100*(result$pcp-result$pmc)/(100-result$pmc)

    ## Takes advantage of the row operations in R/S
    lcorrect <- numeric(nlev)
    for(level in 1:nlev) {
        lcorrect[level] <- sum(pr[,level]*ifelse(pclass==levelnames[level],
                                                 1, 0))
    }
    result$epcp <- 100*sum(lcorrect)/count
    result$epmc <- 100*(sum(lcounts*lcounts))/(count*count)
    result$epre <- 100*(result$epcp-result$epmc)/(100-result$epmc)

    if(ctable) {
        result$ctable <- table(rclass,pclass)
        ## ,dnn=c(response,
        ##                                      paste('predicted values of',
        ##                                            response)))
    }

    class(result) <- "epcp"
    return (result)
}

epcp.clm2 <- function (fit, data=NULL, ctable=TRUE, cutoff=0.5) {
    ## Ignore any cutoff argument.
    if(!inherits(fit, "clm2")) stop("Not a clm2 fit")

    ret <- predict4clm2(fit, data)
    pclass <- ret$class
    pr <- ret$probs

    rclass <- fit$y
    
    count <- dim(pr)[1]
    nlev <- dim(pr)[2]
    rows <- as.integer(rownames(pr))

    ## same: TRUE if the response is the same as the prediction
    same <- rclass == pclass
    ## No. of correct predictions
    correct <- length(same[same])

    lcounts <- numeric(nlev)
    levelnames <- levels(rclass)
    for(level in 1:nlev) {
        lcounts[level] <- length(rclass[rclass==levelnames[level]])
    }
    modal <- max(lcounts)

    result <- list()
    
    result$pcp <- correct/count*100
    result$pmc <- modal/count*100
    result$pre <- 100*(result$pcp-result$pmc)/(100-result$pmc)

    ## Takes advantage of the row operations in R/S
    lcorrect <- numeric(nlev)
    for(level in 1:nlev) {
        lcorrect[level] <- sum(pr[,level]*ifelse(pclass==levelnames[level],
                                                 1, 0))
    }
    result$epcp <- 100*sum(lcorrect)/count
    result$epmc <- 100*(sum(lcounts*lcounts))/(count*count)
    result$epre <- 100*(result$epcp-result$epmc)/(100-result$epmc)

    ##result$pr <- pr
    
    if(ctable) {
        result$ctable <- table(rclass,pclass)
        ## ,dnn=c(response,
        ##                                      paste('predicted values of',
        ##                                            response)))
    }

    class(result) <- "epcp"
    return (result)
}

epcp.vglm <- function (fit, data=NULL, ctable=TRUE, cutoff=0.5) {
    ## Ignore any cutoff argument.
    if(!inherits(fit, "vglm")) stop("Not a VGLM fit")

    pr <- predict(fit, type="response")
    count <- dim(pr)[1]
    levs <- dim(pr)[2]
    rows <- as.integer(rownames(pr))
    ## Classify the response
    pclass <- max.col(pr)
    ## Get the response (dependent variable) from the fit
    response <- attr(terms(fit), "variables")[[2]]
    if (is.null(data))
        data <- eval(fit@call$data)
    if (is.null(data))
        rclass <- eval(response)[rows]
    else
        rclass <- eval(response, data, parent.frame())[rows]

    ##pclass <- as.ordered(pclass)
    ##levels(pclass) <- levels(rclass)

    ## Hopefully rclass is in the same order as pclass...
    mat <- table(rclass, pclass)
    correct <- sum(diag(mat))

    lcounts <- table(rclass)
    modal <- max(lcounts)

    result <- list()
    
    result$pcp <- correct/count*100
    result$pmc <- modal/count*100
    result$pre <- 100*(result$pcp-result$pmc)/(100-result$pmc)

    ## Takes advantage of the row operations in R/S
    lcorrect <- numeric(levs)
    for(level in 1:levs) {
        lcorrect[level] <- sum(pr[,level]*ifelse(pclass==level, 1, 0))
    }
    result$epcp <- 100*sum(lcorrect)/count
    result$epmc <- 100*(sum(lcounts*lcounts))/(count*count)
    result$epre <- 100*(result$epcp-result$epmc)/(100-result$epmc)

    if(ctable) {
      result$ctable <- mat
    }

    class(result) <- "epcp"
    return (result)
}
