mlogit.control <- function (epsilon = 1e-08,
    maxit = 20,
    IRLS.maxit = 0,
    trace = FALSE)
{
    if (!is.numeric(epsilon) || epsilon <= 0)
        stop("value of 'epsilon' must be > 0")
#     if (!is.numeric(optim.tol) || epsilon <= 0)
#         stop("value of 'optim.tol' must be > 0")
    if (!is.numeric(maxit) || maxit <= 0 || !is.numeric(IRLS.maxit) || IRLS.maxit < 0)
        stop("maximum number of iterations must be > 0")
    list(epsilon = epsilon, maxit = maxit, trace = trace, IRLS.maxit=IRLS.maxit)
}


mlogit <- function(formula, baseline=1, data, weights,
                subset, na.action, start = NULL, control=mlogit.control(...),
                model = TRUE, x = FALSE, y = TRUE,
                contrasts = NULL, ...){
     call <- match.call()
     ## extract x, y, etc from the model formula and frame
    if(missing(data)) data <- environment(formula)
    mf <- match.call(expand.dots = FALSE)
    m <- match(c("formula", "data", "subset", "weights", "na.action"), names(mf), 0)
    mf <- mf[c(1, m)]
    mf$drop.unused.levels <- TRUE
    mf[[1]] <- as.name("model.frame")
    mf <- eval(mf, parent.frame())
    mt <- attr(mf, "terms") 

    Y <- model.response(mf, "any") # e.g. factors are allowed
    if(is.matrix(Y)){
        if(ncol(Y)<2) stop("need at least two response categories")
        }
    else if(is.atomic(Y))
        Y <- Dummies(Y)
    else stop("cannot left hand side of mode",mode(Y))
    if(any(Y<0)) stop("negative frequencies not allowed")
    if(!length(colnames(Y))) colnames(Y) <- as.character(seq(ncol(Y)))
    if(is.character(baseline)) baseline <- match(baseline,colnames(Y))
    basename <- colnames(Y)[baseline]
    if(!(baseline %in% seq(ncol(Y)))) stop("invalid baseline category")
    ## null model support
    X <- if (!is.empty.model(mt)) model.matrix(mt, mf, contrasts) else matrix(,NROW(Y),0)
    ## avoid any problems with 1D or nx1 arrays by as.vector.
    weights <- as.vector(model.weights(mf))
    if(!is.null(weights) && !is.numeric(weights))
        stop("'weights' must be a numeric vector")
    weights <- if(!is.null(weights)) weights*rowSums(Y) else rowSums(Y)
    Ynames <- colnames(Y)
    Y <- Y[,-baseline,drop=FALSE]/rowSums(Y)
    Y[weights==0,] <- 0
    fit <- mlogit.fit(X=X,Y=Y,weights=weights,start=start,control=control,
                      intercept = attr(mt, "intercept") > 0)
    if(model) fit$model <- mf
    fit$na.action <- attr(mf,"na.action")
    if(x) fit$x <- X
    if(!y) fit$y <- NULL
    fv <- fit$fitted.values
    fv <- matrix(0,nrow=nrow(fv),ncol=ncol(fv)+1)
    fv[,-baseline] <- fit$fitted.values
    fv[,baseline] <- 1 - rowSums(fit$fitted.values)
    colnames(fv) <- Ynames
    rownames(fv) <- rownames(fit$fitted.values)
    fit$fitted.values <- fv
    fit <- c(fit, list(call = call, formula = formula,
                        terms = mt, data = data,
                        control = control,
                        contrasts = attr(X, "contrasts"),
                        xlevels = .getXlevels(mt, mf),
                        baseline = basename,
                        dispersion = 1
                      ))
    class(fit) <- "mlogit"
    fit
}

mlogit.P <- function(Eta){
  expEta <- matrix(1,nrow=nrow(Eta),ncol=ncol(Eta)+1)
  expEta[,-1] <- exp(Eta)
  expEta[,-1,drop=FALSE]/rowSums(expEta)
}

mlogit.dev.resids <- function(Y,P,weights){
  #if(length(P)<length(Y))
  #  P <- matrix(P,nrow=nrow(Y),ncol=ncol(Y),byrow=TRUE)
  if(!identical(dim(Y),dim(P))) stop("Y and P do not match")
  Y <- cbind(1-rowSums(Y),Y)
  P <- cbind(1-rowSums(P),P)
  dev.resids <- array(0,dim=dim(Y))
  dev.resids[] <- suppressWarnings(ifelse(Y<=0|weights==0,0,2*weights*Y*(log(Y/P))))
  dev.resids
}

mlogit.logLik <- function(Y,P,weights){

  if(!identical(dim(Y),dim(P))) stop("Y and P do not match")
  Y <- cbind(1-rowSums(Y),Y)
  P <- cbind(1-rowSums(P),P)
  ll <- suppressWarnings(ifelse(Y<=0|weights==0,0,weights*Y*(log(P))))
  sum(ll)
}

logLik.mlogit <- function(object,...) object$logLik

print.mlogit <- function(x,digits= max(3, getOption("digits") - 3), ...){
    cat("\nCall: ", deparse(x$call), "\n\n")
    cat("Baseline category: ",x$baseline,"\n")
    if(length(coef(x))) {
        cat("Coefficients")
        if(is.character(co <- x$contrasts))
            cat("  [contrasts: ",
                apply(cbind(names(co),co), 1, paste, collapse="="), "]")
        cat(":\n")
        print.default(format(x$coefficients, digits=digits),
                      print.gap = 2, quote = FALSE)
    } else cat("No coefficients\n\n")
    if(nchar(mess <- naprint(x$na.action))) cat("  (",mess, ")\n", sep="")
    cat("Null Deviance:    ",   format(signif(x$null.deviance, digits)),
        "\nResidual Deviance:", format(signif(x$deviance, digits)),
        "\n")
    invisible(x)
}

summary.mlogit <- function(object,correlation = FALSE, symbolic.cor = FALSE,...){
    ## calculate coef table

    coef <- object$coefficients
    var.cf <- diag(object$covmat)
    s.err <- sqrt(var.cf)
    zvalue <- coef/s.err
    pvalue <- 2*pnorm(-abs(zvalue))

    coef.table <- array(NA,dim=c(dim(coef),4))
    dimnames(coef.table) <- c(dimnames(coef),
        list(c("Estimate", "Std. Error","z value","Pr(>|z|)")))
    coef.table[,,1] <- coef
    coef.table[,,2] <- s.err
    coef.table[,,3] <- zvalue
    coef.table[,,4] <- pvalue
    if(getOption("mlogit.show.baseline")){
        dimnames(coef.table)[[2]] <- 
            paste(dimnames(coef.table)[[2]],
                object$baseline,sep=getOption("mlogit.baseline.sep")
                )
    }

    ans <- c(object[c("call","terms","baseline","deviance","contrasts",
                       "null.deviance","iter","na.action")],
              list(coefficients = aperm(coef.table,c(1,3,2)),cov.scaled=object$covmat,
                    cov.unscaled=object$covmat))
    p <- length(coef)
    if(correlation && p > 0) {
        dd <- sqrt(diag(ans$covmat.unscaled))
        ans$correlation <-
            ans$covmat.unscaled/outer(dd,dd)
        ans$symbolic.cor <- symbolic.cor
    }
    class(ans) <- "summary.mlogit"
    return(ans)
}

print.summary.mlogit <-
    function (x, digits = max(3, getOption("digits") - 3),
              symbolic.cor = x$symbolic.cor,
              signif.stars = getOption("show.signif.stars"), ...){
    cat("\nCall:\n")
    cat(paste(deparse(x$call), sep="\n", collapse="\n"), "\n\n", sep="")

    coefs <- x$coefficients

    for(i in 1:dim(coefs)[3]){
      eqtitle <- paste("Equation: ",dimnames(coefs)[[3]][i])
      underline <- paste(rep("-",nchar(eqtitle)),collapse="")
      cat("\n\n",eqtitle,"\n",sep="")
      cat(underline,"\n")
      printCoefmat(coefs[,,i], digits=digits, signif.stars=signif.stars,
                     na.print="NA", ...)
    }
    cat("\n")  
    cat("AIC: ", format(x$aic, digits= max(4, digits+1)),"\n\n",
        "Number of Fisher Scoring iterations: ", x$iter,
        "\n", sep="")
    correl <- x$correlation
    if(!is.null(correl)) {
        p <- NCOL(correl)
        if(p > 1) {
            cat("\nCorrelation of Coefficients:\n")
            if(is.logical(symbolic.cor) && symbolic.cor) {
                print(symnum(correl, abbr.col = NULL))
            } else {
                correl <- format(round(correl, 2), nsmall = 2, digits = digits)
                correl[!lower.tri(correl)] <- ""
                print(correl[-1, -p, drop=FALSE], quote = FALSE)
            }
        }
    }
    
    cat("\n")
    invisible(x)
}




getSummary.mlogit <- function(obj,
            alpha=.05,...){
  
  smry <- summary(obj)
  N <- if(length(obj$prior.weights)) sum(obj$prior.weights)
    else sum(smry$df[1:2])
  
  coef <- smry$coef
  coef.dims <- dim(coef)
  coef.dimnames <- dimnames(coef)
  
  lower <- qnorm(p=alpha/2,mean=coef[,1,],sd=coef[,2,])
  upper <- qnorm(p=1-alpha/2,mean=coef[,1,],sd=coef[,2,])

  coef <- c(aperm(coef,c(1,3,2)),lower,upper)
  coef <- aperm(array(coef,dim=c(coef.dims[1],coef.dims[3],coef.dims[2]+2)),
              c(1,3,2))
  dimnames(coef) <-c(coef.dimnames[1],
              list(c("est","se","stat","p","lwr","upr")),
              list(coef.dimnames[[3]]))

#   eqs <- seq.int(dim(coef)[3])
#   coeflist <- lapply(eqs,function(i){
#                   coef.i <- coef[,,i,drop=FALSE]
#                   dim(coef.i) <- dim(coef)[1:2]
#                   dimnames(coef.i) <- dimnames(coef)[1:2]
#                   coef.i
#                 })
#   names(coeflist) <- dimnames(coef)[[3]]
                
  LR <- smry$null.deviance - smry$deviance
  
  deviance <- deviance(obj)


  sumstat <- c(
          LR             = LR,
          logLik        = obj$logLik,
          deviance      = deviance,
          N             = N
          )

  #coef <- apply(coef,1,applyTemplate,template=coef.template)
  
  #sumstat <- drop(applyTemplate(sumstat,template=sumstat.template))
  extra <- paste("Baseline category: ",obj$baseline)
  list(coef=coef,sumstat=sumstat,extra=extra)
}


anova.mlogit <- function (object, ..., dispersion = NULL, test = NULL)
{
    dotargs <- list(...)
    named <- if (is.null(names(dotargs)))
        rep(FALSE, length(dotargs))
    else (names(dotargs) != "")
    if (any(named))
        warning("the following arguments to 'anova.glm' are invalid and dropped: ",
            paste(deparse(dotargs[named]), collapse = ", "))
    dotargs <- dotargs[!named]
    is.mlogit <- unlist(lapply(dotargs, function(x) inherits(x,
        "mlogit")))
    dotargs <- dotargs[is.mlogit]
    if (length(dotargs) > 0)
        return(anova.mlogitlist(c(list(object), dotargs), dispersion = dispersion,
            test = test))
    varlist <- attr(object$terms, "variables")
    X <- if (n <- match("x", names(object), 0))
        object[[n]]
    else {
        mf <- object$call
        m <- match(c("formula", "data", "subset", "weights", "na.action"), names(mf), 0)
        mf <- mf[c(1, m)]
        mf$drop.unused.levels <- TRUE
        mf[[1]] <- as.name("model.frame")
        mf <- eval(mf, parent.frame())
        mt <- attr(mf, "terms") 
        if (!is.empty.model(mt)) model.matrix(mt, mf, contrasts) else matrix(,object$y,0)
    }
    varseq <- attr(X, "assign")
    nvars <- max(0, varseq)
    resdev <- resdf <- NULL
    if (nvars > 1) {
        for (i in 1:(nvars - 1)) {
            fit <- mlogit.fit(X = X[, varseq <= i, drop = FALSE],
                Y = object$y,
                weights = object$prior.weights,
                intercept= attr(mt, "intercept") > 0,
                control = object$control)
            resdev <- c(resdev, fit$deviance)
            resdf <- c(resdf, fit$df.residual)
        }
    }
    resdf <- c(object$df.null, resdf, object$df.residual)
    resdev <- c(object$null.deviance, resdev, object$deviance)
    table <- data.frame(c(NA, -diff(resdf)), c(NA, pmax(0, -diff(resdev))),
        resdf, resdev)
    tl <- attr(object$terms, "term.labels")
    if (length(tl) == 0)
        table <- table[1, , drop = FALSE]
    dimnames(table) <- list(c("NULL", tl), c("Df", "Deviance",
        "Resid. Df", "Resid. Dev"))
    title <- paste("Analysis of Deviance Table", "\n\nModel: multinomial logit",
        "\n\nResponse: ", as.character(varlist[-1])[1], "\n\nTerms added sequentially (first to last)\n\n",
        sep = "")
    df.dispersion <- Inf
    if (is.null(dispersion)) {
        dispersion <- object$dispersion
        df.dispersion <- if (dispersion == 1)
            Inf
        else object$df.residual
    }
    if (!is.null(test)) {
        if (test == "F") {
            warning("using F test with a multinomial logit model is inappropriate")
        }
        table <- stat.anova(table = table, test = test, scale = dispersion,
            df.scale = df.dispersion, n = NROW(X))
    }
    structure(table, heading = title, class = c("anova", "data.frame"))
}

anova.mlogitlist <- function(object, ..., dispersion=NULL, test=NULL)
{
    # copied and modified from anova.glmlist
    ## find responses for all models and remove
    ## any models with a different response

    responses <- as.character(lapply(object, function(x) {
        deparse(formula(x)[[2]])} ))
    sameresp <- responses==responses[1]
    if(!all(sameresp)) {
        object <- object[sameresp]
        warning("models with response ", deparse(responses[!sameresp]),
                " removed because response differs from model 1")
    }

    ns <- sapply(object, function(x) NROW(x$residuals))
    if(any(ns != ns[1]))
        stop("models were not all fitted to the same size of dataset")

    ## calculate the number of models

    nmodels <- length(object)
    if(nmodels==1)
        return(anova.mlogit(object[[1]], dispersion=dispersion, test=test))

    ## extract statistics

    resdf  <- as.numeric(lapply(object, function(x) x$df.residual))
    resdev <- as.numeric(lapply(object, function(x) x$deviance))

    ## construct table and title

    table <- data.frame(resdf, resdev, c(NA, -diff(resdf)),
                        c(NA, -diff(resdev)) )
    variables <- lapply(object, function(x)
                        paste(deparse(formula(x)), collapse="\n") )
    dimnames(table) <- list(1:nmodels, c("Resid. Df", "Resid. Dev", "Df",
                                         "Deviance"))
    title <- "Analysis of Deviance Table\n"
    topnote <- paste("Model ", format(1:nmodels),": ",
                     variables, sep="", collapse="\n")

    ## calculate test statistic if needed

    if(!is.null(test)) {
        bigmodel <- object[[order(resdf)[1]]]
        dispersion <- bigmodel$dispersion
        df.dispersion <- if (dispersion == 1) Inf else min(resdf)
        if (test == "F") {
            warning("using F test with a multinomial logit model is inappropriate")
        }
        table <- stat.anova(table = table, test = test,
                            scale = dispersion, df.scale = df.dispersion,
                            n = NROW(bigmodel$residuals))
    }
    structure(table, heading = c(title, topnote),
              class = c("anova", "data.frame"))
}



mlogit.fit <- function(X,Y,weights=rep(1,nobs),
                  start=NULL,control=mlogit.control(),intercept=TRUE){
    xnames <- colnames(X)
    ynames <- dimnames(Y)
    conv <- FALSE
    use.NR <- FALSE
    nobs <- nrow(Y)
    ncat <- ncol(Y)
    nvars <- ncol(X)
    EMPTY <- nvars == 0
    P0 <- if(intercept)
       matrix(
            colSums(weights*Y)/sum(weights),
            nrow=nobs,
            ncol=ncat,
            byrow=TRUE
            )
       else matrix(
            1/(ncat+1),
            nrow=nobs,
            ncol=ncat
            )
    if(EMPTY){
      Eta <- matrix(0,nrow=nobs,ncol=ncat)
      P <- mlogit.P(Eta)
      dev <- sum(mlogit.dev.resids(Y,P,weights))
      w <- sqrt(weights*P*(1-P))
      residuals <- (Y-P)/(P*(1-P))
      good <- rep(TRUE,nrow(residuals))
      boundary <- conv <- TRUE
      Coef <- numeric(0)
      iter <- 0
      covmat <- numeric(0)
    }
    else{
      stepped.back <- FALSE
      if(is.null(start)){
        #Pstart <- (weights*cbind(1-rowSums(Y),Y))/(weights+ncat)
        Pstart <- cbind(1-rowSums(Y),Y)+0.5
        Pstart <- Pstart/rowSums(Pstart)
        Eta <- log(Pstart[,-1,drop=FALSE]) - log(Pstart[,1]) #+  log(P0) - log(1-rowSums(P0))
        P <- Pstart <- Pstart[,-1,drop=FALSE]
        CoefOld <- start

        varP <- P*(1-P)
        W <- weights*varP
        Good <- W > 0
        good <- rowSums(Good)>0
        if (all(!good)) {
              conv <- FALSE
              warning("no observations informative at iteration ", iter)
              break
          }
        Z <- Eta[good,,drop=FALSE] + (Y-P)[good,,drop=FALSE]/varP[good,,drop=FALSE]
        x <- X[good, ]
        ngoodobs <- as.integer(nobs - sum(!good))
        Coef <- matrix(NA,nrow=nvars,ncol=ncat)
        ranks <- rep(0,ncat)
        for(j in 1:ncat){
          z <- Z[,j]
          w <- W[good,j]
          ## call Fortran code
          #fit <- .Fortran("dqrls",
          #                qr = x * w, n = ngoodobs,
          #                p = nvars, y = w * z, ny = as.integer(1),
          #                tol = min(1e-7, control$epsilon/1000),
          #                coefficients = double(nvars),
          #                residuals = double(ngoodobs),
          #                effects = double(ngoodobs),
          #                rank = integer(1),
          #                pivot = 1:nvars, qraux = double(nvars),
          #                work = double(2 * nvars),
          #                PACKAGE = "base")
          fit <- lsfit(x=x,y=z,wt=w,intercept=FALSE,
                       tol = min(1e-7, control$epsilon/1000))
          #browser()
          if (any(!is.finite(fit$coefficients))) {
              conv <- FALSE
              warning("non-finite coefficients at iteration ", iter)
              break
          }
          ## stop if not enough parameters
          if (nobs < fit$qr$rank)
              stop(gettextf("X matrix has rank %d, but only %d observations",
                            fit$qr$rank, nobs), domain = NA)
          ## update Coef matrix
          Coef[,j] <- fit$coefficients
          ranks[j] <- fit$qr$rank
        }
        Eta <- X%*%Coef
        P <- mlogit.P(Eta)
        dev <- sum(mlogit.dev.resids(Y,P,weights))
        if(!is.finite(dev))
          stop("no valid set of coefficients has been found: please supply starting values", call. = FALSE)
        
      } else {
        Coef <- matrix(start,nrow=nvars,ncol=ncat)
        Eta <- X %*%Coef
        P <- mlogit.P(Eta)
        dev <- sum(mlogit.dev.resids(Y,P,weights))
      }
      devold <- dev
      if(control$trace)
            cat("Initial deviance =", dev, "\n")
      NR.first.iter <- 0
      ## ------- IRLS iterations ------------
      ## (inefficient but pretty stable ...)
      if(control$IRLS.maxit > 1)
       for(iter in 1:control$IRLS.maxit){

          varP <- P*(1-P)
          W <- weights*varP
          Good <- W > 0
          good <- rowSums(Good)>0
          if (all(!good)) {
                conv <- FALSE
                warning("no observations informative at iteration ", iter)
                break
            }
          Z <- Eta[good,,drop=FALSE] + (Y-P)[good,,drop=FALSE]/varP[good,,drop=FALSE]
          x <- X[good, ,drop=FALSE]
          ngoodobs <- as.integer(nobs - sum(!good))
          Coef <- matrix(NA,nrow=nvars,ncol=ncat)
          ranks <- rep(0,ncat)
          for(j in 1:ncat){
            z <- Z[,j]
            w <- W[good,j]
            # # call Fortran code
            #fit <- .Fortran("dqrls",
            #                qr = x * w, n = ngoodobs,
            #                p = nvars, y = w * z, ny = as.integer(1),
            #                tol = min(1e-7, control$epsilon/1000),
            #                coefficients = double(nvars),
            #                residuals = double(ngoodobs),
            #                effects = double(ngoodobs),
            #                rank = integer(1),
            #                pivot = 1:nvars, qraux = double(nvars),
            #                work = double(2 * nvars),
            #                PACKAGE = "base")
            fit <- lsfit(x=x,y=z,wt=w,intercept=FALSE,
                         tol = min(1e-7, control$epsilon/1000))
            if (any(!is.finite(fit$coefficients))) {
                conv <- FALSE
                warning("non-finite coefficients at iteration ", iter)
                break
            }
            ## stop if not enough parameters
            if (nobs < fit$qr$rank)
                stop(gettextf("X matrix has rank %d, but only %d observations",
                              fit$qr$rank, nobs), domain = NA)
            ## update Coef matrix
            Coef[,j] <- fit$coefficients
            ranks[j] <- fit$qr$rank
            
          }
          Eta <- X%*%Coef
          P <- mlogit.P(Eta)
          dev <- sum(mlogit.dev.resids(Y,P,weights))
          ## check for divergence
          boundary <- FALSE
          if(!is.finite(dev)){
            if(is.null(CoefOld))
                stop("no valid set of coefficients has been found: please supply starting values", call. = FALSE)
             warning("step size truncated due to divergence", call. = FALSE)
             ii <- 1
             while (!is.finite(dev)){
                if(ii > control$maxit)
                  stop("inner loop; cannot correct step size")
                ii <- ii + 1
                Coef <- (Coef + CoefOld)/2
                Eta <- X %*% Coef
                P <- mlogit.P(Eta)
                dev <- sum(mlogit.dev.resids(Y,P,weights))
             }
              boundary <- TRUE
              if (control$trace)
                  cat("Step halved: new deviance =", dev, "\n")
          } ## inner loop 1
          ## check whether we could decrease the deviance
          if(dev > devold && iter > 1){
            stepped.back <- TRUE
            dev <- devold
            Coef <- CoefOld
            Eta <- X%*%Coef
            P <- mlogit.P(Eta)
            if (control$trace)
                cat("Changing to BFGS\n")
            break
          }
          if(control$trace)
            cat("IRLS Iteration: ",iter, "Deviance =", dev, "\n")
          ## check for convergence
          crit <- abs(dev - devold)/(0.1 + abs(dev))
          #if(control$trace)
          #  cat(" --- crit value:",crit,"epsilon:",control$epsilon,"\n")
          if (crit < control$epsilon) {
              conv <- TRUE
              break
          } else {
              devold <- dev
              CoefOld <- Coef
          }
      } ## --------- end IRLS iterations --------------
#     if(control$trace)
#           cat("Initial deviance =", dev, "\n")
      
      grad <- numeric(nvars*ncat)
      Info <- matrix(NA,nrow=nvars*ncat,ncol=nvars*ncat)
      ixix <- matrix(1:(nvars*ncat),nrow=nvars,ncol=ncat)
      
      for(iter in 1:control$maxit){

        devold <- dev
        CoefOld <- Coef
        varP <- P*(1-P)
        R <- (Y-P)

        for(j in 1:ncat){
          jj <- ixix[,j]
          #browser()
          grad[jj] <- crossprod(X,weights*R[,j])
          Info[jj,jj] <- crossprod(X,weights*varP[,j]*X)
          if(j < ncat){
            for(k in (j+1):ncat){
              kk <- ixix[,k]
              Info[jj,kk] <- Info[kk,jj] <- -crossprod(X,weights*P[,j]*P[,k]*X)
            }
          }
        }

        Coef[] <- Coef[] + solve(Info,grad)
        #browser()
        Eta <- X %*%Coef
        P <- mlogit.P(Eta)
        dev <- sum(mlogit.dev.resids(Y,P,weights))
        ## check for divergence
        boundary <- FALSE
        if(!is.finite(dev)){
          if(is.null(CoefOld))
              stop("no valid set of coefficients has been found: please supply starting values", call. = FALSE)
            warning("step size truncated due to divergence", call. = FALSE)
            ii <- 1
            while (!is.finite(dev)){
              if(ii > control$maxit)
                stop("inner loop; cannot correct step size")
              ii <- ii + 1
              Coef <- (Coef + CoefOld)/2
              Eta <- X %*% Coef
              P <- mlogit.P(Eta)
              dev <- sum(mlogit.dev.resids(Y,P,weights))
            }
            boundary <- TRUE
            if (control$trace)
                cat("Step halved: new deviance =", dev, "\n")
        } 
        if(control$trace)
          cat("Iteration: ",iter, "Deviance =", dev)#, "\n")
        ## check for convergence
        crit <- abs(dev - devold)/(0.1 + abs(dev))
        if(control$trace)
          cat(" criterion:",crit,
              #"epsilon:",control$epsilon,
              "\n")
        if (crit < control$epsilon) {
            conv <- TRUE
            break
        } else {
            devold <- dev
            CoefOld <- Coef
        }

      }
      
      eps <- 10*.Machine$double.eps
      if (any(P < eps) || any(1-P < eps))
          warning("fitted rates numerically 0 occurred")
      varP <- P*(1-P)
      residuals <- (Y-P)/varP
      Info <- matrix(NA,nrow=nvars*ncat,ncol=nvars*ncat)
      ixix <- matrix(1:(nvars*ncat),nrow=nvars,ncol=ncat)
      for(j in 1:ncat){
        jj <- ixix[,j]
        Info[jj,jj] <- crossprod(X,weights*varP[,j]*X)
        if(j < ncat){
          for(k in (j+1):ncat){
            kk <- ixix[,k]
            Info[jj,kk] <- Info[kk,jj] <- -crossprod(X,weights*P[,j]*P[,k]*X)
          }
        }
      }
      covmat <- solve(Info)
      rownames(Coef) <- colnames(X)
      colnames(Coef) <- colnames(Y)
    } # else EMPTY
    n.ok <- nobs - sum(weights==0)
    nulldf <- (n.ok - as.integer(intercept))*ncat
    rank <- if(EMPTY) 0 else sum(ranks)
    resdf  <- n.ok*ncat - rank
    nulldev <- sum(mlogit.dev.resids(Y,P0,weights))
    logLik <- mlogit.logLik(Y,P,weights)
    colnames(P) <- colnames(Y)
    list(coefficients = Coef,covmat = covmat, residuals = residuals, fitted.values =  P,
      rank = ranks,
      linear.predictors = Eta, deviance = dev, null.deviance = nulldev, logLik = logLik,
      iter = iter, prior.weights = weights,
      df.residual = resdf, df.null = nulldf, y = Y,
      converged = conv, boundary = boundary)
}
              