tt1Return to QFC home

QFC ADMB & R user functions

  • Useful ADMB functions
  • Useful R functions
    • glm.run
      based on the set of variables, it will pick the best set of models based on AIC from glm runs with regard to all combinations of input variables
    • admb2List
      read in admb report file or dat file into R as a list to give you access these data in R working environment, data format is simple as # follow by your variable name with the actual data in separate line, it will automatically detect the input data as sclar, vector, matrix etc
    • read.admbFit
      read in admb output .par, .cor files and give you all the estimates, std, cor and variance-covariance matrix etc as a list in R



  • rgamma()
    
    
    //===================================== generate random gamma number
    // create random number following gamma distributions by using acceptance-reject method, 
    // Gamma(alpha, belta)=x^(alpha-1)*belta^alpha*exp(-belta*x)/gamma(alpha)
    // alpha is the shape par, >0  =1/CV^2
    // belta is the rate, inverse of the scale parameter, >0  =1/(mean*CV^2)
    // mean is alpha/belta,  variance is alpha/(belta^2)
    double rgamma(double alpha, random_number_generator& rnd) 
    {
      double  v0, v1, v2, fract, em, qm, tmp, gam_n1; 
      int i; 
    
      //calculate Gamma(n,1) integer part first
      tmp = 1.;
      if (int(alpha) == 0) //which means alpha lie in 0,1
          gam_n1 = 0;
      else{
          for( i = 1;i<=int(alpha);i++)
              tmp *= randu(rnd);   //using modified rnd()
          
          gam_n1 = -1. * log(tmp);
      }
    
      fract = alpha - int(alpha) + EPS;  //fractional part of alpha
      v0 = MathE / (MathE + fract);
    
      //calculate the fractional gamma(fract,1)
      while(1){
          v1 =  randu(rnd);
          v2 =  randu(rnd);
          if (v1 <= v0){
              em = pow(v1 / (v0 + EPS), 1. / fract);
              qm = v2 * pow(em, fract - 1.);
          }else{
              em = 1. - log((v1 - v0) / (1. - v0 + EPS));
              qm = v2 * mfexp(-em);
          }
          if (qm <= (pow(em, fract - 1.) * mfexp(-em))) break;        
      }
    
      return (em + gam_n1);
    }
    
    
    
    
    //======================================= generate random gamma number
    // overloading function: using two parameters, alpha is shape parameter, beta is rate =1/scale
    double rgamma(double alpha, double beta, random_number_generator& rnd) 
    {return rgamma(alpha,rnd)/beta; }
    


  • rdirichlet()
    
    //==================================== generate dirichlet random number
    //inside this function call above rgamma()
    //using one parameter, shape, # category is the size of shape parameter
    dvector rdirichlet(const dvector& shape,random_number_generator& rnd)
    {
      double tot=0;
      int ncat=shape.size();
      dvector gam(shape.indexmin(),ncat);
      dvector results(shape.indexmin(),ncat);
      int i;
    
      //generate gamma random number first
      for (i=shape.indexmin(); i<=shape.indexmax(); i++) {
        gam[i]=rgamma(shape[i],rnd);        
      }
    
      tot=sum(gam);
      //normalize them by its sum
      for (i=shape.indexmin(); i<=shape.indexmax(); i++) {
        results[i] = gam[i]/tot;
        
        if (results[i]< EPS)
          results[i]= EPS; // not put zero 
       
      }
      return results;
    }
    


  • sample()
    
    // sampling with/without replacement based on samples(source) and size(nSample) 
    //based on the input source site index, return the index, access the samples via source(out)
    //withReplace=0, for sampling without replacement,never being sampled more than once
    //withReplace=1 for with replacement, may being sampled more than once
    ivector sample(const dvector& source,int nSample,int withReplace,const random_number_generator& rnd)
    {
      int totN=source.size();
      dvector lookup(source.indexmin(),source.indexmax());
      lookup=source;
      ivector out(1,nSample);
    
      if(withReplace==0){  //sampling without replacement, all unique site index
        out(1)=source.indexmin()+int(totN*randu(rnd)); 
        lookup(out(1))=0;//if selected, then mark lookup as 0
    
        for(int i=2;i<=nSample;i++){ 
          out(i)=source.indexmin()+int(totN*randu(rnd)); 
          while(lookup(out(i))==0){ //which means already being selected
            out(i)=source.indexmin()+int(totN*randu(rnd));
          }
          lookup(out(i))=0;//if selected, then mark lookup as 0
        }
      }else{  //with replacement,allow repeat sampling    
        for(int i=1;i<=nSample;i++) 
          out(i)=source.indexmin()+int(totN*randu(rnd)); 
      }
    
      return out;  //you can use source(out) to access the sample
    }
    
    
    //======= sampling with/without replacement
    //overloading function, here the source is a int, actually is 1:source
    //now the source is integer, it implicitly refer to 1:source for indexing
    //withReplace=0, for sampling without replacement,never being sampled more than once
    //withReplace=1 for with replacement, may being sampled more than once
    dvector sample(int source,int nSample,int withReplace,const random_number_generator& rnd)
    {
      dvector lookup(1,source);
      lookup.fill_seqadd(1,1); 
      dvector out(1,nSample);
     
      if(withReplace==0){  //sampling without replacement, all unique site index
        out(1)=1+int(source*randu(rnd)); 
        lookup(out(1))=0;//if selected, then mark lookup as 0
    
        for(int i=2;i<=nSample;i++){ 
          out(i)=1+int(source*randu(rnd)); 
          while(lookup(out(i))==0){ //which means already being selected
            out(i)=1+int(source*randu(rnd));
          }
          lookup(out(i))=0;//if selected, then mark lookup as 0
        }
      }else{  //with replacement,allow repeat sampling
        for(int i=1;i<=nSample;i++) 
          out(i)=1+int(source*randu(rnd)); 
      }
    
      return out;
    }
    


  • glm.run()
    
    #  glm.run()  will run glm for every linear combination of the input variables in X, and 
    #  return a list of models which had the aic different within the aicdif, default set as 3
    # Xbest for previous best models, it can hold any number of predictors, and 
    # Xbest is always being included in the glm as a base model
    # Xbest=c("Age","TLmm","Wkg","cond")  
    # if you don't have base model, Xbest="" or Xbest=NULL , 
    # response variable is Y="iMat"
    # X hold predictor variables, it will run for all combinations of X
    # all variable in X will be added sequentially by each one, two combi, three combi,...
    # X=c("len.inc","len.inc2",...)
    # aic.adjust =True is only for small sample size, default is false, not adjust AIC
    # The return is a list of best models which aic dif are within aicdif=3 range
    # each element in a list which has aic, aicdif, var(variable names) and glm return object
    # usage: Y<-"MU1Harvest"
    # pred <- c("YPEffortMU1pre","WaEffortMU1pre","YPpopMU1","WalleyePop")
    # mylist1 <-glm.run(Y,"", pred, data=iData, family=gaussian, aicdif=3,aic.adjust=T)
    #
    glm.run <-function(Y,Xbest,X,data=data,family=family,aicdif=3, aic.adjust=FALSE,...)
    {
      #list n number of variable combination from x variable vector
      combination<-function(x,n,index=1)
      {  mat<-NULL
        if(length(x)n)
        { # recursive call      
          for(i in 1:length(x))
            if(i>=index) 
              mat<-rbind(mat,combination(x[-i],n,i))      
        }   
        return(mat)
      }
      
      #adjust aic based on sample size
      adjustAIC<-function(aic,n,k) return( aic+2*k*(k+1)/(n-k-1) )  
      
      # if user don't provide Xbest,Nobestis true, ow Xbest should hold predictors
      Nobest<-ifelse( is.null(Xbest) || Xbest=="" || nchar(Xbest)<1,TRUE,FALSE)
      
      glmout<-list()  # hold all model output
      idx<-0 # index for output list glmout
      numX <- length(X)
      
      # run previous best model on this dataset
      if(!Nobest)
      {
          X.pre<-paste(Xbest,collapse=" + ")
          glm.pre<- glm(as.formula(paste(Y,X.pre,sep=" ~ ")),
                       data=data,family=family) 
          if(aic.adjust)  
            aic<- adjustAIC(summary(glm.pre)$aic,dim(data)[1],length(Xbest)+1)
          else      
            aic<- summary(glm.pre)$aic
          cat("\n***The best previous model: ", X.pre, "     AIC= ",aic,"\n")
          idx<-idx+1
          glmout[[idx]]<-list(aic=aic,var=c(Y,Xbest),glm.pre)
      }
    
      # run glm for all variables combinations in X
      for(i in 1:numX)
      {  
        x.mat<-combination(X,i) # x.mat is a matrix
        for(j in 1:nrow(x.mat))
        {
          x.run<-ifelse(Nobest,paste(x.mat[j,],collapse=" + "),
              paste(paste(Xbest,collapse=" + "),X[i], sep=" + "))
          
          glm.tmp<- glm(as.formula(paste(Y,x.run,sep=" ~ ")),
                           data=data,family=family)
           
          if(aic.adjust)  
            aic<- adjustAIC(summary(glm.tmp)$aic,dim(data)[1],
                ifelse(Nobest,0,length(Xbest))+1+i)  # 1 is intercept
          else      
            aic<- summary(glm.tmp)$aic   #not adjust
              
          cat(i," predictor  added:  ",paste(x.mat[j,],collapse=" + "), 
              "     AIC= ", aic,"\n")     
          idx<-idx+1
          glmout[[idx]]<-list(aic=aic,var=c(Y,x.mat[j,]),glm.tmp) 
        }
      }
      
      #following find min aic model 
      idx<-0
      tmp<-numeric(0)
      #only hold output list which in aic dif range
      outlist<-list()
      for(i in 1:length(glmout))
        tmp[i]<-glmout[[i]]$aic
      for(i in 1:length(glmout))
        glmout[[i]]$aicdif<- glmout[[i]]$aic- min(tmp)  # aicdif value added
        
      #list all the model which aic difference within aicdif value
      for(i in 1:length(glmout))
        if(glmout[[i]]$aicdif <= aicdif)
        {
          idx<-idx+1
          cat("*** Picked  model ",idx,", aic=", glmout[[i]]$aic," , aic difference=", 
              glmout[[i]]$aicdif ," , model formula ",paste(glmout[[i]][[2]][1],
              paste(glmout[[i]][[2]][-1],collapse="+"),sep="~"),"\n") 
          outlist[[idx]]<- glmout[[i]] 
        }
      # output the smallest aic model
      for(i in 1:length(outlist))
        tmp[i]<-outlist[[i]]$aic
      i<-which.min(tmp)  #index for smallest aic
      cat("\nSummary: the lowest AIC model is ", i," \n",paste(outlist[[i]][[2]][1],
          paste(outlist[[i]][[2]][-1],collapse="+"),sep="~"), "   which aic=",
          outlist[[i]]$aic, " , aic difference=", outlist[[i]]$aicdif,"\n\n")
      return(outlist)  
    }
    


  • admb2List()
    
      # admb2List()  convert the admb report/data file into a list and import it in R
      #  which require the data has some specific format
      #  basically using # follow by variable name, then follow by 
      #  real data starting in new lines  
      #  # varname1  varname2
      #    23       34
      # or input character names
      #  # varnames
      #     K0   Linf   t0
      #  varname better be short, it will be variable name in the r list
      # usage:  mylist=admb2List("test.txt",import=FALSE,envir=.GlobalEnv)
      # used for read in data from admb output file like xxx.rep
      # and assign the variable to the global workspace, so you can use varname
      # under the R cmd window
    
    
      admb2List<- function (fname,import=TRUE,envir=.GlobalEnv)
      {
        if (!file.exists(fname)) stop("File ", fname, " does not exist.\n")
        f <- scan(fname,what=character(),sep = "\n", quiet =TRUE,blank.lines.skip=F)
        if (!length(f)) stop("Input file is empty \n")
    
        # will trim off the leading and trailing space for a string
        trim <- function(istr) gsub("^[[:space:]]*|[[:space:]]*$", "", istr)
    
        # first to get all variable names as list and get the start and finish row number
        varName <- list()
        idx<-0 #index for numb of new variables
        finLine<- FALSE #determine whether finish line
        newVar<- FALSE
        op<-options(warn=-1)  #turn off warning only for converting string to double later
    
    
        for (i in 1:length(f))   # first for loop
        {
          if (any(grep("^[ \t]*#", f[i]))) #comment line,num of variables
          {
            n=trim(gsub("\\#","",f[i]))
            idx<-idx+1
            varName[[idx]]<-list(name=n,start=i)  #start is actual row number
            newVar<- TRUE
            finLine<- FALSE
       	    #cat(i,"***",idx,"***",varName[[idx]]$name,"\n")
          }
          else if(newVar)  #found new variable
          {
            if(trim(f[i]) != "")  #dataline
            {
              if(i== length(f)) # last line, end of data file
              {
                varName[[idx]]$finish<- i
                finLine<-FALSE
                newVar<- FALSE
              }
              else  # normal dataline
              {
                varName[[idx]]$finish<- i    #keep update finish row num
                finLine<-TRUE
              }
            }
            else  #blank line
            {
              if(finLine) #which means blank line is after the datalines
              {
                varName[[idx]]$finish<- i-1
                finLine<-FALSE
                newVar<- FALSE
              }
              else  # blank line before the dataline
                varName[[idx]]$start<- i
            }
    
          }
          #cat(" i, f[i] ",i,"\t",f[i],"\n")
          #cat(i,"***",idx,"***",varName[[idx]]$name,varName[[idx]]$start,
          # varName[[idx]]$finish,newVar,finLine,"\n")
        }# end first for loop
    
    
        idx<-0
        i<-1
        data <- list()
        #second step, read in numerical/character data
        while (i <=length(f))
        {
          if (any(grep("^[ \t]*#", f[i]))) # readin comment line, looping variable name
          {
            idx<-idx+1
          }
          else if (trim(f[i]) != "") # data line,looping num data
          {
            nrow<- varName[[idx]]$finish-varName[[idx]]$start
            #determine if have more than one variable in variable name
            manyName <- trim(unlist(strsplit(gsub("[ \t]\\b","\\$",varName[[idx]]$name),"\\$")))
            
            if (nrow==1)  #####  read in sclar or vector
            {
              dummy<-trim(unlist(strsplit(gsub("[ \t]\\b","\\$",trim(f[varName[[idx]]$finish])),"\\$")))
              ncol<-length(dummy)
              newf <- as.double(dummy)
              if(length(manyName)==ncol)  # more variable names for each one
              { 
                for(k in 1:ncol){
                  data[[manyName[k]]]<-newf[k]
                  if(is.na(newf[1])) #if input is characters
                    data[[varName[[idx]]$name]]<- dummy[k]
                }
              } 
              else #use one common variable name
              {
                data[[varName[[idx]]$name]]<- newf
                if(is.na(newf[1])) #if input is characters
                  data[[varName[[idx]]$name]]<- dummy
              }
            }
            else #####  read in matrix
            {
              #first get # column
              dummy<-trim(unlist(strsplit(gsub("[ \t]\\b","\\$",trim(f[varName[[idx]]$finish])),"\\$")))
              ncol<-length(dummy)
              temp<-vector("list",nrow)  #create list to hold data
              for(j in 1:nrow) {
                dummy<-trim(unlist(strsplit(gsub("[ \t]\\b","\\$",trim(f[i+j-1])),"\\$")))
                newf <- as.double(dummy)
                temp[[j]]<- newf
              }
              
              data[[varName[[idx]]$name]]<-do.call(rbind,temp) #row bind list as a MATRIX, not dataframe
              
              if(length(manyName)==ncol)  # more variable names for each
              { 
                for(k in 1:ncol)
                  data[[manyName[k]]]<- data[[varName[[idx]]$name]][,k]
                #data[[varName[[idx]]$name]]<-NULL  # you may remove the long variable name one 
              } 
    
              i<-varName[[idx]]$finish  #update i afer readin matrix
            }
    
          }
          i<-i+1
        } # end while loop
    
        #import variable to specific environment
        if(import){
          for (i in 1:length(data))  
            assign(names(data[i]),data[[i]],envir) }
        options(op)
        return(data)
      }
     
    
    


  • read.admbFit()
    
    ## Function to read AD Model Builder fit from its par and cor files.
    ## Use by:
    ## simple.fit <- read.admbFit('c:/admb/examples/simple')
    ## Then the object 'simple.fit' is a list containing sub-objects
    # 'names', 'est', 'std', 'cor', and 'cov' for all model
    # parameters and sdreport quantities.
    #
    read.admbFit<-function(file){
      ret<-list()
      # read par file
      parfile<-as.numeric(scan(paste(file,'.par', sep=''),
        what='', n=16, quiet=TRUE)[c(6,11,16)])
      ret$nopar<-as.integer(parfile[1])
      ret$nloglike<-parfile[2] #objective function value
      ret$maxgrad<-parfile[3]
      
      # read cor file
      file<-paste(file,'.cor', sep='')
      lin<-readLines(file)
      # total parameter including sdreport variables
      ret$totPar<-length(lin)-2
      #log of the determinant of the hessian
      ret$logDetHess<-as.numeric(strsplit(lin[1], '=')[[1]][2])
      sublin<-lapply(strsplit(lin[1:ret$totPar+2], ' '),function(x)x[x!=''])
      ret$names<-unlist(lapply(sublin,function(x)x[2]))
      ret$est<-as.numeric(unlist(lapply(sublin,function(x)x[3])))
      ret$std<-as.numeric(unlist(lapply(sublin,function(x)x[4])))
      ret$cor<-matrix(NA, ret$totPar, ret$totPar)
      corvec<-unlist(sapply(1:length(sublin), function(i)sublin[[i]][5:(4+i)]))
      ret$cor[upper.tri(ret$cor, diag=TRUE)]<-as.numeric(corvec)
      ret$cor[lower.tri(ret$cor)] <- t(ret$cor)[lower.tri(ret$cor)]
      # covariance matrix
      ret$cov<-ret$cor*(ret$std %o% ret$std)
      return(ret)
    }