Files
RPresence/R/occ.mod.2sp.fp.R
Jens Laufer 4f637e83b9 initial
2019-07-12 15:19:27 +02:00

326 lines
19 KiB
R

occMod_2SPfp<-function(psi=call(),psi.cov=data$unitcov,p=call(),p.cov=cbind(data$unitcov,data$survcov),
omeg=call(),omeg.cov, conf=call, conf.cov,
param="PsiBA", sp.contr=TRUE,modname=NULL,paoname=NULL,outfile,model=3000,
fixed=NULL,initvals=NULL,data,miscopts=''){
#' Fit two species, static occupancy (single season) model with false positive detections
#'
#' This is not intended for direct use, but instead the \code{\link{occMod}} function should be used
#' with \code{type="so.2sp.cd"}. NOTE THAT THERE MAY BE SOME CHANGES TO HOW THIS MODEL IS IMPLEMENTED
#' IN THE NEAR FUTURE.
#'
#' @param psi The right-hand side of the formula for the model to fit for occupancy probability.
#' The terms \code{SP} and \code{INT} can be used to define a species effect on occupancy and an
#' occurrence-level interaction between species accordingly, without them being defined in \code{psi.cov}.
#' @param psi.cov A data frame containing the unit-specific covariates to use for the occupancy component
#' of the model.
#' @param p The right-hand side of the formula for the model to fit for detection probability. The terms
#' \code{SP}, \code{INT_o} and \code{INT_d} can be used to define a species effect on detection, a
#' detection-level interaction where the occurrence of one species changes the detection probability of
#' the other species and a second detection-level interaction where the detection of one species changes
#' the detection probability of the other species in the same survey. These terms do not have to be
#' defined as variables in \code{p.cov}.
#' @param p.cov A data frame containing the survey-specific covariates to use for the detection component
#' of the model.
#' @param param The parameterisation to be used, either "\code{psiBA}" or "\code{nu}", which relate to
#' \code{type="so.2sp.1"} and \code{type="so.2sp.2"} in the function \code{\link{occMod}} respectively.
#' @param sp.contr \code{TRUE} (default) or \code{FALSE}. Specifies the type of contrast used for the
#' \code{SP} term. It it used as the \code{contrasts} input in the function \code{\link{contr.treatment}}.
#' @param modname (optional) a string containing the model name
#' @param paoname (optional) a string containing the filename for the temporary PRESENCE data file.
#' @param model The PRESENCE model code. DO NOT CHANGE.
#' @param maxfn maximum number of function evaluations before aborting optimization.
#' @param fixed A single-column matrix containing values for real parameters to be fixed at.
#' \code{rownnames(fixed)} should contain the index of the real parameters to be fixed.
#' @param initvals Initial values for the beta parameters at which PRESENCE begins the optimisation.
#' The default values in PRESENCE is 0.
#' @param conf limits for confidence intervals as a proportion (defalut=0.95 for 95\% conf. interval limits)
#' @param data The \code{pao} data object containing the detection data and other information.
#' @param outfile name for output file (use outfile='modname') for outfile named via model name
#' @param miscopts (see \code{\link{occMod}})
#'
#' @return returns a list of class \code{occMod} and \code{so2spCd}
#'
#' \code{occMod$beta} contains the objects:
#' \item{psi}{estimated logistic regression coefficients and std.err for prob. of occurrence.}
#' \item{psi.VC}{var-covar matrix for logistic regression coefficients for prob. of occurrence.}
#' \item{p}{estimated logistic regression coefficients and standard errors for probability of detection.}
#' \item{p.VC}{variance-covariance matrix for logistic regression coefficients for probability of detection.}
#' \item{omeg}{estimated logistic regression coefficients and std.err for prob. of local occurrence before 1st survey.}
#' \item{omeg.VC}{var-covar matrix for omeg.}
#' \item{conf}{estimated logistic regression coefficients and std.err for prob. of sample is confirmed.}
#' \item{conf.VC}{var-covar matrix for conf.}
#' \item{VC}{the full variance-covariance matrix for all logistic regression coefficients.}
#' \code{occMod$real} contains the objects:
#' \item{psi}{estimated probabilities of occurrence for each sampling unit, along with standard errors and limits of 95\% confidence interval.}
#' \item{theta}{estimated prob. of local occurrence in each survey w/ std.err and 95\% conf interval limits.}
#' \item{p}{estimated probabilities of detection for each survey, along with standard errors and limits of 95\% confidence interval.}
#' \item{th0pi}{estimated prob. of local occurrence before 1st survey w/ std err and 95\% conf. interval limits.}
#'
#' @author Jim Hines and Darryl MacKenzie
#'
#' @seealso \code{\link{occMod}}
#' @examples
#' \dontrun{
#'# load a csv data file
#'filename<-system.file("extdata/twosp_exmpl.csv",package="RPresence")
#'dethist<-read.csv(filename,as.is=T)
#'
#'nsites=nrow(dethist); nsrvys=ncol(dethist) # set number of sites,surveys from det. history data
#'dethist=matrix(as.integer(unlist(dethist)),nrow=nsites) # replace missing values (-) with NA
#'
#'## create input "pao" object, for use with occMod function
#'data=createPao(dethist,unitcov=NULL,survcov=NULL,title="twosp corr.det. example")
#'
#'## fit some models
#'
#'mod1<-occMod(model=list(psi~SP, # occupancy species-specific, no interaction, parameters: psiA, psiBA=psiBa
#' theta~SP, # local occ. species-specific: parameters, thetaA0, thetaA1 (no corr. det. model)
#' p~SP, # detection sp. specific: parms: pA, pB
#' th0pi~1),data=data,type="so.2sp.cd",param="PsiBA")
#'
#'mod2=occMod(model=list(psi~SP+INT, # species and interaction effect (psiA != psiBA != psiBa)
#' theta~SP*PRIME+BAa+INTth, # thA != thA' != thBA != thBA' != thBa != thBa'
#' p~SP+INT_o+INT_d+INT_so, # pA != pB != rA != rBA !=rBa
#' th0pi~1), # constant prop unocc before 1st segment
#' data=data, type="so.2sp.cd", fixed=NULL)
#'#
#'tbl=createAicTable(list(mod1,mod2)); print(tbl$table)
#'
#' sim_2sp_corr_det_data <- function(sites=100,surveys=12,psiA=.8,psiBA=.3,psiBa=.7,
#' thA=c(.4,.9),thBA=c(.3,.9),thBa=c(.3,.9), # 1st is theta, 2nd is theta\'
#' pA=.6,pB=.66,rA=.7,rBA=.4,rBa=.5,
#' th0piA=0,th0piBA=0,th0piBa=0) {
#' # simulates single-season 2-species correlated detections data
#' psiB=c(psiBa,psiBA); th0piB=c(th0piBa,th0piBA); h=matrix(0,sites,surveys)
#' occA=0+(runif(sites)<psiA); # randomly assign occupancy state for sp. A of each site (0=unocc, 1=occ)
#' occB=0+(runif(sites)<psiB[occA+1]) # randomly assign occupancy state for sp. B; depends on occupancy of sp. A
#' loccA=occA*(runif(sites)<th0piA); # randomly assign local-occ state before 1st segment of trail for sp. A
#' loccB=occB*(runif(sites)<th0piB[loccA+1]) # randomly assign local-occ state before 1st segment of trail for sp. B
#' thB=c(thBa,thBA); pAx=c(pA,rA); pBx=c(pB,rBa,NA,rBA)
#' for (i in 1:surveys) {
#' loccA=occA*(runif(sites)<thA[loccA+1]); # local-occ for survey i depends on previous local-occ
#' loccB=occB*(runif(sites)<thB[loccB+1+2*occA])
#' detA=loccA*(runif(sites)<pAx[occB+1]) # detection depends on local-occ state (p=0 if not local-occ)
#' detB=loccB*(runif(sites)<pBx[occA+1+2*detA])
#' h[,i]=detA+2*detB # detection-history in compressed format: 0=neither sp. detected, 1=sp. A only, 2=sp. B only, 3=both
#' }
#' return(h)
#'}
#'
#'x=sim_2sp_corr_det_data(1000) # simulate some data
#'data=createPao(x) # create_pao file from data
#'# run a model : psiA()psiBA(),psiBa,thA(),thA'(),thBA(),thBA'(),thBa(),thBa'(),pA(),pB(),rA(),rBA(),rBa()
#'mod3=occMod(model=list(psi~SP+INT, # species and interaction effect (psiA != psiBA != psiBa)
#' theta~SP*PRIME+BAa+INTth, # thA != thA' != thBA != thBA' != thBa != thBa'
#' p~SP+INT_o+INT_d+INT_so, # pA != pB != rA != rBA !=rBa
#' th0pi~1), # constant prop unocc before 1st segment
#' data=data, type="so.2sp.cd", fixed=NULL)
#'print_one_site_estimates(mod3,site=1)
#' }
options("na.action"="na.pass")
detdata=data$det.data
if (max(data$det.data)<4) detdata=data$det.data+matrix(data$survcov$conf,data$nunits,data$nsurveys)*4
samp.units<-data$nunits; data$unitnames=paste0('unit',1:samp.units)
if (is.null(psi.cov)) psi.cov=data.frame(SP=as.factor(c(rep("A",samp.units),rep("B",2*samp.units)))) else {
if (nrow(psi.cov)==samp.units) psi.cov=rbind(psi.cov,psi.cov,psi.cov)
psi.cov$SP<-as.factor(c(rep("A",samp.units),rep("B",2*samp.units)))
}
contrasts(psi.cov$SP)<-contr.treatment(2,contrasts=sp.contr)
psi.cov$INT<-as.factor(c(rep(1,2*samp.units),rep(2,samp.units))) # INT - psiBA,psiBa interaction
psi_mat<-model.matrix(psi,psi.cov)
len<-samp.units*data$nsurveys ## length of covariates in detection design matrix
if(is.null(p.cov)) p.cov<-data.frame(SURVEY=rep(1:data$nsurveys,each=samp.units)) else
p.cov$SURVEY=rep(1:data$nsurveys,each=samp.units)
p.cov=data.frame(apply(p.cov,2,rep,5))
p.cov$SP<-as.factor(c(rep("A",len), rep("B",len), rep("A",len), rep("B",2*len)))
seasn=time=NULL; for (i in 1:data$nseasons) seasn=c(seasn,rep(i,samp.units*data$nsurveyseason[i]))
for (i in 1:data$nsurveys) time=c(time,rep(i,samp.units))
p.cov$SEASON<-as.factor(rep(seasn,5));
contrasts(p.cov$SP)<-contr.treatment(2,contrasts=sp.contr)
p.cov$INT_o<-as.factor(c(rep('1',2*len),rep('2',3*len))) # interaction: r != p
p.cov$INT_d<-as.factor(c(rep(1,4*len),rep(2,len))) # interaction: rBa != rBA
p.cov$INT_so<-as.factor(c(rep(1,3*len),rep(2,2*len))) # interaction: rBa != rBA
p_mat<-model.matrix(p,p.cov);
if(is.null(omeg.cov)) omeg.cov<-data.frame(SURVEY=rep(1:data$nsurveys,each=samp.units)) else
omeg.cov$SURVEY=rep(1:data$nsurveys,each=samp.units)
omeg.cov=data.frame(apply(omeg.cov,2,rep,4))
omeg.cov$SP<-as.factor(c(rep("A",2*len), rep("B",2*len)))
seasn=time=NULL; for (i in 1:data$nseasons) seasn=c(seasn,rep(i,samp.units*data$nsurveyseason[i]))
for (i in 1:data$nsurveys) time=c(time,rep(i,samp.units))
omeg.cov$SEASON<-as.factor(rep(seasn,4));
omeg.cov$INT_d<-as.factor(c(rep(1,2*len),rep(2,2*len))) # interaction: oa != ob
omeg.cov$INT_so<-as.factor(rep(c(rep(1,len),rep(2,len)),2)) # interaction: oa != oA, ob != oB
omeg_mat<-model.matrix(omeg,omeg.cov)
if(is.null(conf.cov)) conf.cov<-data.frame(SURVEY=rep(1:data$nsurveys,each=samp.units)) else
conf.cov$SURVEY=rep(1:data$nsurveys,each=samp.units)
conf.cov=data.frame(apply(conf.cov,2,rep,2))
conf.cov$SP<-as.factor(c(rep("A",len), rep("B",len)))
seasn=time=NULL; for (i in 1:data$nseasons) seasn=c(seasn,rep(i,samp.units*data$nsurveyseason[i]))
for (i in 1:data$nsurveys) time=c(time,rep(i,samp.units))
conf.cov$SEASON<-as.factor(rep(seasn,2));
conf.cov$INT_d<-as.factor(c(rep(1,len),rep(2,len))) # interaction: cA!= cB
conf_mat<-model.matrix(conf,conf.cov)
psi.label<-c("psiA","psiBA","psiBa"); p.label<-c("pA","pB","rA","rBA","rBa")
omeg.label<-c('oa','oA','ob','oB'); conf.label<-c('cA','cB')
## create and output temporary pao file
### bind each block of occupancy covariates to unitcov
temp.cov<-cbind(psi_mat[1:samp.units,],psi_mat[samp.units+1:samp.units,],psi_mat[2*samp.units+1:samp.units,])
v1=c(paste0(rep(colnames(psi_mat),3),"_",rep(psi.label,each=ncol(psi_mat))))
colnames(temp.cov)<-v1
temp.pao<-data; temp.pao$nunitcov<-ncol(temp.cov); temp.pao$unitcov<-as.data.frame(temp.cov)
temp.pao$det.data=detdata;
rownames(temp.pao$unitcov)<-data$unitnames;
if (length(temp.pao$surveynames)<1) temp.pao$surveynames=1:temp.pao$nsurveys
## bind each block of detection covariates
temp.cov<-cbind(p_mat[1:len,],p_mat[len+1:len,],p_mat[2*len+1:len,],p_mat[3*len+1:len,],p_mat[4*len+1:len,],
omeg_mat[1:len,],omeg_mat[len+1:len,],omeg_mat[2*len+1:len,],omeg_mat[3*len+1:len,],
conf_mat[1:len,],conf_mat[len+1:len,])
v2=c(paste0(rep(colnames(p_mat),5),"_",rep(p.label,each=ncol(p_mat))))
v3=c(paste0(rep(colnames(omeg_mat),4),"_",rep(omeg.label,each=ncol(omeg_mat))))
v4=c(paste0(rep(colnames(conf_mat),2),"_",rep(conf.label,each=ncol(conf_mat))))
colnames(temp.cov)<-c(v2,v3,v4)
temp.pao$nsurvcov<-ncol(temp.cov)
temp.pao$survcov<-as.data.frame(temp.cov)
temp.pao$paoname<-ifelse(is.null(paoname),"paodata.pao",paoname)
rownames(temp.pao$survcov)<-1:dim(temp.pao$survcov)[1]
colnames(temp.pao$unitcov)<-gsub(".Intercept.","Int",colnames(temp.pao$unitcov))
colnames(temp.pao$survcov)<-gsub(".Intercept.","Int",colnames(temp.pao$survcov))
## create design matrices file
v=colnames(temp.pao$unitcov); i=grep("_psi",v)
psi.dm<-matrix(v[i],3,ncol(psi_mat),byrow=TRUE)
rownames(psi.dm)<-psi.label; colnames(psi.dm)<-paste0("a",1:ncol(psi_mat))
psi.dm.col<-ncol(psi.dm)
psi.dm=simplify_dm(psi.dm,pao=temp.pao); colnames(psi.dm)=paste0('a',1:ncol(psi.dm))
v=colnames(temp.pao$survcov); i=grep("_[pr]",v)
p.dm<-matrix(rep(matrix(v[i],nrow=5,byrow=T),each=temp.pao$nsurveys),nrow=5*temp.pao$nsurveys)
p.dm.col<-ncol(p.dm)
rownames(p.dm)<-paste0(rep(p.label,each=temp.pao$nsurveys),"[",1:temp.pao$nsurveys,"]")
colnames(p.dm)<-paste0(ifelse(is.null(gamma),"b","d"),1:ncol(p_mat))
p.dm.save=p.dm; p.dm=simplify_dm(p.dm.save,temp.pao)
v=colnames(temp.pao$survcov); i=grep("_o[aAbB]",v)
o.dm<-matrix(rep(matrix(v[i],nrow=4,byrow=T),each=temp.pao$nsurveys),nrow=4*temp.pao$nsurveys)
o.dm.col<-ncol(o.dm)
rownames(o.dm)<-paste0(rep(omeg.label,each=temp.pao$nsurveys),"[",1:temp.pao$nsurveys,"]")
colnames(o.dm)<-paste0(ifelse(is.null(gamma),"b","e"),1:ncol(omeg_mat))
o.dm.save=o.dm; o.dm=simplify_dm(o.dm.save,temp.pao)
v=colnames(temp.pao$survcov); i=grep("_c[AB]",v)
c.dm<-matrix(rep(matrix(v[i],nrow=2,byrow=T),each=temp.pao$nsurveys),nrow=2*temp.pao$nsurveys)
c.dm.col<-ncol(c.dm)
rownames(c.dm)<-paste0(rep(conf.label,each=temp.pao$nsurveys),"[",1:temp.pao$nsurveys,"]")
colnames(c.dm)<-paste0(ifelse(is.null(gamma),"b","f"),1:ncol(conf_mat))
c.dm.save=c.dm; c.dm=simplify_dm(c.dm.save,temp.pao)
tmp.unitcov=NULL; temp.pao$nunitcov=0; lst=get_not01_dm(c(psi.dm),names(temp.pao$unitcov))
if (sum(lst)>0) {
for (i in 1:length(lst)) if (lst[i]>0) tmp.unitcov=cbind(tmp.unitcov,temp.pao$unitcov[,i])
colnames(tmp.unitcov)=colnames(temp.pao$unitcov)[which(lst>0)]
}
temp.pao$unitcov=tmp.unitcov; if (!is.null(tmp.unitcov)) temp.pao$nunitcov=ncol(tmp.unitcov);
tmp.survcov=NULL; temp.pao$nsurvcov=0; lst=get_not01_dm(c(psi.dm),names(temp.pao$survcov))
if (sum(lst)>0) {
for (i in 1:length(lst)) if (lst[i]>0) tmp.survcov=cbind(tmp.survcov,temp.pao$survcov[,i])
colnames(tmp.survcov)=colnames(temp.pao$survcov)[which(lst>0)]
}
temp.pao$survcov=tmp.survcov; if (!is.null(tmp.survcov)) temp.pao$nsurvcov=ncol(tmp.survcov)
#write_pao(temp.pao)
modname=gsub(' ','',paste0('psi(',as.character(psi),')p(',as.character(p),
')omeg(',as.character(omeg),')conf(',as.character(omeg),')')[2])
if(!is.null(fixed)){
fixed$idx<-match(fixed$param,unlist(lapply(list(psi.dm,p.dm,o.dm,c.dm),rownames)))
}
rv<-runPresence(temp.pao,list(psi.dm,p.dm,NULL,NULL,o.dm,c.dm),
model,modname=modname,fixed=fixed,initvals=initvals,outfile=outfile,miscopts=miscopts)
npar<-psi.dm.col+p.dm.col+o.dm.col+c.dm.col;
### extract results from output file
v=readLines(outfile); l=length(v);
## find AIC value for each model
instns=grep("-2log(likelihood)",v,fixed=TRUE)
z<-get_line(v,instns[1])
neg2loglike<-as.numeric(z[4])
z<-get_line(v,instns[1]+1)
aic<-as.numeric(z[3]); cat('aic=',aic,'\n')
### get_coefficients
instns<-grep("Untransformed",v,fixed=TRUE)
extract<-v[(instns[1]+3):(instns[1]+2+npar)]
index<-grep("^A.+ psi",extract)
psi.coeff<-get_coeff(v=extract,index=index,n.coeff=ncol(psi_mat),
cov.names=gsub('Intercept','Int',colnames(psi_mat)),b.string="a",b.count=0)
index<-grep("^B",extract);
p.coeff<-get_coeff(v=extract,index=index,n.coeff=ncol(p_mat),
cov.names=gsub('Intercept','Int',colnames(p_mat)),b.string="b",b.count=0)
index<-grep("^E",extract)
omeg.coeff<-get_coeff(v=extract,index=index,n.coeff=ncol(omeg_mat),
cov.names=gsub('Intercept','Int',colnames(omeg_mat)),b.string="e",b.count=0)
index<-grep("^F",extract)
conf.coeff<-get_coeff(v=extract,index=index,n.coeff=ncol(conf_mat),
cov.names=gsub('Intercept','Int',colnames(conf_mat)),b.string="e",b.count=0)
names<-c(rownames(psi.coeff),rownames(p.coeff),rownames(omeg.coeff),rownames(conf.coeff))
### get_VC matrix
instns<-grep("Variance-Covariance Matrix of",v,fixed=TRUE)
VC<-get_VC(v=v,offset=instns+1,n.coeff=npar,cov.names=names)
psi.VC<-VC[psi.coeff[,"index"],psi.coeff[,"index"]]
p.VC<-VC[p.coeff[,"index"],p.coeff[,"index"]]
omeg.VC=VC[omeg.coeff[,"index"],omeg.coeff[,"index"]]
conf.VC=VC[conf.coeff[,"index"],conf.coeff[,"index"]]
## Get real parameter estimates from output
i=grep("parameter +site +estimate",v)[1]; v=v[i:length(v)]
parse_est <- function(parm,v) {
i=grep(paste0('^',parm),v); vv=v[i];
v2=gsub('D0-','D0 -',gsub('^ ','',gsub(' +',' ',gsub('.+:','',gsub(' - ','',vv)))))
v3=matrix(as.numeric(unlist(strsplit(v2,' '))),ncol=4,byrow=T)
rownames(v3)=gsub(' .+','',vv); colnames(v3)=c('est','se','lowCI','hiCI')
return(v3)
}
psi.est=parse_est('psi',v)
i=grep("parameter +site +estimate",v)[2]; v=v[i:length(v)]
p.est<-parse_est('[pr][AB]',v)
omeg.est=parse_est('o[aAbB]',v)
conf.est=parse_est('c[AB]',v)
##### check for warnings
warn.conv<-check_conv_warn(v)
warn.VC<-check_VC_warn(v)
result<-list(modname=modname,
model=list(psi=psi,p=p),dmat=list(psi=psi.dm,p=p.dm,o=o.dm,c=c.dm),
data=temp.pao,outfile=outfile,
neg2loglike=neg2loglike,
npar=npar, aic=aic,
beta=list(psi=as.data.frame(psi.coeff),psi.VC=psi.VC,
p=as.data.frame(p.coeff),p.VC=p.VC,
omeg=as.data.frame(omeg.coeff),omeg.VC=omeg.VC,VC=VC,
conf=as.data.frame(conf.coeff),conf.VC=conf.VC,VC=VC),
real=list(psi=as.data.frame(psi.est),
p=as.data.frame(p.est),
omeg=as.data.frame(omeg.est),
conf=as.data.frame(conf.est)),
warnings=list(conv=warn.conv,VC=warn.VC))
class(result)<-c("occMod","so2spfp")
return(result)
}