# Main functions are
# robustmat for robust sparse reduced rank regression
# spca for robust sparse PCA (with covariance matrix calculated based on the truncated observations using the function truncate(y,tau))

#########################################################
# ADMM for Robust Sparse Reduced Rank Regression
# Y is the response matrix
# X is the covariates
# lambda and gamma are tuning parameters in Equation (5)
# lambda is for the rank 
# lambda*gamma is for the sparsity 
# tau is the truncation level
# rho is usualy chosen as a function of 1/(number of samples)
#########################################################
robustmat <- function(Y,X,lambda,gamma,tau,convergence=1e-7,maxiter=1000,rho){
	
	n <- nrow(Y)
	q <- ncol(Y)
	p <- ncol(X)
		
# Variables Initialization
	D <- UD <-  matrix(0,n,q)
	A <- Z <- W <- matrix(0,p,q)
	oldA <- matrix(1,p,q)
	UZ <- UW <- matrix(0,p,q)	
	criteria <- 1e10
	i <- 1
	
	tildeX <- rbind(X,diag(1,p,p),diag(1,p,p))
	solvetildeX <- solve(t(tildeX)%*%tildeX)
	
# While loop for the iterations
	while(criteria > convergence && i <= maxiter){
	
	A <- updateA(solvetildeX,tildeX,D,Z,W,UD,UZ,UW)
		
	Z <- updateZ(A,UZ,lambda,gamma,rho)
	
	W <- updateW(A,UW,lambda,rho)

	D <- updateD(Y,X,A,UD,rho,tau)
		
	UD <- UD+D-X%*%A
	
	UZ <- UZ+Z-A
	
	UW <- UW+W-A		
	
	criteria <- mean((A-oldA)^2)

	oldA <- A
	i <- i+1
		
	}	
	
	if(i>maxiter){
  	 warning("The algorithm has not converged by the specified maximum number of iteration")
  }

return(list(A=A,Z=Z,W=W,iteration=i))	
	
}


######################################
# Cross-validation
######################################
cv.robust <- function(X,Y,lambdas,gamma,nfold,loss="huber",seed,tau=3){
	
	set.seed(seed)
    p <- ncol(X)
    q <- ncol(Y) 
# Index for training vs test set
	fold <- rep(NA,nrow(Y))
	fold <- sample(rep(1:nfold,nrow(Y)/nfold))
	cv.error <- NULL
	
for(j in 1:nfold){
		print(paste("cross validation for dataset ",j,sep=""))
		error <- NULL
		tmp <- 1
		ytrain <- Y[which(fold!=j),]
		ytest <- Y[which(fold==j),]		
		Xtrain <- X[which(fold!=j),]		
		Xtest <- X[which(fold==j),]			
		ntrain <- nrow(Xtrain)
		if(loss=="huber"){
			tau <- tau
		}
		else if(loss=="sqerror"){
			tau <- 100000000
		}		
		
		for(lambda in lambdas){
		gammaupdate <- gamma*lambda
		model <- robustmat(ytrain,Xtrain,lambda,gammaupdate,tau,convergence=1e-7,maxiter=1000,rho=10/nrow(Xtrain))
		estA <- model$A
		predictedY <- Xtest%*%estA
		error <- c(error,sum((predictedY-ytest)^2))	
}
		cv.error <- rbind(cv.error,error)
	}
	
	return(list(cv.error=cv.error,lambdas=lambdas,gamma=gamma))
}

#####################################
# ADMM for sparse PCA 
# covX is either the sample covariance matrix or  truncated sample covariance matrix 
# lambda is a tuning parameter that controls sparsity
# K is the rank constraint.
#####################################
spca <- function(covX,lambda,K,rho=1,epsilon=1e-3,maxiter=1000,trace=FALSE,init=FALSE,initPi=NULL,initY=NULL,initU=NULL){
	
	p <- nrow(covX)
	criteria <- 1e10
	i <- 1
	
	# Initialize parameters
	Y <- Pi <- oldPi <-  diag(1,p,p)
	U <- matrix(0,p,p)
    
    if(init==TRUE){
       U <- initU
       Pi <- initPi
       Y <- initY
    }
	
	# While loop for the iterations
    	while(criteria > epsilon && i <= maxiter){

        Pi <- updatePi(covX,Y,Pi,U,K,rho)

		Y <- Soft(Pi+U,lambda/rho)

		U <- U + rho*(Pi - Y)

        criteria <- sqrt(sum((Pi-Y)^2))
        oldPi <- Pi
        i <- i+1
        if(trace==TRUE)
        {
 		    print(i)
			print(criteria)
        }
		
	}
	
	return(list(Pi=Pi,Y=Y,U=U,iteration=i,convergence=criteria))

}


######################################################
# Update A
######################################################
updateA <- function(solvetildeX,tildeX,D,Z,W,UD,UZ,UW){
	Theta <- rbind(D,Z,W)
	U <- rbind(UD,UZ,UW)
	return(solvetildeX%*%t(tildeX)%*%(Theta+U))
	
}

######################################################
# Update Z
######################################################
updateZ <- function(A,UZ,lambda,gamma,rho){
  temp <- A-UZ
  B <- lambda*gamma/rho
  S <- Soft(temp,B)
  return(S)	
}


######################################################
# Update D
######################################################
updateD <- function(Y,X,A,UD,rho,tau){
  n <- nrow(X)
  q <- ncol(A)
  C <- X%*%A-UD
  D <- matrix(0,n,q)
  index <- which(abs(n*rho*(Y-C)/(1+n*rho)) <= tau)
#  index2 <- which(abs(C)<= tau*(1+n*rho)/(n*rho))
  D[index] <- (Y + n*rho*C)[index]/(1+n*rho)
  D[-index] <- Y[-index] - Soft(Y-C,tau/(n*rho))[-index]
  return(D)	
}


######################################################
# Update W
######################################################
updateW <- function(A,UW,lambda,rho){
  C <- A-UW
  a <- svd(C)
  D <- diag(a$d)
  U <- a$u
  V <- a$v  
  L <- U%*%(pmax(D-lambda/rho,0))%*%t(V)			
  return(L)
}


######################################################
# Soft-thresholding Operator
######################################################
Soft <- function(a,b){
  if(b<0) stop("Can soft-threshold by a nonnegative quantity only.")
  return(sign(a)*pmax(0,abs(a)-b))
}


######################################################
# Update Pi
######################################################
updatePi <- function(covX,Y,Pi,U,K,rho){

	temp <- Y - U + covX/rho
	svdtemp <- eigen(temp)
	d <- svdtemp$values
	p <- length(d)
	
	if(sum(pmin(1,pmax(d,0)))<=K){
        dfinal <- pmin(1,pmax(d,0))
        return(svdtemp$vectors%*%diag(dfinal)%*%t(svdtemp$vectors))
	}
	
	fr <- function(x){
		sum(pmin(1,pmax(d-x,0)))
	}
	
# Vincent Vu Fantope Projection
	knots <- unique(c((d-1),d))
	knots <- sort(knots,decreasing=TRUE)
	temp <- which(sapply(knots,fr)<=K)
	lentemp <- tail(temp,1)
	a=knots[lentemp]
	b=knots[lentemp+1]
	fa <- sum(pmin(pmax(d-a,0),1))
	fb <- sum(pmin(pmax(d-b,0),1))
	theta <- a+ (b-a)*(K-fa)/(fb-fa)
	dfinal <- pmin(1,pmax(d-theta,0))
	res <- svdtemp$vectors%*%diag(dfinal)%*%t(svdtemp$vectors)
	return(res)
}	

# running using robust type covariance
truncate <- function(y,tau){
	y[which(y>tau)] <- tau
	y[which(y<(-tau))] <- -tau
	return(y)
}