##############################################################################
# Distributed Smoothed Quantile Regression Paper
#
# multigddist    - main routine for fitting distributed SQR
# simdistributed - example simulation similar to that of Table 1
##############################################################################
library(quantreg)
library(conquer)
library(MultiRNG)
library(MASS)

##############################################################################
# Simulation code for Table 1 (t-distributed noise).
#
# n       - number of samples per machine
# p       - number of covariates
# m       - number of machines
# seed    - RNG seed for reproducibility
# kernel  - smoothing kernel for the local (machine-1) solves and conquer
# kernel2 - kernel for the global gradient; "none" uses the unsmoothed
#           QR subgradient
# tau     - quantile level
#
# Returns a list with the true beta, the one-machine initial estimator
# (bbetabar), the pooled-data QR fit (fullbeta), the averaged per-machine
# QR fit (qrbeta), the averaged per-machine conquer fit (qrbetacon; this
# was computed but accidentally dropped from the return value before),
# and the distributed SQR iterates, one column per iteration (dsqrbeta).
##############################################################################
simdistributed <- function(n, p, m, seed, kernel = "Gaussian",
                           kernel2 = "Gaussian", tau = 0.5) {
  set.seed(seed)
  N <- n * m
  # Bandwidths: h uses the full sample size N, g the per-machine size n.
  h <- max(0.01, ((p + log(N)) / N)^(1 / 3))
  g <- max(0.01, ((p + log(n)) / n)^(1 / 3))

  # AR-type correlation structure: Sigma[i, j] = 0.5^|i - j|.
  Sigma <- 0.5^abs(outer(seq_len(p + 1), seq_len(p + 1), "-"))

  # Covariates: correlated uniforms rescaled to mean 0 and variance 1.
  # p + 1 columns are drawn; only the first p are used as covariates.
  X <- sqrt(12) * draw.d.variate.uniform(N, p + 1, Sigma) - sqrt(3)
  X <- X[, seq_len(p), drop = FALSE]
  Xinit <- sqrt(12) * draw.d.variate.uniform(n, p + 1, Sigma) - sqrt(3)
  Xinit <- Xinit[, seq_len(p), drop = FALSE]

  beta <- rep(1, p)
  # t_2 noise recentred so that its tau-quantile is exactly 0.
  rnoise <- rt(N, 2) - qt(tau, 2)
  rnoiseinit <- rt(n, 2) - qt(tau, 2)
  # Heteroscedastic model: the noise scale depends on the last covariate.
  y <- X %*% beta + (0.2 * X[, p] + 1) * rnoise
  yinit <- Xinit %*% beta + (0.2 * Xinit[, p] + 1) * rnoiseinit

  # Split the pooled sample into m machines of n observations each.
  datay <- vector("list", m)
  dataX <- vector("list", m)
  for (i in seq_len(m)) {
    idx <- ((i - 1) * n + 1):(i * n)
    datay[[i]] <- y[idx]
    dataX[[i]] <- X[idx, , drop = FALSE]  # drop = FALSE: keep matrix at p = 1
  }

  # Initial estimator from one machine's worth of data (conquer).
  bbetabar <- conquer(Xinit, yinit, tau = tau, kernel = kernel)$coeff
  # Oracle benchmark: QR on the pooled data.
  fullbeta <- rq(y ~ X, tau = tau, method = "pfn")$coefficients

  # Divide-and-conquer benchmarks: fit each machine separately, then average.
  qrbeta <- rowMeans(vapply(seq_len(m), function(i) {
    rq(datay[[i]] ~ dataX[[i]], tau = tau, method = "pfn")$coefficients
  }, numeric(p + 1)))
  qrbetacon <- rowMeans(vapply(seq_len(m), function(i) {
    conquer(dataX[[i]], datay[[i]], tau = tau, kernel = kernel)$coeff
  }, numeric(p + 1)))

  Titer <- 4
  dsqrbeta <- multigddist(datay, dataX, 2.5 * h, 2.5 * g, m, tau,
                          bbetabar, kernel, kernel2, Titer)

  list(beta = beta, bbetabar = bbetabar, fullbeta = fullbeta,
       qrbeta = qrbeta, qrbetacon = qrbetacon, dsqrbeta = dsqrbeta)
}

##############################################################################
# Run Titer rounds of the distributed SQR update.
#
# Each round is one communication step: every machine evaluates its
# (smoothed) gradient at the current estimate, then machine 1 solves a
# shifted local smoothed-QR problem via gradient descent (gddist).
#
# Returns a (p + 1) x Titer matrix; column t is the estimate after round t.
##############################################################################
multigddist <- function(datay, dataX, h, g, m, tau, initialbeta,
                        kernel, kernel2, Titer) {
  betares <- NULL
  betainit <- initialbeta
  for (t in seq_len(Titer)) {
    # Gradients from all m machines at the current estimate.
    gtmp <- computegradient(datay, dataX, h, g, m, tau,
                            initialbeta = betainit, kernel = kernel2)
    # Local solve on machine 1 with the global-gradient adjustment.
    betainit <- gddist(datay[[1]], dataX[[1]], g, tau, gradient = gtmp,
                       bbetabar = betainit, kernel = kernel)$beta
    betares <- cbind(betares, betainit)
    print(t)  # progress: completed iteration number
  }
  betares
}

##############################################################################
# Gradient descent for the distributed estimator (machine 1's solve).
#
# y        - responses on machine 1
# X        - design matrix on machine 1
# g        - local bandwidth
# tau      - quantile level
# gradient - list of per-machine gradients evaluated at bbetabar
# bbetabar - current estimate (starting point)
# kernel   - "uniform" or "Gaussian"; anything else is an error
# thres    - stop when the gradient norm falls below this
# max.iter - iteration cap
#
# Returns list(beta, iter, threshold).
##############################################################################
gddist <- function(y, X, g, tau, gradient, bbetabar, kernel = "uniform",
                   thres = 1e-6, max.iter = 1000) {
  m <- length(gradient)
  p <- ncol(X)
  n <- nrow(X)
  # Column i holds machine i's (p + 1)-vector gradient at bbetabar.
  tmp <- matrix(unlist(gradient), nrow = p + 1, ncol = m, byrow = FALSE)

  # Machine 1's smoothed local gradient at coefficient vector b.
  localgrad <- function(b) {
    res <- y - b[1] - X %*% b[-1]
    w <- switch(kernel,
                uniform = unif(-res / g, tau),
                Gaussian = gauss(-res / g, tau),
                stop("unsupported kernel: ", kernel))
    t(cbind(1, X)) %*% w / n
  }

  grad1 <- localgrad(bbetabar)
  # Shift so the effective gradient at bbetabar equals the global average.
  adjustment <- grad1 - rowMeans(tmp)

  # First step: unit step along the adjusted gradient.
  betaold <- bbetabar
  betaupdate <- bbetabar - (grad1 - adjustment)

  iter <- 1
  threshold <- 1e5
  while (iter <= max.iter && threshold > thres) {
    grad <- localgrad(betaold) - adjustment
    grad2 <- localgrad(betaupdate) - adjustment
    graddiff <- grad2 - grad
    betadiff <- betaupdate - betaold
    # Barzilai-Borwein step sizes, capped at 100.
    eta1 <- sum(betadiff^2) / sum(betadiff * graddiff)
    eta2 <- sum(betadiff * graddiff) / sum(graddiff^2)
    eta <- min(eta1, eta2, 100)
    betaold <- betaupdate
    betaupdate <- betaupdate - eta * grad2
    threshold <- sqrt(sum(grad^2))
    iter <- iter + 1
  }
  list(beta = betaupdate, iter = iter, threshold = threshold)
}

##############################################################################
# Per-machine gradient of the (smoothed) quantile loss at initialbeta.
#
# kernel = "none" uses the unsmoothed QR subgradient; "uniform" and
# "Gaussian" use the corresponding smoothed weights with bandwidth h.
# Returns a list of m gradients, each a (p + 1) x 1 matrix.
##############################################################################
computegradient <- function(datay, dataX, h, g, m, tau, initialbeta,
                            kernel = "uniform") {
  n <- length(datay[[1]])
  gradient <- vector("list", m)
  for (i in seq_len(m)) {
    res <- datay[[i]] - initialbeta[1] - dataX[[i]] %*% initialbeta[-1]
    Z <- cbind(1, dataX[[i]])
    gradient[[i]] <- switch(kernel,
      uniform  = t(Z) %*% unif(-res / h, tau) / n,
      Gaussian = t(Z) %*% gauss(-res / h, tau) / n,
      none     = t(Z) %*% (as.numeric(res <= 0) - tau) / n,
      stop("unsupported kernel: ", kernel))
  }
  gradient
}

##############################################################################
# Smoothed check-loss gradient weights for the uniform and Gaussian kernels.
##############################################################################

# Integrated uniform kernel on [-1, 1], shifted by tau:
# 0 for u < -1, (1 + u) / 2 for |u| <= 1, 1 for u > 1, all minus tau.
unif <- function(u, tau) {
  as.numeric(u > 1) * 1 + as.numeric((1 + u) * (abs(u) <= 1) / 2) - tau
}

# Gaussian CDF shifted by tau.
gauss <- function(u, tau) {
  pnorm(u) - tau
}