**(y), not y. To get the corresponding parameters for the original-scale data, you have to transform the estimated parameters. And the prior on the mean and precision have to be appropriate for log(y), as opposed to y. This post shows an example of a JAGS program (with rjags) for estimating the parameters of log-normal distributed data.**

*log*This post updates the R+JAGS code from a previous post. Please see that previous post for more background. The previous post had code appropriate for the 1st edition of the book; this post updates a few details for use with the 2nd edition of the book. In particular, the new code shows new MCMC diagnostic plots and uses a more appropriate prior on meanOfLogY. Here are some of its output graphs; R code follows below.

Here is the full R script:

**#------------------------------------------------------------------------------**

# Jags-Ymet-Xnom1grp-MlogNormal-Script.R

# April 19, 2016. John K. Kruschke.

# Requires auxiliary R scripts that accompany the book,

# Kruschke, J. K. (2015). Doing Bayesian Data Analysis, Second Edition:

# A Tutorial with R, JAGS, and Stan. Academic Press / Elsevier.

# graphics.off()

# rm(list=ls(all=TRUE))

source("DBDA2E-utilities.R")

fileNameRoot="Jags-Ymet-Xnom1grp-MlogNormal-Script"

graphFileType="png"

#------------------------------------------------------------------------------

# THE DATA.

# Generate some random data from known parameter values:

set.seed(47405)

trueLogM = 5.0 # true mean of log(y)

trueLogSD = 0.5 # true sd of log(y)

y = rnorm( n=125 ) # normal distribution of log-scale values

LogY = (y-mean(y))/sd(y)*trueLogSD + trueLogM

y = exp(LogY) # y is exponentiated values of log(y)

# OR JUST PUT IN YOUR ACTUAL DATA HERE:

# y = ...

# Package the data for shipping to JAGS:

dataList = list(

y = y ,

N = length(y) ,

meanOfLogY = mean(log(y)) ,

sdOfLogY = sd(log(y))

)

#------------------------------------------------------------------------------

# THE MODEL.

modelstring = "

model {

for( i in 1 : N ) {

y[i] ~ dlnorm( muOfLogY , 1/sigmaOfLogY^2 )

}

sigmaOfLogY ~ dunif( 0.001*sdOfLogY , 1000*sdOfLogY )

# Jags-Ymet-Xnom1grp-MlogNormal-Script.R

# April 19, 2016. John K. Kruschke.

# Requires auxiliary R scripts that accompany the book,

# Kruschke, J. K. (2015). Doing Bayesian Data Analysis, Second Edition:

# A Tutorial with R, JAGS, and Stan. Academic Press / Elsevier.

# graphics.off()

# rm(list=ls(all=TRUE))

source("DBDA2E-utilities.R")

fileNameRoot="Jags-Ymet-Xnom1grp-MlogNormal-Script"

graphFileType="png"

#------------------------------------------------------------------------------

# THE DATA.

# Generate some random data from known parameter values:

set.seed(47405)

trueLogM = 5.0 # true mean of log(y)

trueLogSD = 0.5 # true sd of log(y)

y = rnorm( n=125 ) # normal distribution of log-scale values

LogY = (y-mean(y))/sd(y)*trueLogSD + trueLogM

y = exp(LogY) # y is exponentiated values of log(y)

# OR JUST PUT IN YOUR ACTUAL DATA HERE:

# y = ...

# Package the data for shipping to JAGS:

dataList = list(

y = y ,

N = length(y) ,

meanOfLogY = mean(log(y)) ,

sdOfLogY = sd(log(y))

)

#------------------------------------------------------------------------------

# THE MODEL.

modelstring = "

model {

for( i in 1 : N ) {

y[i] ~ dlnorm( muOfLogY , 1/sigmaOfLogY^2 )

}

sigmaOfLogY ~ dunif( 0.001*sdOfLogY , 1000*sdOfLogY )

**muOfLogY ~ dnorm( meanOfLogY , 1/(10*sdOfLogY)^2 ) # updated 8/16/2017**

**muOfY <- exp(muOfLogY+sigmaOfLogY^2/2)**

modeOfY <- exp(muOfLogY-sigmaOfLogY^2)

sigmaOfY <- sqrt(exp(2*muOfLogY+sigmaOfLogY^2)*(exp(sigmaOfLogY^2)-1))

}

" # close quote for modelstring

writeLines(modelstring,con="model.txt")

#------------------------------------------------------------------------------

# INTIALIZE THE CHAINS.

# Let JAGS do it

#------------------------------------------------------------------------------

# RUN THE CHAINS

require(rjags)

parameters = c("muOfLogY" , "sigmaOfLogY" , "muOfY" , "modeOfY" , "sigmaOfY" )

adaptSteps = 1000 # Number of steps to "tune" the samplers.

burnInSteps = 1000 # Number of steps to "burn-in" the samplers.

nChains = 3 # Number of chains to run.

numSavedSteps=20000 # Total number of steps in chains to save.

thinSteps=1 # Number of steps to "thin" (1=keep every step).

nPerChain = ceiling( ( numSavedSteps * thinSteps ) / nChains ) # Steps per chain.

# Create, initialize, and adapt the model:

jagsModel = jags.model( "model.txt" , data=dataList ,

n.chains=nChains , n.adapt=adaptSteps )

# Burn-in:

cat( "Burning in the MCMC chain...\n" )

update( jagsModel , n.iter=burnInSteps )

# The saved MCMC chain:

cat( "Sampling final MCMC chain...\n" )

mcmcCoda = coda.samples( jagsModel , variable.names=parameters ,

n.iter=nPerChain , thin=thinSteps )

#------------------------------------------------------------------------------

# EXAMINE THE RESULTS

# Display diagnostics of chain, for specified parameters:

parameterNames = varnames(mcmcCoda) # get all parameter names

for ( parName in parameterNames ) {

diagMCMC( codaObject=mcmcCoda , parName=parName ,

saveName=fileNameRoot , saveType=graphFileType )

}

# Convert coda-object codaSamples to matrix object for easier handling.

mcmcChain = as.matrix( mcmcCoda )

chainLength = NROW(mcmcChain)

openGraph(width=10,height=6)

layout(matrix(1:6,nrow=2,byrow=TRUE))

# posterior predictive

hist( dataList$y , xlab="y" , main="Data w. Post. Pred." , breaks=30 ,

col="pink" , border="white" , prob=TRUE , cex.lab=1.5)

pltIdx = floor(seq(1,chainLength,length=20))

xComb = seq( min(dataList$y) , max(dataList$y) , length=501 )

for ( chnIdx in pltIdx ) {

lines( xComb ,

dlnorm( xComb, mcmcChain[chnIdx,"muOfLogY"], mcmcChain[chnIdx,"sigmaOfLogY"] ),

col="skyblue" )

}

# param's of log(y)

postInfo = plotPost( mcmcChain[,"muOfLogY"] , xlab="mu of log(y)" )

postInfo = plotPost( mcmcChain[,"sigmaOfLogY"] , xlab="sigma of log(y)" )

# param's of y

postInfo = plotPost( mcmcChain[,"modeOfY"] , xlab="mode of y" )

postInfo = plotPost( mcmcChain[,"muOfY"] , xlab="mu of y" )

postInfo = plotPost( mcmcChain[,"sigmaOfY"] , xlab="sigma of y" , cenTend="mode")

saveGraph(file=fileNameRoot,type=graphFileType)

modeOfY <- exp(muOfLogY-sigmaOfLogY^2)

sigmaOfY <- sqrt(exp(2*muOfLogY+sigmaOfLogY^2)*(exp(sigmaOfLogY^2)-1))

}

" # close quote for modelstring

writeLines(modelstring,con="model.txt")

#------------------------------------------------------------------------------

# INTIALIZE THE CHAINS.

# Let JAGS do it

#------------------------------------------------------------------------------

# RUN THE CHAINS

require(rjags)

parameters = c("muOfLogY" , "sigmaOfLogY" , "muOfY" , "modeOfY" , "sigmaOfY" )

adaptSteps = 1000 # Number of steps to "tune" the samplers.

burnInSteps = 1000 # Number of steps to "burn-in" the samplers.

nChains = 3 # Number of chains to run.

numSavedSteps=20000 # Total number of steps in chains to save.

thinSteps=1 # Number of steps to "thin" (1=keep every step).

nPerChain = ceiling( ( numSavedSteps * thinSteps ) / nChains ) # Steps per chain.

# Create, initialize, and adapt the model:

jagsModel = jags.model( "model.txt" , data=dataList ,

n.chains=nChains , n.adapt=adaptSteps )

# Burn-in:

cat( "Burning in the MCMC chain...\n" )

update( jagsModel , n.iter=burnInSteps )

# The saved MCMC chain:

cat( "Sampling final MCMC chain...\n" )

mcmcCoda = coda.samples( jagsModel , variable.names=parameters ,

n.iter=nPerChain , thin=thinSteps )

#------------------------------------------------------------------------------

# EXAMINE THE RESULTS

# Display diagnostics of chain, for specified parameters:

parameterNames = varnames(mcmcCoda) # get all parameter names

for ( parName in parameterNames ) {

diagMCMC( codaObject=mcmcCoda , parName=parName ,

saveName=fileNameRoot , saveType=graphFileType )

}

# Convert coda-object codaSamples to matrix object for easier handling.

mcmcChain = as.matrix( mcmcCoda )

chainLength = NROW(mcmcChain)

openGraph(width=10,height=6)

layout(matrix(1:6,nrow=2,byrow=TRUE))

# posterior predictive

hist( dataList$y , xlab="y" , main="Data w. Post. Pred." , breaks=30 ,

col="pink" , border="white" , prob=TRUE , cex.lab=1.5)

pltIdx = floor(seq(1,chainLength,length=20))

xComb = seq( min(dataList$y) , max(dataList$y) , length=501 )

for ( chnIdx in pltIdx ) {

lines( xComb ,

dlnorm( xComb, mcmcChain[chnIdx,"muOfLogY"], mcmcChain[chnIdx,"sigmaOfLogY"] ),

col="skyblue" )

}

# param's of log(y)

postInfo = plotPost( mcmcChain[,"muOfLogY"] , xlab="mu of log(y)" )

postInfo = plotPost( mcmcChain[,"sigmaOfLogY"] , xlab="sigma of log(y)" )

# param's of y

postInfo = plotPost( mcmcChain[,"modeOfY"] , xlab="mode of y" )

postInfo = plotPost( mcmcChain[,"muOfY"] , xlab="mu of y" )

postInfo = plotPost( mcmcChain[,"sigmaOfY"] , xlab="sigma of y" , cenTend="mode")

saveGraph(file=fileNameRoot,type=graphFileType)

**#-------------------------------------------------------------------------------**

Hi John,

ReplyDeleteThank you very much for this code. It is helping me to estimate a log-normal distribution of some real data I want to analyze. Once you have run your code on some data Y, how can you ask the predicted posterior distribution about the probability that the data are above a certain value?

Kind regards

If I understand your question, do it as follows. First, call the value you're wondering about v. Then, at every step in the MCMC chain, compute the cumulative probability distribution in the lognormal distribution above v. That gives you a posterior distribution on the predicted probability that y>v. The cumulative probability above v is 1-Phi((log(v)-mu)/sigma), for mu and sigma on the log(y) scale.

DeleteI can do it like this (I want to calculate prob x>30 for two data-sets):

ReplyDeletegrid <- seq(0,100,.1)

plot(grid,dlnorm(grid,0.231,1.44),type="l",xlab="Compound (microgr/m3)",ylab="f(x)", ylim=c(0,0.05), lwd=2)

lines(grid,dlnorm(grid,1.95,1.58),col='red', lwd=3)

abline(v=30, col='blue', lwd=2)

legend("topright",c("July-2006 Density","January-2017 Density"),lty=1,col=1:2)

JUL <- plnorm(30, meanlog = 0.231, sdlog = 1.44, lower.tail = FALSE, log.p = FALSE)

JAN <- plnorm(30, meanlog = 1.95, sdlog = 1.58, lower.tail = FALSE, log.p = FALSE)

JAN/JUL

But, is there a different way to do it (asking all posterior draws)?

I forgot to add that I obained the meanlog and sdlog values from your code: I used meanlog= mu of log(y) and sdlog= sigma of log(y).

Delete