https://github.com/cran/tawny
Tip revision: 607db90a86a5f670c56cd232c6d9032a524c6285 authored by Brian Lee Yung Rowe on 02 March 2009, 00:00:00 UTC
version 1.0
version 1.0
Tip revision: 607db90
framework.R
# Look at rmetrics.org
#library(PerformanceAnalytics)
library(futile)
library(zoo)
library(quantmod)
# TODO
# x. Modify optimizePortfolio to use rollapply
# x. Modify optimizePortfolio.RMT to use rollapply
# x. Create portfolio function using zoo/xts
# x. Auto generate equal-weighted portfolio to compare with
# . Time-awareness in ensure so that keeping a session open does the right
# thing
# . Verify/stress test code
# x. Clean up code and comments
# x. Clean up logging
# . Provide example using quantmod
# x. Use PerformanceAnalytics for portfolio performance (LATER)
# x. Possibly modify chart (filter.RMT, mp.theory) to use quantmod -
# doesn't seem to be compatible given non-time series nature of some of the
# plots
# x. Disable charting of fit as an option
# x. Dates for weights are incorrect (why?)
# x. Line up dates
# h <- getPortfolioReturns(getIndexComposition('^DJI'), obs=400, reload=TRUE)
# ws <- optimizePortfolio(h, 350, getCorFilter.RMT() )
# pf <- plotPerformance(h, ws, 350, y.min=-0.4)
# ef <- compare.EqualWeighted(h, 350, y.min=-0.4)
# mf <- compare.Market('^GSPC',400,350, y.min=-0.4)
##---------------------------- PUBLIC FUNCTIONS -----------------------------##
# Optimize a portfolio using the specified portfolio generator and correlation
# matrix generator. This is a generic optimizer that allows any custom generator
# of correlation matrices to be used.
# Returns current log level of package
#logLevel <- function(new.level=NULL)
#{
# if (! is.null(new.level)) { options('log.level'=new.level) }
#
# if (is.null(getOption('log.level'))) { return(0) }
# return(getOption('log.level'))
#}
# Set with options('use.plots'=FALSE)
# Defaults to TRUE
#usePlots <- function(new.val=NULL)
#{
# if (! is.null(new.val)) { options('use.plots'=new.val) }
#
# if (is.null(getOption('use.plots'))) { return(TRUE) }
# return(getOption('use.plots'))
#}
# Ensures that a given series exists and downloads via quantmod if it doesn't
# serie - Either a string or a list
ensure <- function(serie, src='FRED', reload=FALSE, ...)
{
for (series in serie)
{
if (exists('from')) from <- get('from')
if (exists('to')) to <- get('to')
# Force a reload under certain situations
cleaned <- sub('^','', series, fixed=TRUE)
if (! exists(cleaned)) reload <- TRUE
else if (! 'zoo' %in% class(get(cleaned)) ) reload <- TRUE
else if (! 'Date' %in% class(start(get(cleaned))) ) reload <- TRUE
else if (! 'Date' %in% class(end(get(cleaned))) ) reload <- TRUE
else if (exists('from')) { if (start(get(cleaned)) > from) reload <- TRUE }
else if (exists('to')) { if (end(get(cleaned)) < to) reload <- TRUE }
if (! reload) next
cat("(Re)loading symbol",series,"from",src,"\n")
getSymbols(series, src=src, ...)
}
}
# Example
# Get SP500 components
# sp500.idx <- getIndexComposition()
# Get DOW components
# dow.idx <- getIndexComposition('^DJI')
# Get FTSE components
# ftse.idx <- getIndexComposition('^FTSE')
# Get HSI components
# hsi.idx <- getIndexComposition('^HSI')
# h <- getPortfolioReturns(getIndexComposition('^DJI'), obs=100)
getIndexComposition <- function(ticker='^GSPC', hint=NA, src='yahoo')
{
if (is.na(hint))
{
hints <- c(500, 30, 102, 42)
names(hints) <- c('^GSPC', '^DJI', '^FTSE', '^HSI')
hint <- hints[ticker]
}
# http://download.finance.yahoo.com/d/quotes.csv?s=@%5EGSPC&f=sl1d1t1c1ohgv&e=.csv&h=0
base <- 'http://download.finance.yahoo.com/d/quotes.csv?s=@'
formats <- '&f=sl1d1t1c1ohgv&e=.csv&h='
comp <- NULL
pages = max(1, hint %/% 50)
for (page in 1:pages)
{
start <- (page-1) * 50 + 1
url <- paste(base, ticker, formats, start, sep='')
cat("Loading page",page,"for",ticker,"\n")
data <- read.csv(url, header=FALSE)
# This is here due to a bug in Yahoo's download where the first record gets
# duplicated in each subsequent page
idx = 2; if (page == 1) { idx = 1 }
comp <- rbind(comp, data[idx:anylength(data),])
}
as.character(comp[,1])
}
# This produces a portfolio in matrix format (t x m) as a zoo class.
# Params
# symbols: A vector of symbols to retrieve. This uses quantmod to retrieve
# the data.
# obs: The number of observations that you want. Use this if you want the
# number of points to be explicit. Either obs or start is required.
# start: The start date, if you know that explicitly. Using this will ensure
# that the data points are bound to the given range but the precise number
# of points will be determined by the number of trading days.
# end: The most recent date of observation. Defaults to current day.
# fun: A function to use on each symbol time series. Defaults to Cl to operate
# on close data. For expected behavior, your function should only return
# one time series.
# TODO:
# Fix names
# Add method to add other portfolio elements (such as synthetic securities)
getPortfolioReturns <- function(symbols, obs=NULL, start=NULL, end=Sys.Date(),
fun=function(x) Delt(Cl(x)), reload=FALSE, na.value=NA, ...)
{
if (is.null(start) & is.null(obs)) { stop("Either obs or start must be set") }
end <- as.Date(end)
# Estimate calendar days from windowed business days. The 10 is there to
# ensure enough points, which get trimmed later
if (is.null(start)) { start <- end - (10 + obs * 365/250) }
#ensure(symbols, src='yahoo', reload=reload, from=start, to=end, ...)
# Merge into a single zoo object
p <- xts(order.by=end)
for (s in symbols)
{
asset <- getSymbols(s, from=start, to=end, auto.assign=FALSE)
if (logLevel() > 0) cat("Binding",s,"for ")
raw <- fun(asset)
if (logLevel() > 0)
cat("[",format(start(raw)),",",format(end(raw)),"]\n",sep='')
a <- xts(raw, order.by=index(asset))
p <- cbind(p, a[2:anylength(a)])
}
colnames(p) <- symbols
# First remove dates that have primarily NAs (probably bad data)
o.dates <- rownames(p)
p <- p[apply(p, 1, function(x) sum(x, na.rm=TRUE) != 0), ]
cat("Removed suspected bad dates",setdiff(o.dates,rownames(p)),"\n")
if (! is.na(na.value))
{
#for (s in symbols) p[,s][is.na(p[,s])] <- na.value
p[is.na(p)] <- 0
cat("Replaced NAs with",na.value,"\n")
}
else
{
# NOTE: This has consistency issues when comparing with a market index
o.dates <- rownames(p)
p <- p[apply(p, 1, function(x) sum(is.na(x)) < 0.1 * length(x) ), ]
cat("Removed dates with too many NAs",setdiff(o.dates,rownames(p)),"\n")
# Now remove columns with NAs
nas <- apply(p, 2, function(x) !any(is.na(x)) )
p <- p[,which(nas == TRUE)]
cat("Removed symbols with NAs:",setdiff(symbols,anynames(p)),"\n")
}
if (is.null(obs)) { return(p[paste(start,end, sep='::')]) }
p <- p[index(p) <= end]
idx.inf <- anylength(p) - min(anylength(p), obs) + 1
idx.sup <- anylength(p)
if (logLevel() > 0) cat("Returning rows [",idx.inf,",",idx.sup,"]\n")
cat("Loaded portfolio with",ncol(p),"assets\n")
p[idx.inf:idx.sup, ]
}
# Example
# h <- getPortfolioReturns(c('MER','C','GS','MS','BAC','AAPL','GOOG','MSFT','IBM','CSCO'),150)
## Not run:
# h <- getPortfolioReturns(c('MER','C','GS','MS','BAC','WFC',
# 'ORCL','YHOO','T','AAPL','GOOG','MSFT','IBM','CSCO','INTC',
# 'GM','F','CAT','MMM','XOM','CVX','RIG','USO','ABX',
# 'AMGN','HGSI','PFE','JNJ', 'FSLR','STP'), 150, reload=TRUE)
# c.gen <- getCorFilter.RMT(hint=c(4,1))
# weights <- optimizePortfolio(h, 100, c.gen)
## End(Not run)
# NOTE: For zoo compatibility, need to use a t x m matrix for h
optimizePortfolio <- function(h, window, cor.gen=getCorFilter.RMT(), ...)
{
if (! 'zoo' %in% class(h))
{ cat("WARNING: Zoo objects are preferred for dimensional safety\n") }
log.level <- logLevel()
my.optimizer <- function(h.window)
{
if (log.level > 2) { cat("Getting correlation matrix\n") }
cor.mat <- cor.gen(h.window, ...)
if (log.level > 2) { cat("Optimizing portfolio\n") }
p.optimize(h.window, cor.mat)
}
if (log.level > 0)
{
cat("Optimizing portfolio for",format(start(h)),"-",format(end(h)),"\n")
}
ws <- rollapply(h, window, my.optimizer, by.column=FALSE, align='right')
xts(ws, order.by=index(ws))
}
##----------------------------- TESTING FUNCTIONS ---------------------------##
# Plot the performance of the portfolio
# Params:
# h: A returns matrix. This can be either in-sample or out-of-sample
# weights: Either a vector or an m x t matrix of weights
# Usage:
# h <- some.returns.matrix # m x t
# ws <- optimizePortfolio.RMT(h)
# plotPerformance(h,ws)
# Example:
# load('RandomMatrix/spxReturnsQ5.rData')
# spx.small <- t(as.matrix(spx.retq5[1:250,][,1:50]))
# spx.small.history <- t(spx.retq5)[1:50,]
# NOTE: for zoo compatibility,
# h is t x m
# w is t x m
# ws <- optimizePortfolio(spx[1:350, 1:50], 250, getCorFilter.RMT())
# ps <- plotPerformance(spx[1:350, 1:50], ws)
# perf <- plotPerformance(h,weights,100, y.min=-0.75)
# The weights are currently expected to be the same as the last date in the
# window. Consequently, the weights necessarily are valid for T+1 at the
# earliest.
plotPerformance <- function(h, weights, window=NULL, rf.rate=0.01,
new.plot=TRUE, y.min=-0.25, y.max=0.25, bg=NULL,
name='', color='red', colors=c(), ...)
{
if (is.null(window)) { window <- anylength(h) - anylength(weights) + 1 }
if (! 'zoo' %in% class(h))
{ cat("WARNING: Zoo objects are preferred for dimensional safety\n") }
if (anylength(h) != anylength(weights) + window - 1)
{ cat("WARNING: Dimensions are inconsistent. This can cause errors\n") }
log.level <- logLevel()
ts.rets <- portfolioReturns(h, weights)
stats <- portfolioPerformance(ts.rets, window, rf.rate)
# Plot this output
if (log.level > 3)
{
cat("y range = [",y.min,",",y.max,"]\n")
#cat("x range = [",as.Date(start(xaxis)),",",as.Date(end(xaxis)),"]\n")
}
#yrange=c(min(y.min, ts.perf-0.05), max(y.max, ts.perf+0.05))
# Need to synchronize y bounds for multiple charts
yrange=c(y.min, y.max)
#if (! is.null(bg)) par(bg=bg, lab=c(7,7,7))
#else par(lab=c(7,7,7))
if (log.level > 3) { cat("Plotting chart\n") }
if (anylength(dev.list()) > 0 & new.plot & is.null(bg))
{
par(lab=c(7,7,7), new=new.plot)
lines(stats$cum.returns, col=color, ...)
}
else
{
if (!is.null(bg)) par(bg=bg)
par(lab=c(7,7,7))
plot(stats$cum.returns,
type="l",col=color,ylim=yrange,ylab="Return",xlab="Time", ...);
}
# Ignore line types for now
#stats$ltys <- c(ltys, lty)
stats$colors <- c(colors, name=color)
names(stats$colors)[anylength(stats$colors)] <- name
grid()
#legend('topright', legend=names(stats$colors), lwd=2, cex=1.0,
# col=stats$colors, border.col='slategray4', inset=0.02)
legend('topright', legend=names(stats$colors), lwd=2, cex=1.0,
col=stats$colors, inset=0.02)
stats
}
# Calculate portfolio returns based
portfolioReturns <- function(h, weights)
{
log.level <- logLevel()
if (log.level > 5) { cat("class(index(h)):",class(index(h)),"\n") }
# Shift dates so weights are used on following date's data for out-of-sample
# performance
w.index <- c(index(weights[2:anylength(weights)]), end(weights) + 1)
index(weights) <- w.index
h.trim <- h[index(h) %in% index(weights)]
ts.rets <- apply(zoo(h.trim) * zoo(weights), 1, sum)
# This is in here to fix some strange behavior related to rownames vs index
# in zoo objects and how they are used after an apply function
#names(ts.rets) <- index(h.trim)
#ts.rets <- zoo(ts.rets, order.by=as.Date(names(ts.rets)))
ts.rets <- zoo(ts.rets, order.by=index(h.trim))
# This causes problems
#ts.rets <- xts(t(h.trim * weights) %*% rep(1, anylength(h.trim)), order.by=index(h.trim))
if (any(is.na(ts.rets)))
{
cat("WARNING: Filling NA returns with 0\n")
ts.rets[is.na(ts.rets)] <- 0
}
if (log.level > 2)
{
cat("Returns count:",anylength(ts.rets),"\n")
#cat("Date count:",anylength(xaxis),"\n")
}
return(ts.rets)
}
portfolioPerformance <- function(p, window, rf.rate=0.01)
{
#require(PerformanceAnalytics, quietly=TRUE)
#ts.perf <- cumprod(1+p) - 1
#xaxis <- as.Date(index(ts.perf))
#xaxis <- as.Date(index(p))
stats <- list()
stats$daily.returns <- p
stats$cum.returns <- cumprod(1 + stats$daily.returns) - 1
stats$period.stdev <- as.numeric(sd(p, na.rm=TRUE))
stats$annual.stdev <- as.numeric(sd(p, na.rm=TRUE) * (252/window)^0.5)
#stats$drawdown <- maxDrawdown(p)
stats$avg.return <- mean(stats$daily.returns)
stats$period.return <- as.numeric(last(stats$cum.returns))
stats$annual.return <- (1 + stats$period.return)^(252/window) - 1
#stats$sharpe.ratio <- (stats$annual.return - rf.rate) / stats$annual.stdev
prf <- (1 + rf.rate)^(window/252) - 1
#stats$sharpe.ratio <- SharpeRatio.annualized(stats$daily.returns, rf=prf, scale=252)
return(stats)
}
# Compare with the given market
# mkt.perf <- compare.Market('^DJI',150,100)
# mkt.perf <- compare.Market(DJI,150,100, y.min=-0.75)
compare.Market <- function(market, obs, window, end=Sys.Date(), color='#44bc43', ...)
{
if (is.character(market))
{
start <- end - (10 + obs * 365/250)
mkt <- getSymbols(market, src='yahoo',from=start,to=end, auto.assign=FALSE)
}
else mkt <- market
mkt.ret <- Delt(Cl(mkt))
mkt.ret <- mkt.ret[index(mkt.ret) <= end]
mkt.ret <- tail(mkt.ret, obs)
w.count <- obs - window + 1
weights <-
xts(matrix(1,ncol=1,nrow=w.count), order.by=index(tail(mkt.ret, w.count)))
plotPerformance(mkt.ret, weights, window, color=color, name='market', ...)
}
# Compare the performance with an equal weighted portfolio
# eq.perf <- compare.EqualWeighted(h, 100, y.min=-0.75)
compare.EqualWeighted <- function(h, window, color='#342a31', ...)
{
my.weights <- function(x) rep(1/ncol(x), ncol(x))
weights <- xts(rollapply(h, window, my.weights, by.column=FALSE, align='right'), order.by=index(h[window:anylength(h)]))
if (logLevel() > 0)
{ cat("weights: [",as.Date(start(weights)),",",as.Date(end(weights)),"]\n") }
#index(weights) <- index(h[window:anylength(h)])
plotPerformance(h, weights, window, color=color, name='naive', ...)
}
##------------------------ PRIVATE PORTFOLIO FUNCTIONS ----------------------##
# Optimize the portfolio by identifying the minimum variance portfolio
# Params:
# h: TxM zoo returns matrix
# c.denoised: Denoised correlation matrix (m x m)
# Returns:
# Optimized weights vector for portfolio holdings. Sum of weights = 1.
p.optimize <- function(h, c.denoised)
{
#if (logLevel() > 0) cat("Optimizing for",as.Date(end(h)),"\n")
try(c.inv <- solve(c.denoised))
if (!exists('c.inv'))
{
cat("Unable to invert correlation matrix. Returning zeros.\n")
return(rep(0, ncol(h)))
}
sdna <- function(x) { sd(x,na.rm=TRUE) }
h.vol <- apply(h, 2, sdna)
sig.clean <- c.inv / (h.vol[col(c.inv)] * h.vol[row(c.inv)])
norm <- sum(sig.clean)
apply(sig.clean,1,sum) / norm
}