# Seber pages 506-507, after a Golub original

cancor <-
function(x, y, xcenter=TRUE, ycenter=TRUE)
{
	# Canonical correlation analysis (Seber 1984, pp. 506-507, after
	# a Golub original): QR-decompose the (optionally centred) x and y,
	# then take the SVD of Qx'Qy.  The singular values are the canonical
	# correlations; backsolving against the R factors gives the
	# coefficient matrices.
	#
	# xcenter/ycenter: TRUE (column means), FALSE (no centring), or a
	# numeric vector of centres recycled to the number of columns.
	# Returns list(cor, xcoef, ycoef, xcenter, ycenter).
	x <- as.matrix(x)
	y <- as.matrix(y)
	if(nrow(x) != nrow(y)) stop("unequal number of rows in cancor")
	nr <- nrow(x)
	ncx <- ncol(x)
	ncy <- ncol(y)
	if(is.logical(xcenter)) {
		if(xcenter) {
			xcenter <- colMeans(x)
			# rep(center, rep(nr, ncx)) expands each column mean
			# nr times, matching x's column-major layout.
			x <- x - rep(xcenter, rep(nr, ncx))
		}
		else xcenter <- rep(0, ncx)
	}
	else {
		xcenter <- rep(xcenter, length.out = ncx)
		x <- x - rep(xcenter, rep(nr, ncx))
	}
	if(is.logical(ycenter)) {
		if(ycenter) {
			ycenter <- colMeans(y)
			y <- y - rep(ycenter, rep(nr, ncy))
		}
		else ycenter <- rep(0, ncy)
	}
	else {
		ycenter <- rep(ycenter, length.out = ncy)
		y <- y - rep(ycenter, rep(nr, ncy))
	}
	qx <- qr(x)
	qy <- qr(y)
	dx <- qx$rank
	dy <- qy$rank
	# compute svd(Qx'Qy); ranks dx, dy guard against collinear columns
	z <- svd(qr.qty(qx, qr.qy(qy, diag(1, nr, dy)))[seq_len(dx), , drop = FALSE],
		dx, dy)
	list(cor = z$d,
		xcoef = backsolve((qx$qr)[seq_len(dx), seq_len(dx), drop = FALSE], z$u),
		ycoef = backsolve((qy$qr)[seq_len(dy), seq_len(dy), drop = FALSE], z$v),
		xcenter = xcenter,
		ycenter = ycenter)
}
"cmdscale"<-
function(d, k = 2, eig = FALSE)
{
	# Classical (Torgerson) multidimensional scaling: double-centre
	# -d^2/2, take the top-k eigenvectors scaled by sqrt(eigenvalue).
	#
	# d:   a "dist" object, or a square symmetric matrix of distances.
	# k:   number of dimensions for the configuration.
	# eig: if TRUE also return the k eigenvalues used.
	#
	# Fixes relative to the original:
	#  * the square-matrix branch now applies the same -0.5*d^2
	#    transform as the dist branch (it was silently skipped);
	#  * eigen() in R returns values in DECREASING order, so the top-k
	#    are the first k, not e$values[n:(n-k+1)];
	#  * diag(sqrt(ev)) with k = 1 built an identity matrix rather than
	#    a 1x1 scale -- diag(..., nrow = k) pins the size;
	#  * the .C("dblcen") native call is replaced by exact double
	#    centring in R, so no compiled code is needed.
	if(any(is.na(d)))
		stop("NA values not allowed in d")
	if(is.null(n <- attr(d, "Size"))) {
		x <- as.matrix(d)
		if((n <- nrow(x)) != ncol(x))
			stop("Distances must be result of dist or a square matrix")
		x <- -0.5 * x^2
	}
	else {
		x <- matrix(0, n, n)
		x[row(x) > col(x)] <- -0.5 * d^2
		x <- x + t(x)
	}

	# Double centring: subtract row means, then the (updated) column
	# means; equivalent to B = x - r_i - c_j + grand mean.
	x <- x - rowMeans(x)
	x <- sweep(x, 2L, colMeans(x))
	e <- eigen(x, symmetric = TRUE)
	ev <- e$values[seq_len(k)]
	# NOTE(review): a negative eigenvalue (non-Euclidean d) yields NaN
	# coordinates here, as in the original -- confirm acceptable.
	points <- e$vectors[, seq_len(k), drop = FALSE] %*% diag(sqrt(ev), nrow = k)
	if(eig) list(points=points, eig=ev)
	else points
}
dist <-
function(x, method="euclidean")
{
	# Pairwise distances between the rows of x, computed by the native
	# "dist" routine.  Returns the lower triangle as a "dist" object
	# with Size and Labels attributes.
	#
	# method: one of "euclidean", "maximum", "manhattan", "canberra",
	# "binary" (partial matching allowed).  The historical misspelling
	# "euclidian" is kept as an alias for backward compatibility.
	if(identical(method, "euclidian")) method <- "euclidean"
	method <-  pmatch(method, c("euclidean", "maximum",
		"manhattan", "canberra", "binary"))
	if(is.na(method))
		stop("invalid distance method")
	# NOTE(review): in R, pmatch() signals ambiguity as NA, so the -1
	# branch below is S-era legacy and should be unreachable -- confirm.
	if(method == -1)
		stop("ambiguous distance method")

	x <- as.matrix(x)
	len <- nrow(x)*(nrow(x) - 1)/2

	d <- .C("dist",
		as.double(x),
		as.integer(nrow(x)),
		as.integer(ncol(x)),
		double(len),
		as.integer(method))[[4]]
	attr(d, "Size") <- nrow(x)
	attr(d, "Labels") <- dimnames(x)[[1]]
	class(d) <- "dist"
	return(d)
}

print.dist <-
function(x, ...)
{
	# Print method for "dist": display the lower triangle as a labelled
	# matrix, blanking the NA upper triangle.  The first row and last
	# column are always empty and are dropped.
	#
	# Fixes: the signature now matches the print generic (x, ...),
	# `na = ""` no longer relies on silent partial matching of
	# `na.print`, drop = FALSE keeps a labelled matrix for Size == 2,
	# and the object is returned invisibly as print methods should.
	size <- attr(x, "Size")
	m <- matrix(NA, size, size)
	m[row(m) > col(m)] <- x
	labels <- attr(x, "Labels")
	if(is.null(labels))
		dimnames(m) <- list(1:size, 1:size)
	else
		dimnames(m) <- list(labels, labels)
	print(m[-1, -size, drop = FALSE], na.print = "", ...)
	invisible(x)
}

names.dist <-
function(d)
{
	# The names of a "dist" object are the observation labels,
	# stored in its "Labels" attribute (NULL when unlabelled).
	attr(d, "Labels")
}

"names<-.dist" <- function(d, value)
{
	# Replacement method for names() on "dist" objects: stores the new
	# names in the "Labels" attribute after checking the length matches
	# the number of observations (the "Size" attribute).
	#
	# The generic invokes this as `names<-`(d, value = v), so the second
	# formal MUST be called `value`; the original name `n` made dispatch
	# fail with "unused argument (value = ...)".
	if(length(value) != attr(d, "Size"))
		stop("invalid names for dist object")
	attr(d, "Labels") <- value
	d
}
# Hierarchical clustering, on raw input data; we will use Euclidean
# distance.  A range of criteria are supported; also there is a
# storage-economic option.
#
# We use the general routine, `hc', which caters for 7 criteria,
# using a half dissimilarity matrix; (BTW, this uses the very efficient
# nearest neighbor chain algorithm, which makes this algorithm of
# O(n^2) computational time, and differentiates it from the less
# efficient -- i.e. O(n^3) -- implementations in all commercial
# statistical packages -- as far as I am aware -- except Clustan.)
#
# Clustering Methods:
#
# 1. Ward's minimum variance or error sum of squares method.
# 2. single linkage or nearest neighbor method.
# 3. complete linkage or diameter.
# 4. average linkage, group average, or UPGMA method.
# 5. McQuitty's or WPGMA method.
# 6. median, Gower's or WPGMC method.
# 7. centroid or UPGMC method.
#
# Original author: F. Murtagh, May 1992
# R Modifications: Ross Ihaka, Dec 1996

hclust <-
function(d, method="complete")
{
	# Hierarchical agglomerative clustering of a "dist" object using
	# the native Fortran nearest-neighbour-chain routine (Murtagh).
	#
	# method: one of "ward", "single", "complete", "average",
	# "mcquitty", "median", "centroid" (partial matching allowed).
	# Returns an "hclust" object: merge (n-1 x 2), height, order,
	# labels (taken from the Labels attribute of d).
	method <- pmatch(method, c("ward", "single",
		"complete", "average", "mcquitty",
		"median", "centroid"))
	if(is.na(method))
		stop("invalid clustering method")
	# NOTE(review): in R, pmatch() returns NA for ambiguity too, so
	# this S-era -1 branch should be unreachable -- confirm.
	if(method == -1)
		stop("ambiguous clustering method")

	n <- attr(d, "Size")
	if(is.null(n))
		stop("invalid dissimilarities")
	labels <- attr(d, "Labels")

	len <- n*(n-1)/2
	# 1st step: agglomerate; ia/ib are the pair merged at each stage,
	# crit the merge criterion value.
	hcl <- .Fortran("hclust",
		n = as.integer(n),
		len = as.integer(len),
		method = as.integer(method),
		ia = integer(n),
		ib = integer(n),
		crit = double(n),
		membr = double(n),
		nn = integer(n),
		disnn = double(n),
		flag = logical(n),
		diss = as.double(d))

	# 2nd step: interpret the information that we now have
	# as merge, height, and order lists.
	hcass <- .Fortran("hcass2",
		n = as.integer(n),
		ia = as.integer(hcl$ia),
		ib = as.integer(hcl$ib),
		order = integer(n),
		iia = integer(n),
		iib = integer(n))

	tree <- list(
		merge = cbind(hcass$iia[seq_len(n - 1)], hcass$iib[seq_len(n - 1)]),
		height = hcl$crit[seq_len(n - 1)],
		order = hcass$order,
		labels = labels)	# reuse the local instead of re-reading attr(d,)
	class(tree) <- "hclust"
	tree
}

plot.hclust <-
function(tree, hang=0.1, ...)
{
	# Plot a dendrogram for an "hclust" tree object.
	# hang: fraction of the total height by which leaf labels hang
	# below the lowest merge; a negative value extends the y-range
	# down to 0 instead.
	merge <- tree$merge
	if(!is.matrix(merge) || ncol(merge) != 2)
		stop("invalid dendrogram")
	n <- nrow(merge)
	height <- as.double(tree$height)
	# leaf positions along the x-axis, as doubles for the dend call
	order <- as.double(order(tree$order))
	labels <- tree$labels
	# NOTE(review): with n merge rows there are n+1 leaves, so the
	# 1:n default labels look one short -- confirm against dend().
	if(is.null(labels)) labels <- 1:n
	labels <- as.character(labels)

	if(hang < 0) ylim <- range(height, 0)
	else {
		# convert hang from a fraction to plot units before
		# extending the y-range downward for the hanging leaves
		ylim <- range(height)
		hang <- hang * diff(ylim)
		ylim <- ylim - c(hang, 0)
	}

	plot.new()
	# NOTE(review): par2 is not a base R function -- presumably a
	# local wrapper passing the ... graphics parameters to par();
	# confirm where it is defined.
	par2(...)
	plot.window(c(1, n+1), ylim, log="")

	# NOTE(review): relies on a dendrogram-drawing .Internal primitive
	# in this R build; not available in modern R.
	.Internal(dend(n, merge, height, order, hang, labels))

	axis(2)
	invisible()
}
prcomp <- function(x, scale=FALSE, use="all.obs") {
	# Principal components analysis via the SVD of the covariance
	# matrix (or, when scale = TRUE, the correlation matrix).
	# use: missing-value policy forwarded to cov()/cor().
	# Returns a "prcomp" object: var (component variances), load
	# (loadings, variables x components), scale.
	mat <- as.matrix(x)
	cv <- if (scale) cor(mat, use = use) else cov(mat, use = use)
	decomp <- svd(cv)
	comp.names <- paste("Comp.", seq_len(ncol(cv)), sep = "")
	variances <- decomp$d
	loadings <- decomp$u
	names(variances) <- comp.names
	dimnames(loadings) <- list(dimnames(x)[[2]], comp.names)
	res <- list(var = variances, load = loadings, scale = scale)
	class(res) <- "prcomp"
	res
}

print.prcomp <- function(x, ...) {
	# Print method for "prcomp": shows which matrix was analysed,
	# the component variances, and the loadings.
	# Fixes: accepts ... (forwarded to print) so the signature matches
	# the print generic, and returns the object invisibly.
	cat("\nPrincipal Components:", if(x$scale) "Correlation" else "Covariance",
		"matrix\n\n")
	cat("Component Variances:\n")
	print(x$var, ...)
	cat("\nLoadings:\n")
	print(x$load, ...)
	cat("\n")
	invisible(x)
}

plot.prcomp <- function(x, main="Scree Plot", ylab="Variance",
		xlab="Component", ...) {
	# Scree plot for a "prcomp" object: component variances in order.
	# Extra graphical parameters are forwarded to plot().
	variances <- x$var
	plot(variances, main = main, xlab = xlab, ylab = ylab, ...)
}
# Load the compiled code backing the .C/.Fortran calls above, and
# register the mva library as provided.
# NOTE(review): S/early-R startup glue -- in modern R, library.dynam()
# requires package and lib.loc arguments and provide() is defunct;
# confirm against the target R version.
library.dynam("mva.dll")
provide(mva)
