# Hair and eye color example ####

## Create the data table
N <- matrix(c(326, 688, 343,  98,
               38, 116,  84,  48,
              241, 584, 909, 403,
              110, 188, 412, 681,
                3,   4,  26,  85),
            ncol = 4, byrow = TRUE)
colnames(N) <- c("Blue", "Light", "Medium", "Dark")
rownames(N) <- c("Fair", "Red", "Medium", "Dark", "Black")
N

x <- chisq.test(N)

# The mean square contingency coefficient
x$statistic / sum(N)

## CA step by step ####

## Step 0. Create our data table, from Le Roux & Rouanet (2004:52)
N <- matrix(c(326, 688, 343,  98,
               38, 116,  84,  48,
              241, 584, 909, 403,
              110, 188, 412, 681,
                3,   4,  26,  85),
            ncol = 4, byrow = TRUE)
colnames(N) <- c("Blue", "Light", "Medium", "Dark")
rownames(N) <- c("Fair", "Red", "Medium", "Dark", "Black")

# Look at the matrix
N

## Optional: save the data to a txt file:
write.table(x = N, file = "../data/hair_eyecolor.txt")

# Read the data from a txt file:
N <- as.matrix(read.table(file = "../data/hair_eyecolor.txt"))
N

## Step 0 and a half. Run CA from FactoMineR
## (this is cheating...)
FactoMineR::CA(N)

## Step 1. Calculate the total sum, which should add up to 5387 (ibid.:52)
n <- sum(N)
n

## Step 2. Thanks to our previous calculation, it is now easy to calculate
## a table of proportions by dividing all cells by the total sum n.
P <- N / n
P

## Step 3. From the table of proportions we can now calculate a) the column masses
column.masses <- colSums(P)
column.masses

## and b) the row masses
row.masses <- rowSums(P)
row.masses

## Step 4. Calculate the expected values, given the marginal distributions
## (column and row masses).
E <- row.masses %o% column.masses  # outer product of row and column masses
E
## If there were no association between hair and eye color, these are the
## values we would expect from the distribution of each variable on its own,
## i.e., hair color and eye color respectively.

## Step 5. How much do the observed values differ from the expected values?
## Let us find out.
R <- P - E
R
## We can definitely see some differences, but they are tiny. Still, they
## point to some association between rows and columns.

## Step 6. Calculate indexed residuals, because small differences in small
## cells might be large (and large differences in large cells may be small).
## We divide the residuals by their expected values.
I <- R / E
I
## Now we are talking!

## Step 7. Prepare for the magic of CA: calculate the standardized residuals
## (a way of mitigating outliers, for example).
Z <- I * sqrt(E)
Z

## Step 8. The magic of CA! Everything before this step, and everything after
## it, is highly intuitive and based on simple arithmetic. Now we enter the
## realm of linear algebra and perform a singular value decomposition.
## (This means we try to find uncorrelated variables from correlated ones,
## or axes if you will.)
## And no, I won't do it "by hand" this time; I will call the R function svd().
SVD <- svd(Z)
rownames(SVD$u) <- rownames(P)
rownames(SVD$v) <- colnames(P)
SVD
## This is, in some sense, where the coordinates come from. We will do some
## additional operations to get our final coordinates, but these are the
## numbers that we scale or standardize to get coordinates to project in
## our graphs.

## Let us first get some eigenvalues too, though.

# Eigenvalues
eigenvalues <- SVD$d^2
eigenvalues

## Do they match the ones in Le Roux & Rouanet (2004:53)?
sum(eigenvalues)
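## Sanity checks (a minimal sketch, not part of Le Roux & Rouanet's exposition):
## the SVD should reconstruct Z exactly, and the total inertia, i.e. the sum of
## the eigenvalues, should equal the mean square contingency coefficient
## (chi-square / n) that we computed at the top of this script.
all.equal(Z, SVD$u %*% diag(SVD$d) %*% t(SVD$v), check.attributes = FALSE)
all.equal(sum(eigenvalues), as.numeric(chisq.test(N)$statistic / n))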
## Step 9. Time for coordinates!

# Standard coordinates
standard.coordinates.rows <- sweep(SVD$u, 1, sqrt(row.masses), "/")
standard.coordinates.columns <- sweep(SVD$v, 1, sqrt(column.masses), "/")

# Principal coordinates
principal.coordinates.rows <- sweep(standard.coordinates.rows, 2, SVD$d, "*")
principal.coordinates.columns <- sweep(standard.coordinates.columns, 2, SVD$d, "*")

## Do they match the ones in Le Roux & Rouanet (2004:53)?
principal.coordinates.rows

## Plot the first two principal coordinates (note the reversed x axis).
plot(as.data.frame(principal.coordinates.columns[, 1:2]),
     col = "red", pch = 16,
     xlim = c(1.25, -1.25), ylim = c(-0.4, 0.4))
points(as.data.frame(principal.coordinates.rows[, 1:2]), col = "blue", pch = 16)

## And now we compare to the FactoMineR CA again:
FactoMineR::CA(N)
## Voila!

## Shout-out to Tim Bock (CA step by step) and Aaron Schlegel (SVD step by step).

## SVD step by step ####

## The SVD may be thought of as a technique for obtaining uncorrelated
## variables from correlated ones (finding the axes!).
# Thanks https://rpubs.com/aaronsc32/singular-value-decomposition-r

# The right singular vectors are the eigenvectors of t(Z) %*% Z
ATA <- t(Z) %*% Z
ATA.e <- eigen(ATA)
v.mat <- ATA.e$vectors
v.mat
# v.mat[, 1:2] <- v.mat[, 1:2] * -1
# v.mat

# The left singular vectors are the eigenvectors of Z %*% t(Z)
AAT <- Z %*% t(Z)
AAT
AAT.e <- eigen(AAT)
u.mat <- AAT.e$vectors
u.mat
u.mat <- u.mat[, 1:3]  # keep only the first three left singular vectors

# The singular values are the square roots of the eigenvalues,
# arranged on the diagonal
r <- sqrt(ATA.e$values)
r <- r * diag(length(r))[, 1:4]
r

# And square r to recover the eigenvalues
r^2
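## Sanity check (a minimal sketch, assuming the objects computed above): the
## singular values recovered from the eigendecomposition should match the ones
## returned by svd(Z). Z is a matrix of residuals, so it has at most
## min(nrow, ncol) - 1 = 3 non-zero singular values; the fourth eigenvalue of
## t(Z) %*% Z is zero in theory and may come out slightly negative in floating
## point, hence the abs() below. Also, eigen() only determines eigenvectors up
## to sign, so the columns of u.mat and v.mat may differ from SVD$u and SVD$v
## by a factor of -1 (which is what the commented-out sign flip above is for).
round(sqrt(abs(ATA.e$values)), 6)
round(SVD$d, 6)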