#### Script to perform Multiple Correspondence Analysis with the package soc.ca
# Package dokumentation: https://cran.r-project.org/web/packages/soc.ca/soc.ca.pdf

# The practice dataset is the The Taste Example, also used in Le Roux & Rouanet (2010): Multiple Correspondences Analysis, Sage.
# The data involve Q = 4 active variables and K = 8 + 8 + 7 + 6 = 29 categories
# n = 1215 individuals (after removing supplementary individuals)


##### PREPARATIONS ####
#### Installation of packages (only once if yoy haven't done it before)
#install.packages("soc.ca")
#install.packages("tidyverse")

# Load packages
library(soc.ca)
library(tidyverse)



# Read the taste dataset that is included in the package

data(taste)

# removing the supplementary individuals 
taste_act <- taste %>% 
  filter(Isup == "Active") 


# Basic statistics 

summary(taste_act)


#Data.frames creation for active and supplementary variables (all variables have to be factors, with NO missing values, meaning that NAs must be put in a category called "Missing", or something else)


active <- taste_act %>% 
  select(TV, Film, Art, Eat)

sup <- taste_act %>% 
  select(Gender, Age, Income)

id <- taste_act %>% 
  select(ID)

# Option to define Passive modalities for specific MCA (SMCA). The default option in the function soc.ca is that all individuals with missing values will be set ass passive.
#options(passive = c("taste.active1: Missing", "taste.active2: Missing"))
#To define passive modalities, we have to create a vector with, between quote marks: "the exact name of the variable within the data.frame, colon, space, the exact name of the modality". There still can be conflicts. In that case, give another name to the variable/the modality, or to both.


# PERFORMING THE MCA

mca_taste <- soc.mca(
  active = active, # note, this is not a good object naming practice.
  sup = sup,       # -''-
  identifier = id) # this is better, because parameter != object name.

# General information on the MCA
print(mca_taste)

#There are a lot of values to get using mca_taste$...
#For example to get the names of the active modalities
mca_taste$names.mod

#Or the supplementary modalities
mca_taste$names.sup

#Etc.


# Getting the variance of axis (eigenvalues), variance rates and modified rates for the first 12 axis
variance(mca_taste, dim = c(1:12)) # See p.46 in Le Roux(2010)


# Interpretation of the 3 first axis

contribution(mca_taste, 1)
contribution(mca_taste, 2)
contribution(mca_taste, 3)
contribution(mca_taste, 1:5, mode = "variable")



#### Mapping cloud of categories, cloud of individuals and supplementary categories ####

## MAPPING THE ACTIVE CATEGORIES
# Cloud of categories plane 1,2

map.active(
  object = mca_taste,
  dim = c(1, 2),
  point.shape = "variable",
  point.alpha = 0.8,
  #point.fill = "whitesmoke",
  point.fill = mca_taste$variable, # Color points by active variable
  point.color = "black",
  point.size = "freq", 
  #point.size = mca_taste$ctr.mod[,1], # Size of point determined by contribution to the first axis 
  label = TRUE,
  label.repel = TRUE, # Avoid to much overlapp of variables
  label.alpha = 0.8,
  label.color = "black",
  label.size = 4,
  label.fill = NULL,
  map.title = "Active modalities plane 1-2",
  labelx = "default",
  labely = "default",
  legend = NULL
) 


# Cloud of categories plane 2,3

map.active(
  object = mca_taste,
  dim = c(3, 2),
  point.shape = "variable",
  point.alpha = 0.8,
  point.fill = "whitesmoke",
  #point.fill = mca_taste$variable, # Color points by active variable
  point.color = "black",
  point.size = "freq", 
  # point.size = mca_taste$ctr.mod[,1], # Size of point determined by contribution to the first axis 
  label = TRUE,
  label.repel = TRUE, # Avoid to much overlapp of variables
  label.alpha = 0.8,
  label.color = "black",
  label.size = 4,
  label.fill = NULL,
  map.title = "Active modalities plane 2-3",
  labelx = "default",
  labely = "default",
  legend = NULL
)

# It is also possible to map only those categories contributing above average on each axis

# Example for axis 1
map.ctr(mca_taste,
        dim = c(1, 2), 
        ctr.dim = 1, 
        point.shape = "variable",
        point.alpha = 1, 
        point.fill = "black", 
        point.color = "black",
        point.size = "freq", 
        label = TRUE, 
        label.repel = TRUE,
        label.alpha = 1, 
        label.color = "black", 
        label.size = 6,
        label.fill = NULL, 
        map.title = "Above average modalities: axis 1", 
        labelx = "default",
        labely = "default", 
        legend = NULL)


# Example for axis 1 and 2
map.ctr(mca_taste,
        dim = c(1, 2), 
        ctr.dim = 1:2, 
        point.shape = "variable",
        point.alpha = 1, 
        point.fill = "black", 
        point.color = "black",
        point.size = "freq", 
        label = TRUE, 
        label.repel = TRUE,
        label.alpha = 1, 
        label.color = "black", 
        label.size = 3,
        label.fill = NULL, 
        map.title = "Above average modalities: axis 1 and 2", 
        labelx = "default",
        labely = "default", 
        legend = NULL)


## MAPPING THE SUPPLEMENTARY CATEGORIES

map.sup(mca_taste, 
        dim = c(1, 2), 
        point.shape = "variable",
        point.alpha = 1, 
        point.fill = "black", 
        point.color = "black",
        point.size = "freq", 
        label = TRUE, 
        label.repel = TRUE,
        label.alpha = 1, 
        label.color = "black", 
        label.size = 4,
        label.fill = NULL, 
        map.title = "Space of supplementary variables. Axes 1 and 2", 
        labelx = "default",
        labely = "default", 
        legend = NULL)


map.sup(mca_taste, 
        dim = c(1, 2), 
        point.shape = "variable",
        point.alpha = 1, 
        point.fill = "black", 
        point.color = "black",
        point.size = "freq", 
        label = TRUE, 
        label.repel = TRUE,
        label.alpha = 1, 
        label.color = "black", 
        label.size = 4,
        label.fill = NULL, 
        map.title = "Space of supplementary variables. Axes 1 and 2", 
        labelx = "default",
        labely = "default", 
        legend = NULL)


## MAPPING ACTIVE AND SUPPLEMENTARY CATEGORIES IN SAME GRAPH

map.select(mca_taste, 
           dim = c(1, 2), 
           list.mod = TRUE, 
           list.sup = TRUE, 
           list.ind = NULL, 
           point.shape = "variable", 
           point.alpha = .8, 
           point.fill = "whitesmoke", 
           point.color = "black", 
           point.size = "freq", 
           label = TRUE, 
           label.repel = TRUE, 
           label.alpha = 1, 
           label.color = "black", 
           label.size = 5, 
           label.fill = NULL, 
           map.title = "Map selection of variables", 
           labelx = "default", 
           labely = "default", 
           legend = NULL)


## MAPPING THE CLOUD OF INDIVIDUALS

# Plane 1, 2
map.ind(
  mca_taste,
  dim = c(1, 2),
  point.shape = 21,
  point.alpha = 0.8,
  point.fill = "whitesmoke",
  point.color = "black",
  point.size = 2,
  label = FALSE,
  label.repel = FALSE,
  label.alpha = 0.8,
  label.color = "black",
  label.size = 4,
  label.fill = NULL,
  map.title = "Cloud of individuals plane 1-2",
  labelx = "default",
  labely = "default",
  legend = NULL
) 

# Plane 2, 3
map.ind(
  mca_taste,
  dim = c(3, 2),
  point.shape = 21,
  point.alpha = 0.8,
  point.fill = "whitesmoke",
  point.color = "black",
  point.size = 2,
  label = FALSE,
  label.repel = FALSE,
  label.alpha = 0.8,
  label.color = "black",
  label.size = 4,
  label.fill = NULL,
  map.title = "Cloud of individuals plane 2-3",
  labelx = "default",
  labely = "default",
  legend = NULL
)


# Colour ind based on age
map.ind(
  mca_taste,
  dim = c(1, 2),
  point.shape = 21,
  point.alpha = 0.8,
  point.fill = sup$Age,
  point.color = "black",
  point.size = 2,
  label = FALSE,
  label.repel = FALSE,
  label.alpha = 0.8,
  label.color = "black",
  label.size = 4,
  label.fill = NULL,
  map.title = "Cloud of individuals plane 1-2",
  labelx = "default",
  labely = "default",
  legend = sup$Age
) 

#MAP A SELECTION OF MODALITIES
#Here we look for the rank of supplementary modalities (or active, with "list.mod", or individuals, with "list.ind"), and we add a vector with their number:
mca_taste$names.sup
mca_taste$names.mod

# The TV-modalities and age
map.select(mca_taste, 
           dim = c(1, 2), 
           list.mod = c(9:16), 
           list.sup = c(3:8), 
           list.ind = NULL, 
           point.shape = "variable", 
           point.alpha = .8, 
           point.fill = "whitesmoke", 
           point.color = "black", 
           point.size = "freq", 
           label = TRUE, 
           label.repel = TRUE, 
           label.alpha = 1, 
           label.color = "black", 
           label.size = 5, 
           label.fill = NULL, 
           map.title = "Map selection of variables", 
           labelx = "default", 
           labely = "default", 
           legend = NULL)


#mapping a group of individuals, example all aged 65+
attach(taste_act)
oldies <- Age == "65+"

map.select(mca_taste, 
           dim = c(1, 2), 
           list.mod = NULL, 
           list.sup = NULL, 
           list.ind = oldies, 
           point.shape = 21, 
           point.alpha = .8, 
           point.fill = "whitesmoke", 
           point.color = "black", 
           point.size = 3, 
           label = FALSE, 
           label.repel = FALSE, 
           label.alpha = 1, 
           label.color = "black", 
           label.size = 5, 
           label.fill = NULL, 
           map.title = "Map selection of variables", 
           labelx = "default", 
           labely = "default", 
           legend = NULL)


# Concentration ellipse for a subset of the population
map <- map.ind(mca_taste)

map.ellipse(mca_taste, map, as.factor(taste_act$Age == "18-24" 
                                      ))

#### EXTRAS ###


#Map ellipses of supplementary variables in cloud of individuals
map.ellipse(mca_taste, 
            ca.plot = map.ind(mca_taste, 
                              point.size = 2, 
                              point.alpha = 0.8, 
                              point.color = "grey", 
                              point.fill = "grey"), 
            sup$Age, 
            ellipse.label = TRUE, 
            ellipse.color = sup$Age, 
            label.size = 4, 
            #draw.levels = 5:nlevels(sup$taste.Age), 
            ellipse.line = "solid")






#### CLUSTER ANALYSIS ####

library(cluster)

coords <- mca_taste$coord.ind[, 1:12]
#Clustering on the first ten axes
clust <- agnes(coords, method="ward")
layout(matrix(c(1,1)))
plot(clust,ask=F,which.plots=2)
clusters <- cutree(clust, k = 4)
table(clusters)
#  k = 4, or any other number of clusters
#Here we use Ward's algorithm. I also try with kmeans, or others.



#Color the map of individuals by cluster (with the Ward solution)
map.ind(mca_taste,  point.fill = as.factor(clusters), dim = c(1, 2), point.shape = 21, point.alpha = 1, point.size = 3, map.title = "Space of individuals, axes 1 and 2", labelx = "default", labely = "default", point.color = "black")

map.ind(mca_taste,  point.fill = as.factor(clusters), dim = c(1, 3), point.shape = 21, point.alpha = 1, point.size = 5, map.title = "Space of individuals, axes 1 and 3", labelx = "default", labely = "default", point.color = "black")



#And to have these clusters' ellipses
clusters_rec <- factor(clusters)
clusters_rec <- recode(clusters,
                       "1" = "Cluster 1",
                       "2" = "Cluster 2",
                       "3" = "Cluster 3", 
                       "4" = "Cluster 4",
                       )
table(clusters_rec)
sup_rec <- data.frame(sup, clusters_rec)

mca_taste.clusters <- soc.mca(active, sup_rec, id)

map.ellipse(mca_taste.clusters, 
            ca.plot = map.ind(mca_taste.clusters), 
            sup_rec$clusters_rec, 
            ellipse.label = TRUE, 
            ellipse.color = "black", 
            label.size = 5, 
            draw.levels = 4:nlevels(sup_rec$clusters_rec), 
            ellipse.line = "solid")


#Qualification of the clusters (through active and supplementary variables - or any other by the way)
library(FactoMineR)
Clusters_Qualification <- data.frame(active, sup_rec)

#Identify in the list the rank of the cluster variable. Let's say here 25.
names(Clusters_Qualification)
catdes(Clusters_Qualification_2,25,proba = 0.05)



#### Class specific analysis (CSA). For example on a supplementary variable Gender ####

SexWoman   <- which(sup$Gender == "Women")
CSA_Woman <- soc.csa(mca_taste, SexWoman, sup = NULL)
variance(CSA_Woman)
csa.measures(CSA_Woman, correlations = TRUE, cosines = TRUE, cosine.angles = TRUE, dim = 1:4, format = TRUE)

SexMan   <- which(sup$Gender == "Men")
CSA_Man <- soc.csa(mca_taste, SexMan, sup = NULL)
variance(CSA_Man)
csa.measures(CSA_Man, correlations = TRUE, cosines = TRUE, cosine.angles = TRUE, dim = 1:5, format = TRUE)


#Map contributing modalities
map.ctr(CSA_Woman, dim = c(1, 2), ctr.dim = 1, point.shape = "variable",
        point.alpha = 1, point.fill = "black", point.color = "black",
        point.size = "freq", label = TRUE, label.repel = TRUE,
        label.alpha = 1, label.color = "black", label.size = 3,
        label.fill = NULL, map.title = "CSA Women. Above Average Modalities: Axis 1", labelx = "default",labely = "default", legend = NULL)

map.ctr(CSA_Woman, dim = c(1, 2), ctr.dim = 2, point.shape = "variable",
        point.alpha = 1, point.fill = "black", point.color = "black",
        point.size = "freq", label = TRUE, label.repel = TRUE,
        label.alpha = 1, label.color = "black", label.size = 3,
        label.fill = NULL, map.title = "CSA Women. Above Average Modalities: Axis 2", labelx = "default",labely = "default", legend = NULL)

map.ctr(CSA_Woman, dim = c(1, 3), ctr.dim = 3, point.shape = "variable",
        point.alpha = 1, point.fill = "black", point.color = "black",
        point.size = "freq", label = TRUE, label.repel = TRUE,
        label.alpha = 1, label.color = "black", label.size = 3,
        label.fill = NULL, map.title = "CSA Women. Above Average Modalities: Axis 3", labelx = "default",labely = "default", legend = NULL)


map.ctr(CSA_Man, dim = c(1, 2), ctr.dim = 1, point.shape = "variable",
        point.alpha = 1, point.fill = "black", point.color = "black",
        point.size = "freq", label = TRUE, label.repel = TRUE,
        label.alpha = 1, label.color = "black", label.size = 3,
        label.fill = NULL, map.title = "CSA Men. Above Average Modalities: Axis 1", labelx = "default",labely = "default", legend = NULL)

map.ctr(CSA_Man, dim = c(1, 2), ctr.dim = 2, point.shape = "variable",
        point.alpha = 1, point.fill = "black", point.color = "black",
        point.size = "freq", label = TRUE, label.repel = TRUE,
        label.alpha = 1, label.color = "black", label.size = 3,
        label.fill = NULL, map.title = "CSA Men. Above Average Modalities: Axis 2", labelx = "default",labely = "default", legend = NULL)

map.ctr(CSA_Man, dim = c(1, 3), ctr.dim = 3, point.shape = "variable",
        point.alpha = 1, point.fill = "black", point.color = "black",
        point.size = "freq", label = TRUE, label.repel = TRUE,
        label.alpha = 1, label.color = "black", label.size = 3,
        label.fill = NULL, map.title = "CSA Men. Above Average Modalities: Axis 3", labelx = "default",labely = "default", legend = NULL)