#### Script to perform Multiple Correspondence Analysis with the package soc.ca # Package dokumentation: https://cran.r-project.org/web/packages/soc.ca/soc.ca.pdf # The practice dataset is the The Taste Example, also used in Le Roux & Rouanet (2010): Multiple Correspondences Analysis, Sage. # The data involve Q = 4 active variables and K = 8 + 8 + 7 + 6 = 29 categories # n = 1215 individuals (after removing supplementary individuals) ##### PREPARATIONS #### #### Installation of packages (only once if yoy haven't done it before) #install.packages("soc.ca") #install.packages("tidyverse") # Load packages library(soc.ca) library(tidyverse) # Read the taste dataset that is included in the package data(taste) # removing the supplementary individuals taste_act <- taste %>% filter(Isup == "Active") # Basic statistics summary(taste_act) #Data.frames creation for active and supplementary variables (all variables have to be factors, with NO missing values, meaning that NAs must be put in a category called "Missing", or something else) active <- taste_act %>% select(TV, Film, Art, Eat) sup <- taste_act %>% select(Gender, Age, Income) id <- taste_act %>% select(ID) # Option to define Passive modalities for specific MCA (SMCA). The default option in the function soc.ca is that all individuals with missing values will be set ass passive. #options(passive = c("taste.active1: Missing", "taste.active2: Missing")) #To define passive modalities, we have to create a vector with, between quote marks: "the exact name of the variable within the data.frame, colon, space, the exact name of the modality". There still can be conflicts. In that case, give another name to the variable/the modality, or to both. # PERFORMING THE MCA mca_taste <- soc.mca( active = active, # note, this is not a good object naming practice. sup = sup, # -''- identifier = id) # this is better, because parameter != object name. # General information on the MCA print(mca_taste) #There are a lot of values to get using mca_taste$... #For example to get the names of the active modalities mca_taste$names.mod #Or the supplementary modalities mca_taste$names.sup #Etc. # Getting the variance of axis (eigenvalues), variance rates and modified rates for the first 12 axis variance(mca_taste, dim = c(1:12)) # See p.46 in Le Roux(2010) # Interpretation of the 3 first axis contribution(mca_taste, 1) contribution(mca_taste, 2) contribution(mca_taste, 3) contribution(mca_taste, 1:5, mode = "variable") #### Mapping cloud of categories, cloud of individuals and supplementary categories #### ## MAPPING THE ACTIVE CATEGORIES # Cloud of categories plane 1,2 map.active( object = mca_taste, dim = c(1, 2), point.shape = "variable", point.alpha = 0.8, #point.fill = "whitesmoke", point.fill = mca_taste$variable, # Color points by active variable point.color = "black", point.size = "freq", #point.size = mca_taste$ctr.mod[,1], # Size of point determined by contribution to the first axis label = TRUE, label.repel = TRUE, # Avoid to much overlapp of variables label.alpha = 0.8, label.color = "black", label.size = 4, label.fill = NULL, map.title = "Active modalities plane 1-2", labelx = "default", labely = "default", legend = NULL ) # Cloud of categories plane 2,3 map.active( object = mca_taste, dim = c(3, 2), point.shape = "variable", point.alpha = 0.8, point.fill = "whitesmoke", #point.fill = mca_taste$variable, # Color points by active variable point.color = "black", point.size = "freq", # point.size = mca_taste$ctr.mod[,1], # Size of point determined by contribution to the first axis label = TRUE, label.repel = TRUE, # Avoid to much overlapp of variables label.alpha = 0.8, label.color = "black", label.size = 4, label.fill = NULL, map.title = "Active modalities plane 2-3", labelx = "default", labely = "default", legend = NULL ) # It is also possible to map only those categories contributing above average on each axis # Example for axis 1 map.ctr(mca_taste, dim = c(1, 2), ctr.dim = 1, point.shape = "variable", point.alpha = 1, point.fill = "black", point.color = "black", point.size = "freq", label = TRUE, label.repel = TRUE, label.alpha = 1, label.color = "black", label.size = 6, label.fill = NULL, map.title = "Above average modalities: axis 1", labelx = "default", labely = "default", legend = NULL) # Example for axis 1 and 2 map.ctr(mca_taste, dim = c(1, 2), ctr.dim = 1:2, point.shape = "variable", point.alpha = 1, point.fill = "black", point.color = "black", point.size = "freq", label = TRUE, label.repel = TRUE, label.alpha = 1, label.color = "black", label.size = 3, label.fill = NULL, map.title = "Above average modalities: axis 1 and 2", labelx = "default", labely = "default", legend = NULL) ## MAPPING THE SUPPLEMENTARY CATEGORIES map.sup(mca_taste, dim = c(1, 2), point.shape = "variable", point.alpha = 1, point.fill = "black", point.color = "black", point.size = "freq", label = TRUE, label.repel = TRUE, label.alpha = 1, label.color = "black", label.size = 4, label.fill = NULL, map.title = "Space of supplementary variables. Axes 1 and 2", labelx = "default", labely = "default", legend = NULL) map.sup(mca_taste, dim = c(1, 2), point.shape = "variable", point.alpha = 1, point.fill = "black", point.color = "black", point.size = "freq", label = TRUE, label.repel = TRUE, label.alpha = 1, label.color = "black", label.size = 4, label.fill = NULL, map.title = "Space of supplementary variables. Axes 1 and 2", labelx = "default", labely = "default", legend = NULL) ## MAPPING ACTIVE AND SUPPLEMENTARY CATEGORIES IN SAME GRAPH map.select(mca_taste, dim = c(1, 2), list.mod = TRUE, list.sup = TRUE, list.ind = NULL, point.shape = "variable", point.alpha = .8, point.fill = "whitesmoke", point.color = "black", point.size = "freq", label = TRUE, label.repel = TRUE, label.alpha = 1, label.color = "black", label.size = 5, label.fill = NULL, map.title = "Map selection of variables", labelx = "default", labely = "default", legend = NULL) ## MAPPING THE CLOUD OF INDIVIDUALS # Plane 1, 2 map.ind( mca_taste, dim = c(1, 2), point.shape = 21, point.alpha = 0.8, point.fill = "whitesmoke", point.color = "black", point.size = 2, label = FALSE, label.repel = FALSE, label.alpha = 0.8, label.color = "black", label.size = 4, label.fill = NULL, map.title = "Cloud of individuals plane 1-2", labelx = "default", labely = "default", legend = NULL ) # Plane 2, 3 map.ind( mca_taste, dim = c(3, 2), point.shape = 21, point.alpha = 0.8, point.fill = "whitesmoke", point.color = "black", point.size = 2, label = FALSE, label.repel = FALSE, label.alpha = 0.8, label.color = "black", label.size = 4, label.fill = NULL, map.title = "Cloud of individuals plane 2-3", labelx = "default", labely = "default", legend = NULL ) # Colour ind based on age map.ind( mca_taste, dim = c(1, 2), point.shape = 21, point.alpha = 0.8, point.fill = sup$Age, point.color = "black", point.size = 2, label = FALSE, label.repel = FALSE, label.alpha = 0.8, label.color = "black", label.size = 4, label.fill = NULL, map.title = "Cloud of individuals plane 1-2", labelx = "default", labely = "default", legend = sup$Age ) #MAP A SELECTION OF MODALITIES #Here we look for the rank of supplementary modalities (or active, with "list.mod", or individuals, with "list.ind"), and we add a vector with their number: mca_taste$names.sup mca_taste$names.mod # The TV-modalities and age map.select(mca_taste, dim = c(1, 2), list.mod = c(9:16), list.sup = c(3:8), list.ind = NULL, point.shape = "variable", point.alpha = .8, point.fill = "whitesmoke", point.color = "black", point.size = "freq", label = TRUE, label.repel = TRUE, label.alpha = 1, label.color = "black", label.size = 5, label.fill = NULL, map.title = "Map selection of variables", labelx = "default", labely = "default", legend = NULL) #mapping a group of individuals, example all aged 65+ attach(taste_act) oldies <- Age == "65+" map.select(mca_taste, dim = c(1, 2), list.mod = NULL, list.sup = NULL, list.ind = oldies, point.shape = 21, point.alpha = .8, point.fill = "whitesmoke", point.color = "black", point.size = 3, label = FALSE, label.repel = FALSE, label.alpha = 1, label.color = "black", label.size = 5, label.fill = NULL, map.title = "Map selection of variables", labelx = "default", labely = "default", legend = NULL) # Concentration ellipse for a subset of the population map <- map.ind(mca_taste) map.ellipse(mca_taste, map, as.factor(taste_act$Age == "18-24" )) #### EXTRAS ### #Map ellipses of supplementary variables in cloud of individuals map.ellipse(mca_taste, ca.plot = map.ind(mca_taste, point.size = 2, point.alpha = 0.8, point.color = "grey", point.fill = "grey"), sup$Age, ellipse.label = TRUE, ellipse.color = sup$Age, label.size = 4, #draw.levels = 5:nlevels(sup$taste.Age), ellipse.line = "solid") #### CLUSTER ANALYSIS #### library(cluster) coords <- mca_taste$coord.ind[, 1:12] #Clustering on the first ten axes clust <- agnes(coords, method="ward") layout(matrix(c(1,1))) plot(clust,ask=F,which.plots=2) clusters <- cutree(clust, k = 4) table(clusters) # k = 4, or any other number of clusters #Here we use Ward's algorithm. I also try with kmeans, or others. #Color the map of individuals by cluster (with the Ward solution) map.ind(mca_taste, point.fill = as.factor(clusters), dim = c(1, 2), point.shape = 21, point.alpha = 1, point.size = 3, map.title = "Space of individuals, axes 1 and 2", labelx = "default", labely = "default", point.color = "black") map.ind(mca_taste, point.fill = as.factor(clusters), dim = c(1, 3), point.shape = 21, point.alpha = 1, point.size = 5, map.title = "Space of individuals, axes 1 and 3", labelx = "default", labely = "default", point.color = "black") #And to have these clusters' ellipses clusters_rec <- factor(clusters) clusters_rec <- recode(clusters, "1" = "Cluster 1", "2" = "Cluster 2", "3" = "Cluster 3", "4" = "Cluster 4", ) table(clusters_rec) sup_rec <- data.frame(sup, clusters_rec) mca_taste.clusters <- soc.mca(active, sup_rec, id) map.ellipse(mca_taste.clusters, ca.plot = map.ind(mca_taste.clusters), sup_rec$clusters_rec, ellipse.label = TRUE, ellipse.color = "black", label.size = 5, draw.levels = 4:nlevels(sup_rec$clusters_rec), ellipse.line = "solid") #Qualification of the clusters (through active and supplementary variables - or any other by the way) library(FactoMineR) Clusters_Qualification <- data.frame(active, sup_rec) #Identify in the list the rank of the cluster variable. Let's say here 25. names(Clusters_Qualification) catdes(Clusters_Qualification_2,25,proba = 0.05) #### Class specific analysis (CSA). For example on a supplementary variable Gender #### SexWoman <- which(sup$Gender == "Women") CSA_Woman <- soc.csa(mca_taste, SexWoman, sup = NULL) variance(CSA_Woman) csa.measures(CSA_Woman, correlations = TRUE, cosines = TRUE, cosine.angles = TRUE, dim = 1:4, format = TRUE) SexMan <- which(sup$Gender == "Men") CSA_Man <- soc.csa(mca_taste, SexMan, sup = NULL) variance(CSA_Man) csa.measures(CSA_Man, correlations = TRUE, cosines = TRUE, cosine.angles = TRUE, dim = 1:5, format = TRUE) #Map contributing modalities map.ctr(CSA_Woman, dim = c(1, 2), ctr.dim = 1, point.shape = "variable", point.alpha = 1, point.fill = "black", point.color = "black", point.size = "freq", label = TRUE, label.repel = TRUE, label.alpha = 1, label.color = "black", label.size = 3, label.fill = NULL, map.title = "CSA Women. Above Average Modalities: Axis 1", labelx = "default",labely = "default", legend = NULL) map.ctr(CSA_Woman, dim = c(1, 2), ctr.dim = 2, point.shape = "variable", point.alpha = 1, point.fill = "black", point.color = "black", point.size = "freq", label = TRUE, label.repel = TRUE, label.alpha = 1, label.color = "black", label.size = 3, label.fill = NULL, map.title = "CSA Women. Above Average Modalities: Axis 2", labelx = "default",labely = "default", legend = NULL) map.ctr(CSA_Woman, dim = c(1, 3), ctr.dim = 3, point.shape = "variable", point.alpha = 1, point.fill = "black", point.color = "black", point.size = "freq", label = TRUE, label.repel = TRUE, label.alpha = 1, label.color = "black", label.size = 3, label.fill = NULL, map.title = "CSA Women. Above Average Modalities: Axis 3", labelx = "default",labely = "default", legend = NULL) map.ctr(CSA_Man, dim = c(1, 2), ctr.dim = 1, point.shape = "variable", point.alpha = 1, point.fill = "black", point.color = "black", point.size = "freq", label = TRUE, label.repel = TRUE, label.alpha = 1, label.color = "black", label.size = 3, label.fill = NULL, map.title = "CSA Men. Above Average Modalities: Axis 1", labelx = "default",labely = "default", legend = NULL) map.ctr(CSA_Man, dim = c(1, 2), ctr.dim = 2, point.shape = "variable", point.alpha = 1, point.fill = "black", point.color = "black", point.size = "freq", label = TRUE, label.repel = TRUE, label.alpha = 1, label.color = "black", label.size = 3, label.fill = NULL, map.title = "CSA Men. Above Average Modalities: Axis 2", labelx = "default",labely = "default", legend = NULL) map.ctr(CSA_Man, dim = c(1, 3), ctr.dim = 3, point.shape = "variable", point.alpha = 1, point.fill = "black", point.color = "black", point.size = "freq", label = TRUE, label.repel = TRUE, label.alpha = 1, label.color = "black", label.size = 3, label.fill = NULL, map.title = "CSA Men. Above Average Modalities: Axis 3", labelx = "default",labely = "default", legend = NULL)