#### Script to perform Specific Multiple Correspondence Analysis (SMCA)
#### with the package soc.ca
# Package documentation:
# https://cran.r-project.org/web/packages/soc.ca/soc.ca.pdf
# The practice dataset is the Taste Example, also used in
# Le Roux & Rouanet (2010): Multiple Correspondence Analysis, Sage.
# The data involve Q = 4 active variables and
# K = 8 + 8 + 7 + 6 = 29 categories,
# n = 1215 individuals (after removing supplementary individuals).

##### PREPARATIONS ####

#### Installation of packages (only once if you haven't done it before)
# install.packages("soc.ca")
# install.packages("tidyverse")

# Load packages
library(soc.ca)
library(tidyverse)

# Read the taste dataset that is included in the package
data(taste)

# Remove the supplementary individuals
taste_act <- taste %>%
  filter(Isup == "Active")

# Basic statistics
summary(taste_act)

# Create the data frames for active and supplementary variables.
# All variables have to be factors with NO missing values: NAs must be put
# in a category called "Missing" (or something else).
active <- taste_act %>%
  select(TV, Film, Art, Eat)
sup <- taste_act %>%
  select(Gender, Age, Income)

# The identifier is extracted as a plain vector (one value per individual)
# rather than a one-column data frame.
id <- taste_act %>%
  pull(ID)

# Option to define passive modalities for specific MCA (SMCA). The
# default option in the function soc.mca is that all individuals with missing
# values will be set as passive.
# options(passive = c("taste.active1: Missing", "taste.active2: Missing"))
# To define passive modalities, create a character vector whose elements are,
# between quote marks: the exact name of the variable within the data.frame,
# colon, space, the exact name of the modality.
# There can still be conflicts. In that case, give another name to the
# variable, the modality, or both.

# Performing the MCA with selection of supplementary categories
smca_taste <- soc.mca(
  active = active,
  sup = sup,
  identifier = id,
  passive = c("TV: Tv-Police", "Film: Musical")
)

# General information on the MCA
print(smca_taste)

# Specific MCA in FactoMineR ####
# Run on the same active individuals and the same four active variables as
# the soc.ca analysis above, selected by name instead of by column position.
# getindexcat() lists the category indices that `excl` refers to.
# NOTE(review): indices 6 and 14 are expected to be "Tv-Police" and "Musical";
# confirm against the getindexcat() output.
GDAtools::getindexcat(active)
fm_smca <- FactoMineR::MCA(active, excl = c(6, 14))

# Specific MCA in GDAtools ####
# `Taste` (capital T) is the dataset loaded from GDAtools; it is a different
# object from the soc.ca `taste` data used above.
library(GDAtools)
data(Taste)
junk <- c(
  "FrenchPop.NA", "Rap.NA", "Rock.NA", "Jazz.NA", "Classical.NA",
  "Comedy.NA", "Crime.NA", "Animation.NA", "SciFi.NA", "Love.NA",
  "Musical.NA"
)
gt_mca <- speMCA(Taste[, 1:11], excl = junk)
print(gt_mca)

# Structured data analysis ####

## Concentration ellipses
map.ellipse(
  smca_taste,
  ca.plot = map.ind(
    smca_taste,
    point.size = 2,
    point.alpha = 0.8,
    point.color = "grey",
    point.fill = "grey"
  ),
  sup$Age,
  ellipse.label = TRUE,
  ellipse.color = sup$Age,
  label.size = 4,
  # draw.levels = 5:nlevels(sup$Age),
  ellipse.line = "solid"
)

# Breakdown of variance (Le Roux & Rouanet 2010: 78-79)
sda <- breakdown.variance(
  object = smca_taste,
  dim = 1:3,
  variable = sup$Age
)
sda # Correlation ratio = eta2

# Crossing of age and gender
p <- ggcloud_variables(gt_mca, col = "lightgrey", shapes = FALSE)
ggadd_interaction(p, gt_mca, Taste$Gender, Taste$Age)