#### Script to perform (simple) Correspondence Analysis (CA) with the packages "FactoMineR" and "factoextra" ##### PREPARATIONS #### ####Install packages (only once if you haven't done it before) #install.packages("FactoMineR") #install.packages("factoextra") #install.packages("data.table") #install.packages("ggrepel") # load packages library(FactoMineR) library(factoextra) library(tidyverse) library(data.table) library(ggrepel) # Importing data ca_raw <- read.csv("ca_example.csv", encoding = "Latin-1") # or ca_raw <- fread("ca_example.csv", encoding = "Latin-1") #The data is a two way contingency table with schools and programs as rows and columns consisting of social groups X gender (along with some supplementary variables). # View the data head(ca_raw) ca_raw %>% view() # Preparing the dataset for the CA for_ca <- ca_raw %>% column_to_rownames(var = "id") # Perform the CA with the FactoMinr package ca <- CA(for_ca, col.sup = 43:65, # Defines the supplementary columns graph = FALSE ) # Examine the eigenvalues to determine the number of axis to be considered get_eigenvalue(ca) fviz_eig(ca, addlabels = TRUE) ca$col # Graph including rows, columns and supplementary variables fviz_ca_biplot(ca, repel = TRUE)+ theme_bw()+ coord_equal() # Graph were we just keep the individuals (schools) fviz_ca_biplot(ca, repel = TRUE, invisible = c("row.sup","col", "col.sup"))+ theme_bw()+ coord_equal() # Graph were we just keep the columns (active variables) fviz_ca_biplot(ca, repel = TRUE, invisible = c("row", "row.sup", "col.sup"))+ theme_bw()+ coord_equal() # Active rows and columns fviz_ca_biplot(ca, repel = TRUE, invisible = c("row.sup", "col.sup"))+ theme_bw()+ coord_equal() # Keeping just the supplementary variables fviz_ca_biplot(ca, repel = TRUE, invisible = c("row", "col"))+ theme_bw()+ coord_equal() # Select the top 10 contributing rows # And the top 10 columns fviz_ca_biplot(ca, select.row = list(contrib = 10), select.col = list(contrib = 10)) # Color gradient by contribution fviz_ca_row(ca, col.row = "contrib", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), repel = TRUE)+ theme_bw()+ coord_equal() # Most contributing columns and rows to the first two axis fviz_contrib(ca, choice = "col", axes = 1, top = 20) fviz_contrib(ca, choice = "col", axes = 2, top = 20) fviz_contrib(ca, choice = "row", axes = 1, top = 20) fviz_contrib(ca, choice = "row", axes = 2, top = 20) #### Script showing how to extract coordinates and contributions for GGplot vizualisations #### n_schools <- ca_raw %>% select(school_prog = id, n_ind) ca_act_schools_coord <- ca$row$coord %>% as.data.frame() %>% rownames_to_column() %>% rename(school_prog = rowname) %>% left_join(n_schools) %>% mutate(act_sup = "active") ca_suppl_var <- ca$col.sup$coord %>% as.data.frame() %>% rownames_to_column() %>% rename(school_prog = rowname) %>% mutate(act_sup = "sup_var", n_ind = 100) ca_act_var <- ca$col$coord %>% as.data.frame() %>% rownames_to_column() %>% rename(school_prog = rowname) %>% mutate(act_sup = "act_var", n_ind = 100) ca_schools <- ca_act_schools_coord %>% bind_rows(ca_act_var) %>% bind_rows(ca_suppl_var) g1 <- ggplot(ca_schools, aes(x = `Dim 1`, y = `Dim 2`, label = school_prog, shape = act_sup, color = act_sup, alpha = act_sup )) + geom_point(aes(size = n_ind)) + scale_shape_manual(values = c(16, 25, 18))+ scale_color_manual(values = c("black", "red", "blue"))+ scale_alpha_manual(values = c(0.5, 0.5, 0.5))+ geom_hline(yintercept = 0, color = "black")+ geom_vline(xintercept = 0, color = "black")+ geom_text_repel()+ theme_bw()+ theme(legend.position = "none")+ coord_equal() g1