Economic inequality and economic segregation: a systematic review of causal pathways

Companion notebook to the Social Forces (2025) article “Economic inequality and economic segregation: a systematic review of causal pathways” by Clémentine Cottineau-Mugadza, produced within the ERC project SEGUE.

Screening

Graph to follow the evolution of manual screening (Appendix A)

#' Bar chart of the number of screened records per title category.
#'
#' Keeps the rows of `data` whose `CAT_title` is one of the letters A-H,
#' prints the number of "A" records and of records with `Core == 1`, draws
#' one bar per category, prints the plot and saves it as an image.
#'
#' @param data Data frame with at least the columns `CAT_title` and `Core`.
#' @param filename File to write the plot to. Defaults to
#'   "histevol_<number of categorised rows>.png". NOTE: `evolhist()` writes to
#'   the same default name, so calling both on the same data overwrites this
#'   file; pass a distinct `filename` to keep both figures.
#' @return The value returned by `ggsave()`.
globalplot <- function(data, filename = NULL) {
  category_colours <- setNames(
    c("goldenrod1", "grey25", "grey40", "goldenrod3",
      "grey75", "grey85", "grey95", "white"),
    LETTERS[1:8]
  )

  categorised <- data %>%
    filter(CAT_title %in% names(category_colours))
  ncoded <- nrow(categorised)

  print(paste0("# of A: ", nrow(filter(categorised, CAT_title == "A"))))
  print(paste0("# of Core: ", nrow(filter(categorised, Core == 1))))

  # geom_bar() needs exactly one fill value per bar drawn. Selecting the
  # colours of the categories that are actually present avoids an aesthetics
  # length error when fewer than eight categories occur in `data`.
  present <- levels(droplevels(factor(categorised$CAT_title)))

  # The percentage is computed against the rows of `data` itself rather than
  # against a global object, so the function works on any input table.
  p <- categorised %>%
    ggplot(aes(CAT_title)) +
    geom_bar(fill = unname(category_colours[present])) +
    ggtitle(paste0("n=", ncoded, " | ",
                   round(ncoded / nrow(data) * 100, 1), "%"))

  print(p)

  if (is.null(filename)) {
    filename <- paste0("histevol_", ncoded, ".png")
  }
  # Pass the plot explicitly instead of relying on last_plot().
  ggsave(filename, plot = p)
}
#' Share of each title category along the screening sequence.
#'
#' Keeps the rows of `data` whose `CAT_title` is one of the letters A-H, cuts
#' their row numbers into consecutive bins of `binsize` records and draws,
#' for each bin, the filled (proportional) bar of categories. Also prints how
#' many records were coded since the last "A" and the bin-by-category table.
#'
#' The breaks are `seq(0, ncoded, by = binsize)`, so records located after the
#' last complete bin receive an `NA` bin and do not appear in the printed
#' table.
#'
#' @param data Data frame with at least the column `CAT_title`.
#'   NOTE(review): the row names of the filtered table are used as the
#'   screening position and are assumed to run from 1 to the number of
#'   categorised rows - confirm for inputs with custom row names.
#' @param binsize Number of records per bin.
#' @param filename File to write the plot to. Defaults to
#'   "histevol_<number of categorised rows>.png". NOTE: `globalplot()` writes
#'   to the same default name; pass a distinct `filename` to keep both.
#' @return The value returned by `ggsave()`.
evolhist <- function(data, binsize, filename = NULL) {
  categorised <- data %>%
    filter(CAT_title %in% LETTERS[1:8])
  ncoded <- nrow(categorised)

  # cut() needs at least two breaks; fail with a readable message otherwise.
  if (ncoded < binsize) {
    stop("evolhist() needs at least `binsize` categorised records (",
         ncoded, " < ", binsize, ").", call. = FALSE)
  }

  position <- as.numeric(rownames(categorised))
  categorised$n <- cut(position, seq(0, ncoded, by = binsize))

  # Highest position holding an "A" record (NA when there is none).
  lastA <- sort(position[categorised$CAT_title == "A"], decreasing = TRUE)[1]
  print(paste0("# since last A: ", ncoded - lastA))

  # Turn the interval labels "(lower,upper]" into their numeric mid-point.
  sumplot <- categorised %>%
    mutate(Bin = gsub("\\(|\\]", "", n)) %>%
    separate(Bin, sep = ",", into = c("lower", "upper")) %>%
    mutate(across(lower:upper, as.numeric)) %>%
    mutate(`Sample` = (upper + lower) / 2)

  # geom_bar() is the counting bar geom; geom_histogram(stat = "count") draws
  # the same layer but warns about ignored binning parameters.
  # The percentage in the title is relative to `data`, not to a global table.
  p <- ggplot(data = sumplot,
              aes(x = Sample, fill = CAT_title)) +
    geom_bar(position = position_fill(reverse = TRUE),
             colour = "white", width = binsize) +
    scale_fill_manual(values = c("goldenrod1", "grey25", "grey40", "goldenrod3",
                                 "grey75", "grey85", "grey95", "white")) +
    labs(x = "Sample reviewed", y = "Frequency") +
    ggtitle(paste0("n=", ncoded, " | ",
                   round(ncoded / nrow(data) * 100, 1), "%"))

  print(p)
  print(table(sumplot$Sample, sumplot$CAT_title))

  if (is.null(filename)) {
    filename <- paste0("histevol_", ncoded, ".png")
  }
  # Pass the plot explicitly instead of relying on last_plot().
  ggsave(filename, plot = p)
}

Distribution of records by category (Appendix A)

# Read the semicolon-separated list of categorised records, then plot the
# number of records per title category.
wos_cat <- read.csv2(file = "data/wos_list2_CAT_CC.csv", sep = ";")

globalplot(data = wos_cat)

## [1] "# of A: 101"
## [1] "# of Core: 5"

## Saving 7 x 5 in image

# Category shares along the screening sequence, in bins of 500 records.
evolhist(wos_cat, binsize = 500)

## [1] "# since last A: 1"

##       
##          A   B   C   D   E   F   G   H
##   250   57  36 220  15  30  87  32  23
##   750    6  16 272   7  41  97  38  23
##   1250   4  13 112   7  22 166  94  82
##   1750   6  13  65  17  18 190 109  82
##   2250   4  11  74  13  24 180 106  88
##   2750   5  13  64   7  20 173 109 109
##   3250   6  20  70   7  35 177  89  96
##   3750   1   7  56  11  24 153 100 148
##   4250   1   6  54  13  36 169  96 125
##   4750   4  11  59   7  16 158  78 167
##   5250   3  12  61  21  26 166  76 135

## Saving 7 x 5 in image

Random sampling of title examples for each category

#' Draw a random sample of article titles for each title category.
#'
#' @param data Data frame with the columns `CAT_title` and `Article.Title`.
#' @param n Number of titles to draw per category (without replacement).
#' @param categories Category codes to sample from; each becomes one output
#'   column. Defaults to the letters A-H.
#' @return A data frame with `n` rows and one column per category. A category
#'   holding fewer than `n` titles is padded with `NA` instead of raising a
#'   "cannot take a sample larger than the population" error.
randomtitles <- function(data, n, categories = LETTERS[1:8]) {
  sample_category <- function(category) {
    # which() ignores rows whose CAT_title is NA.
    titles <- data[["Article.Title"]][which(data[["CAT_title"]] == category)]
    # Never ask for more titles than the category contains.
    picked <- titles[sample.int(length(titles), min(n, length(titles)))]
    # Indexing past the end pads the column with NA up to n rows.
    picked[seq_len(n)]
  }

  columns <- lapply(categories, sample_category)
  names(columns) <- categories
  data.frame(columns, check.names = FALSE, stringsAsFactors = FALSE)
}
# Ten example titles per category; show the first rows.
# NOTE(review): no seed is set, so the sampled titles differ between runs.
titlextab <- randomtitles(data = wos_cat, n = 10)

head(titlextab)

##                                                                                                                     A
## 1        Neighbourhood effects and beyond: Explaining the paradoxes of inequality in the changing American metropolis
## 2                                           The Income-Inequality Relationship within US Metropolitan Areas 1980-2016
## 3                                              Black-white income inequality and metropolitan socioeconomic structure
## 4            Sociocultural, economic and ethnic homogeneity in residential mobility and spatial sorting among couples
## 5 Expanding homes and increasing inequalities: US housing development and the residential segregation of the affluent
## 6                                           Inequality, residential segregation by income, and mortality in US cities
##                                                                                                                                     B
## 1                                            Neighborhoods on the Rise: A Typology of Neighborhoods Experiencing Socioeconomic Ascent
## 2                                        Microeconomic model of residential location incorporating life cycle and social expectations
## 3                                            The Alibaba effect: Spatial consumption inequality and the welfare gains from e-commerce
## 4                                                                   Housing Decisions Among Low-Income Hispanic Households in Chicago
## 5 Small-Area Incomes: Their Spatial Variability and the Relative Efficacy of Proxy, Geodemographic, Imputed and Model-Based Estimates
## 6                                                                  How economic segregation affects children's educational attainment
##                                                                                                                                                     C
## 1                                               ON INTERGENERATIONAL IMMOBILITY: EVIDENCE THAT ADULT CREDIT HEALTH REFLECTS THE CHILDHOOD ENVIRONMENT
## 2 Which Income Inequality Influences Which Health Indicators? Analysis of the Income Inequality Hypothesis with Market and Disposable Gini Indicators
## 3                                                                                      Racial economic subordination and white gain in the U.S. South
## 4    Untangling the Complexity of the Association between Contracting and Local Fiscal Performance and Income Inequality in Terms of Competing Values
## 5                                                                      Generalizing the Inequality Process' gamma model of particle wealth statistics
## 6                                                           FOSSIL FUEL SUBSIDIES, INCOME INEQUALITY, AND POVERTY: EVIDENCE FROM DEVELOPING COUNTRIES
##                                                                                                                                                                                  D
## 1                                                                                                                    Migration, ethnicity, and inequality: Homeownership in Israel
## 2                                   Observed trends in the magnitude of socioeconomic and area-based inequalities in use of caesarean section in Ethiopia: a cross-sectional study
## 3                                                                                                                                       REGULATION AND THE GEOGRAPHY OF INEQUALITY
## 4 An analysis of the nutrition status of neighboring Indigenous and non-Indigenous populations in Kanungu District, southwestern Uganda: Close proximity, distant health realities
## 5                                                                                         Social inequality and urban regeneration in Barcelona city centre: reconsidering success
## 6                                                                                                                               Inequality, neighbourhoods and welfare of the poor
##                                                                                                                        E
## 1                                                          Making do: Religious segregation and everyday water struggles
## 2                     The interaction of segregation and suburbanization in an agent-based model of residential location
## 3    Through the bridges: the Black Cultural Association in Sao Paulo, urban planning and the contours of the white city
## 4 Birth outcomes among urban African-American women: A multilevel analysis of the role of racial residential segregation
## 5                       Housing Liberalisation and Gentrification: The Social Effects of Tenure Conversions in Amsterdam
## 6                                                  'Now the German comes': The ethnic effect of gentrification in Berlin
##                                                                                                                                          F
## 1                    Trends in Inequality in Food Consumption and Calorie Intake in India: Evidence from the Last Three Decades, 1983-2012
## 2                  Trade liberalisation, poverty and inequality in South Africa: A computable general equilibrium-microsimulation analysis
## 3                                                    Channels of Inequality of Opportunity: The Role of Education and Occupation in Europe
## 4 What effect does inequality have on residents' sense of safety? Exploring the mediating processes of social capital and civic engagement
## 5                                                                               Poverty alleviation, inequality and welfare in rural China
## 6                               Socioeconomic inequalities in women's access to health care: has Ecuadorian health reform been successful?
##                                                                                                                                               G
## 1                                                                                           Cost Distortions and Structural Imbalances in China
## 2                                                                                           Some alternative geo-economics for Europe's regions
## 3                                                                        Global Apartheid. Development and Underdevelopment after Globalization
## 4                                                             DOES UNDERNUTRITION RESPOND TO INCOMES AND PRICES - DOMINANCE TESTS FOR INDONESIA
## 5                                     Taylor linearization sampling errors and design effects for poverty measures and other complex statistics
## 6 Labor Market Segmentation in Urumqi, Xinjiang: Exposing Labor Market Segments and Testing the Relationship between Migration and Segmentation
##                                                                                                            H
## 1 Unpacking democracy: The effects of different democratic qualities on climate change performance over time
## 2              Contextualizing the COVID-19 Era in Puerto Rico: Compounding Disasters and Parallel Pandemics
## 3                 Structural factors and black interracial homicide: A new examination of the causal process
## 4                 International socioeconomic inequality drives trade patterns in the global wildlife market
## 5                      Does the median voter model explain the size of government?: Evidence from the states
## 6                        Subjective well-being in the new China: religion, social capital, and social status

AI-supported screening

asreview1 <- read_excel(
  path = "data/asreview_dataset_all_economic-inequality-and-urban-economic-segregation-segue-1stRound.xlsx"
)

# Contingency table: `included` flag (rows) by title category (columns).
conting <- table(asreview1[["included"]], asreview1[["CAT_title"]])
conting

##    
##      A  B  C  D  E  F  G  H
##   0  0 48 73 18 77 44 14 25
##   1 82 21  1  4  6  1  1  0

# Column-wise proportions: share of each `included` value within a category.
prop.table(conting, margin = 2)

##    
##              A          B          C          D          E          F
##   0 0.00000000 0.69565217 0.98648649 0.81818182 0.92771084 0.97777778
##   1 1.00000000 0.30434783 0.01351351 0.18181818 0.07228916 0.02222222
##    
##              G          H
##   0 0.93333333 1.00000000
##   1 0.06666667 0.00000000

# Per-column summary of the first-round table.
summary(asreview1)

##    record_id        Title            CAT_title           Abstract        
##  Min.   :    0   Length:20369       Length:20369       Length:20369      
##  1st Qu.: 5092   Class :character   Class :character   Class :character  
##  Median :10184   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :10184                                                           
##  3rd Qu.:15276                                                           
##  Max.   :20368                                                           
##                                                                          
##    Authors          Source Title       Journal Abbreviation Publication Year
##  Length:20369       Length:20369       Length:20369         Min.   :1985    
##  Class :character   Class :character   Class :character     1st Qu.:2012    
##  Mode  :character   Mode  :character   Mode  :character     Median :2017    
##                                                             Mean   :2015    
##                                                             3rd Qu.:2020    
##                                                             Max.   :2023    
##                                                             NA's   :567     
##      Volume            Issue        Start Page          End Page        
##  Min.   :   1.00   Min.   :    1   Length:20369       Length:20369      
##  1st Qu.:  18.00   1st Qu.:    2   Class :character   Class :character  
##  Median :  36.00   Median :    3   Mode  :character   Mode  :character  
##  Mean   :  57.09   Mean   : 1080                                        
##  3rd Qu.:  63.00   3rd Qu.:    5                                        
##  Max.   :2224.00   Max.   :45271                                        
##  NA's   :1231      NA's   :4387                                         
##      DOI              Language         Author Keywords    Cited Reference Count
##  Length:20369       Length:20369       Length:20369       Min.   :  0.00       
##  Class :character   Class :character   Class :character   1st Qu.: 32.00       
##  Mode  :character   Mode  :character   Mode  :character   Median : 47.00       
##                                                           Mean   : 52.14       
##                                                           3rd Qu.: 65.00       
##                                                           Max.   :637.00       
##                                                                                
##  Times Cited, All Databases UT (Unique WOS ID)    Reviewed     exported_notes_1
##  Min.   :   0.00            Length:20369       Min.   :1       Mode:logical    
##  1st Qu.:   1.00            Class :character   1st Qu.:1       NA's:20369      
##  Median :   6.00            Mode  :character   Median :1                       
##  Mean   :  21.64                               Mean   :1                       
##  3rd Qu.:  19.00                               3rd Qu.:1                       
##  Max.   :4884.00                               Max.   :1                       
##                                                NA's   :20366                   
##     included     asreview_ranking
##  Min.   :0.000   Min.   :    1   
##  1st Qu.:0.000   1st Qu.: 5093   
##  Median :0.000   Median :10185   
##  Mean   :0.195   Mean   :10185   
##  3rd Qu.:0.000   3rd Qu.:15277   
##  Max.   :1.000   Max.   :20369   
##  NA's   :19465

Description of the corpus

Aggregation

# Keep the records flagged `included == 1` and tag them with source "WOS".
asreviewWOS <- read_excel("data/asreview_dataset_all_assessing.xlsx") %>%
  filter(included == 1) %>%
  mutate(source = "WOS")

names(asreviewWOS)

##  [1] "record_id"                  "Title"                     
##  [3] "CAT_title"                  "Abstract"                  
##  [5] "Authors"                    "Source Title"              
##  [7] "Journal Abbreviation"       "Publication Year"          
##  [9] "Volume"                     "Issue"                     
## [11] "Start Page"                 "End Page"                  
## [13] "DOI"                        "Language"                  
## [15] "Author Keywords"            "Cited Reference Count"     
## [17] "Times Cited, All Databases" "UT (Unique WOS ID)"        
## [19] "Elligibility"               "CorePaper"                 
## [21] "included"                   "asreview_ranking"          
## [23] "reason_inelligibility"      "source"

# One citation string per included WOS record:
# "Authors, Year, Title, Source, Volume, Issue, Start-End".
# NOTE(review): missing fields are pasted as the literal text "NA".
fulltextlist <- asreviewWOS |>
  mutate(
    cit = paste(
      Authors, `Publication Year`, Title, `Source Title`, Volume, Issue,
      paste0(`Start Page`, "-", `End Page`),
      sep = ", "
    ),
    .keep = "none"
  )

#write_excel_csv(fulltextlist, "data/citation_elligible.txt")


# Descriptive statistics of the included WOS records for each combination of
# `Elligibility` and `CorePaper`: group size, range / mean / median of the
# publication year, mean number of cited references, mean and maximum number
# of citations received.
asreviewWOS |>
  group_by(Elligibility, CorePaper) |>
  summarise(
    n = n(),
    minYear = min(`Publication Year`, na.rm = TRUE),
    maxYear = max(`Publication Year`, na.rm = TRUE),
    meanYear = mean(`Publication Year`, na.rm = TRUE),
    medYear = median(`Publication Year`, na.rm = TRUE),
    # na.rm is set on every statistic so that a single missing value cannot
    # turn the means into NA while the max of the same column is still
    # reported.
    meanRefs = mean(`Cited Reference Count`, na.rm = TRUE),
    meanCites = mean(`Times Cited, All Databases`, na.rm = TRUE),
    # Previously labelled `maxRef`, although it is computed on the citation
    # counts and not on the reference counts.
    maxCites = max(`Times Cited, All Databases`, na.rm = TRUE),
    # Same grouping as the default, stated explicitly to silence the message.
    .groups = "drop_last"
  )

## `summarise()` has grouped output by 'Elligibility'. You can override using the
## `.groups` argument.

## # A tibble: 3 × 10
## # Groups:   Elligibility [2]
##   Elligibility CorePaper     n minYear maxYear meanYear medYear meanRefs
##          <dbl>     <dbl> <int>   <dbl>   <dbl>    <dbl>   <dbl>    <dbl>
## 1            0         0   109    1992    2023    2014.   2016.     52.9
## 2            1         0    46    1991    2023    2016.   2018      57.4
## 3            1         1    30    1995    2022    2015.   2017      51.4
## # ℹ 2 more variables: meanCites <dbl>, maxRef <dbl>

Merging sources (Figure 2B)

# Records from the other sources: keep those flagged `included == 1` and take
# the first five characters of `unique ID` as their source label.
expertSources <- read_excel("data/asreview_dataset_all_assessing_othersources.xlsx") %>%
  filter(included == 1) %>%
  mutate(source = substr(`unique ID`, start = 1, stop = 5))

# Column names are copied by position from the WOS table so that both tables
# can be stacked.
names(expertSources) <- names(asreviewWOS)

asreview2 <- rbind(asreviewWOS, expertSources)

asreview2[["Language"]] <- as.factor(asreview2[["Language"]])
summary(asreview2)

##    record_id        Title            CAT_title           Abstract        
##  Min.   :    0   Length:189         Length:189         Length:189        
##  1st Qu.:   47   Class :character   Class :character   Class :character  
##  Median :   94   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 1007                                                           
##  3rd Qu.:  141                                                           
##  Max.   :40003                                                           
##                                                                          
##    Authors          Source Title       Journal Abbreviation Publication Year
##  Length:189         Length:189         Length:189           Min.   :1991    
##  Class :character   Class :character   Class :character     1st Qu.:2011    
##  Mode  :character   Mode  :character   Mode  :character     Median :2017    
##                                                             Mean   :2015    
##                                                             3rd Qu.:2020    
##                                                             Max.   :2023    
##                                                             NA's   :3       
##      Volume            Issue        Start Page          End Page        
##  Min.   :   1.00   Min.   :    1   Length:189         Length:189        
##  1st Qu.:  35.00   1st Qu.:    2   Class :character   Class :character  
##  Median :  53.00   Median :    3   Mode  :character   Mode  :character  
##  Mean   :  81.41   Mean   : 1823                                        
##  3rd Qu.:  79.50   3rd Qu.:    5                                        
##  Max.   :2007.00   Max.   :45271                                        
##  NA's   :7         NA's   :34                                           
##      DOI               Language   Author Keywords    Cited Reference Count
##  Length:189         English:183   Length:189         Min.   : 11.00       
##  Class :character   French :  1   Class :character   1st Qu.: 35.00       
##  Mode  :character   Spanish:  5   Mode  :character   Median : 49.00       
##                                                      Mean   : 54.44       
##                                                      3rd Qu.: 68.00       
##                                                      Max.   :188.00       
##                                                                           
##  Times Cited, All Databases UT (Unique WOS ID)  Elligibility   
##  Min.   :   0.00            Length:189         Min.   :0.0000  
##  1st Qu.:   2.00            Class :character   1st Qu.:0.0000  
##  Median :  13.00            Mode  :character   Median :0.0000  
##  Mean   :  59.45                               Mean   :0.4233  
##  3rd Qu.:  37.00                               3rd Qu.:1.0000  
##  Max.   :4884.00                               Max.   :1.0000  
##                                                                
##    CorePaper         included asreview_ranking reason_inelligibility
##  Min.   :0.0000   Min.   :1   Min.   :  1      Length:189           
##  1st Qu.:0.0000   1st Qu.:1   1st Qu.: 47      Class :character     
##  Median :0.0000   Median :1   Median : 93      Mode  :character     
##  Mean   :0.1693   Mean   :1   Mean   : 93                           
##  3rd Qu.:0.0000   3rd Qu.:1   3rd Qu.:139                           
##  Max.   :1.0000   Max.   :1   Max.   :185                           
##                               NA's   :4                             
##     source         
##  Length:189        
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
##

# Descriptive statistics of the merged corpus for each combination of
# `Elligibility` and `CorePaper`: group size, range / mean / median of the
# publication year, mean number of cited references and of citations received.
asreview2 |>
  group_by(Elligibility, CorePaper) |>
  summarise(
    n = n(),
    minYear = min(`Publication Year`, na.rm = TRUE),
    maxYear = max(`Publication Year`, na.rm = TRUE),
    meanYear = mean(`Publication Year`, na.rm = TRUE),
    medYear = median(`Publication Year`, na.rm = TRUE),
    # na.rm is set on the means as well, consistently with the year
    # statistics, so a single missing value cannot turn a group mean into NA.
    meanRefs = mean(`Cited Reference Count`, na.rm = TRUE),
    meanCites = mean(`Times Cited, All Databases`, na.rm = TRUE),
    # Same grouping as the default, stated explicitly to silence the message.
    .groups = "drop_last"
  )

## `summarise()` has grouped output by 'Elligibility'. You can override using the
## `.groups` argument.

## # A tibble: 3 × 9
## # Groups:   Elligibility [2]
##   Elligibility CorePaper     n minYear maxYear meanYear medYear meanRefs
##          <dbl>     <dbl> <int>   <dbl>   <dbl>    <dbl>   <dbl>    <dbl>
## 1            0         0   109    1992    2023    2014.   2016.     52.9
## 2            1         0    48    1991    2023    2016.   2018      59.3
## 3            1         1    32    1995    2023    2015.   2017      52.5
## # ℹ 1 more variable: meanCites <dbl>

Creating reference lists

Appendix B & C

# Citation strings ("Authors, Year, Title, Source, Volume, Issue, Start-End")
# for the records with `Elligibility == 1`.
# NOTE(review): missing fields are pasted as the literal text "NA".
eligiblelist <- asreview2 |>
  filter(Elligibility == 1) |>
  mutate(
    cit = paste(
      Authors, `Publication Year`, Title, `Source Title`, Volume, Issue,
      paste0(`Start Page`, "-", `End Page`),
      sep = ", "
    ),
    .keep = "none"
  )

head(eligiblelist)

## # A tibble: 6 × 1
##   cit                                                                           
##   <chr>                                                                         
## 1 Fernandes, L; Tempere, J, 2020, Effect of segregation on inequality in kineti…
## 2 Watson, T, 2009, INEQUALITY AND THE MEASUREMENT OF RESIDENTIAL SEGREGATION BY…
## 3 Tammaru, T; Marcinczak, S; Aunap, R; van Ham, M; Janssen, H, 2020, Relationsh…
## 4 Owens, A, 2018, Income Segregation between School Districts and Inequality in…
## 5 Lobmayer, P; Wilkinson, RG, 2002, Inequality, residential segregation by inco…
## 6 Yabe, T; Ukkusuri, SV, 2020, Effects of income inequality on evacuation, reen…

#write_excel_csv(eligiblelist, "data/citation_scoping.txt")

# Citation strings ("Authors, Year, Title, Source, Volume, Issue, Start-End")
# for the records with `CorePaper == 1`.
# NOTE(review): missing fields are pasted as the literal text "NA".
corelist <- asreview2 |>
  filter(CorePaper == 1) |>
  mutate(
    cit = paste(
      Authors, `Publication Year`, Title, `Source Title`, Volume, Issue,
      paste0(`Start Page`, "-", `End Page`),
      sep = ", "
    ),
    .keep = "none"
  )

head(corelist)

## # A tibble: 6 × 1
##   cit                                                                           
##   <chr>                                                                         
## 1 Fernandes, L; Tempere, J, 2020, Effect of segregation on inequality in kineti…
## 2 Watson, T, 2009, INEQUALITY AND THE MEASUREMENT OF RESIDENTIAL SEGREGATION BY…
## 3 Tammaru, T; Marcinczak, S; Aunap, R; van Ham, M; Janssen, H, 2020, Relationsh…
## 4 Scarpa, S, 2015, The impact of income inequality on economic residential segr…
## 5 Rodriguez, GM, 2020, Socioeconomic Inequality and Housing Segregation, CUADER…
## 6 Chen, WH; Myles, J; Picot, G, 2012, Why Have Poorer Neighbourhoods Stagnated …

#write_excel_csv(corelist, "data/citation_core.txt")

Analysing eligible articles

Theoretical framework of causal pathways

# Theoretical framework drawn as a directed graph: the seven concepts are the
# nodes (placed by hand through lon/lat) and the 37 hypothesised causal
# relations are the edges.
nodes <- data.frame(
  id = LETTERS[1:7],
  concepts = c(
    "Economic inequality", "Economic segregation",
    "Labour ineq.", "School ineq.", "Housing ineq.",
    "Other ineq. (health, crime...)", "Other"
  ),
  type = c(rep("Main concept", 2), rep("Mediator", 4), "Other"),
  lon = c(1, 3, 2, 2, 2, 2, 2),
  lat = c(2.5, 2.5, 4.5, 4, 3.5, 2, 1.5)
)

# Edge i runs from `from[i]` to `to[i]`; `id` is the pathway number (1-37).
links <- data.frame(
  from = c(
    "A", "C", "D", "E", "F",
    "G", "B", "C", "D", "E",
    "F", "G", "A", "A", "A",
    "A", "A", "B", "B", "B",
    "B", "B", "D", "C", "D",
    "E", "E", "C", "G", "C",
    "G", "G", "C", "D", "E",
    "F", "G"
  ),
  to = c(
    "B", "B", "B", "B", "B",
    "B", "A", "A", "A", "A",
    "A", "A", "C", "D", "E",
    "F", "G", "C", "D", "E",
    "F", "G", "C", "D", "E",
    "D", "C", "E", "C", "G",
    "E", "F", "G", "G", "G",
    "G", "D"
  ),
  id = 1:37,
  type = c(
    "Direct", rep("Mediated", 4), "External",        # edges 1-6
    "Direct", rep("Mediated", 4), "External",        # edges 7-12
    rep("On mediators", 4), "External",              # edges 13-17
    rep("On mediators", 4), "External",              # edges 18-22
    rep("Inter-Mediator", 6),                        # edges 23-28
    rep("External", 9)                               # edges 29-37
  )
)
net <- graph_from_data_frame(d = links, vertices = nodes, directed = TRUE)

# Edge styles are picked by the integer code of the relation type, i.e. by
# alphabetical level order: Direct, External, Inter-Mediator, Mediated,
# On mediators.
edge_type <- as.factor(E(net)$type)
ecolors <- c("mediumvioletred", "gray75", "gray45", "cornflowerblue", "mediumaquamarine")
alphaecolors <- alpha(ecolors, 0.5)
esizes <- c(5, 1, 2, 4, 3)
E(net)$color <- alphaecolors[edge_type]
E(net)$width <- esizes[edge_type]
# E(net)$label <- E(net)$id
# E(net)$label.color <- ecolors[as.factor(E(net)$type)]
E(net)$weight <- as.numeric(esizes[edge_type]) * 5

# Vertex styles follow the same scheme (levels: Main concept, Mediator, Other).
vertex_type <- as.factor(V(net)$type)
vcolors <- c("tomato", "orange", "black")
vsizes <- c(4, 2, 1)
V(net)$label <- V(net)$concepts
V(net)$color <- vcolors[vertex_type]
V(net)$size <- vsizes[vertex_type] * 5
V(net)$label.cex <- sqrt(vsizes[vertex_type]) / 2
V(net)$label.degree <- c(pi / 2, pi / 2, -1.5, -1.5, pi / 2, pi / 2, pi / 2)
V(net)$frame.color <- "white"

# The custom plotting functions are evaluated inside the igraph namespace so
# that they can reach igraph's internal helpers.
environment(plot.igraph2) <- asNamespace("igraph")
environment(igraph.Arrows2) <- asNamespace("igraph")

l <- layout.norm(as.matrix(nodes[, c("lon", "lat")]))
edge_weight <- as.numeric(E(net)$weight)
# NOTE(review): `edge.arrow.size` is supplied twice below (0.4 and the
# weight-based vector). Which value plot.igraph2() honours depends on its
# definition, which is not visible here; confirm and drop the unused one.
plot.igraph2(net, edge.arrow.size = .4, edge.curved = 0.55, layout = l,
             vertex.label.dist = 2,
             vertex.label.font = 2, vertex.label.color = "black",
             # edge.label.color = "gray20",
             edge.label.cex = 0.75,
             edge.label.font = 2,
             edge.arrow.size = 2 * (edge_weight / max(edge_weight) / 2),
             edge.arrow.width = 2 * (edge_weight / max(edge_weight)),
             main = "Direct, indirect and retro effects \n of economic inequality on economic segregation")

legend(x = -1.5, y = -1.1, levels(as.factor(nodes$type)), pch = 21,
       col = "white", pt.bg = vcolors, pt.cex = 2, cex = .8, bty = "n", ncol = 1,
       title = "Type of concept")
legend(x = 0.5, y = -1.1, levels(as.factor(links$type)), pch = 21,
       col = "white", pt.bg = alphaecolors, pt.cex = 2, cex = .8, bty = "n", ncol = 1,
       title = "Type of relation")

Coding framework for causal pathways analysis (Appendix D)

# Coding framework: the same concept graph as the theoretical framework, but
# every edge is labelled with its pathway number (1-37), which is the code
# recorded for each article.
nodes <- data.frame(
  id = LETTERS[1:7],
  concepts = c(
    "Economic inequality", "Economic segregation",
    "Labour ineq.", "School ineq.", "Housing ineq.",
    "Other ineq. (health, crime...)", "Other"
  ),
  type = c(rep("Main concept", 2), rep("Mediator", 4), "Other"),
  lon = c(1, 3, 2, 2, 2, 2, 2),
  lat = c(2.5, 2.5, 4.5, 4, 3.5, 2, 1.5)
)

# Edge i runs from `from[i]` to `to[i]`; `id` is the pathway number.
links <- data.frame(
  from = c(
    "A", "C", "D", "E", "F",
    "G", "B", "C", "D", "E",
    "F", "G", "A", "A", "A",
    "A", "A", "B", "B", "B",
    "B", "B", "D", "C", "D",
    "E", "E", "C", "G", "C",
    "G", "G", "C", "D", "E",
    "F", "G"
  ),
  to = c(
    "B", "B", "B", "B", "B",
    "B", "A", "A", "A", "A",
    "A", "A", "C", "D", "E",
    "F", "G", "C", "D", "E",
    "F", "G", "C", "D", "E",
    "D", "C", "E", "C", "G",
    "E", "F", "G", "G", "G",
    "G", "D"
  ),
  id = 1:37,
  type = c(
    "Direct", rep("Mediated", 4), "External",        # edges 1-6
    "Direct", rep("Mediated", 4), "External",        # edges 7-12
    rep("On mediators", 4), "External",              # edges 13-17
    rep("On mediators", 4), "External",              # edges 18-22
    rep("Inter-Mediator", 6),                        # edges 23-28
    rep("External", 9)                               # edges 29-37
  )
)
net <- graph_from_data_frame(d = links, vertices = nodes, directed = TRUE)

# Edge styles are picked by the integer code of the relation type, i.e. by
# alphabetical level order: Direct, External, Inter-Mediator, Mediated,
# On mediators.
edge_type <- as.factor(E(net)$type)
ecolors <- c("mediumvioletred", "gray75", "gray45", "cornflowerblue", "mediumaquamarine")
alphaecolors <- alpha(ecolors, 0.5)
esizes <- c(5, 1, 2, 4, 3)
E(net)$color <- alphaecolors[edge_type]
E(net)$width <- esizes[edge_type]
E(net)$label <- E(net)$id
E(net)$label.color <- ecolors[edge_type]
E(net)$weight <- as.numeric(esizes[edge_type]) * 5

# Vertex styles follow the same scheme (levels: Main concept, Mediator, Other).
vertex_type <- as.factor(V(net)$type)
vcolors <- c("tomato", "orange", "black")
vsizes <- c(4, 2, 1)
V(net)$label <- V(net)$concepts
V(net)$color <- vcolors[vertex_type]
V(net)$size <- vsizes[vertex_type] * 5
V(net)$label.cex <- sqrt(vsizes[vertex_type]) / 2
V(net)$label.degree <- c(pi / 2, pi / 2, -1.5, -1.5, pi / 2, pi / 2, pi / 2)
V(net)$frame.color <- "white"

# The custom plotting functions are evaluated inside the igraph namespace so
# that they can reach igraph's internal helpers.
environment(plot.igraph2) <- asNamespace("igraph")
environment(igraph.Arrows2) <- asNamespace("igraph")

l <- layout.norm(as.matrix(nodes[, c("lon", "lat")]))
edge_weight <- as.numeric(E(net)$weight)
# NOTE(review): `edge.arrow.size` is supplied twice below (0.4 and the
# weight-based vector). Which value plot.igraph2() honours depends on its
# definition, which is not visible here; confirm and drop the unused one.
plot.igraph2(net, edge.arrow.size = .4, edge.curved = 0.55, layout = l,
             vertex.label.dist = 2,
             vertex.label.font = 2, vertex.label.color = "black",
             # edge.label.color = "gray20",
             edge.label.cex = 0.75,
             edge.label.font = 2,
             edge.arrow.size = 2 * (edge_weight / max(edge_weight) / 2),
             edge.arrow.width = 2 * (edge_weight / max(edge_weight)),
             main = "Direct, indirect and retro effects \n of economic inequality on economic segregation")

legend(x = -1.5, y = -1.1, levels(as.factor(nodes$type)), pch = 21,
       col = "white", pt.bg = vcolors, pt.cex = 2, cex = .8, bty = "n", ncol = 1,
       title = "Type of concept")
legend(x = 0.5, y = -1.1, levels(as.factor(links$type)), pch = 21,
       col = "white", pt.bg = alphaecolors, pt.cex = 2, cex = .8, bty = "n", ncol = 1,
       title = "Type of relation")

Merging coding results with bibliometric information

# Download the coding form answers (one row per coded article).
# The URL must not contain whitespace: "https: //" is not a valid address.
form <- gsheet2tbl("https://docs.google.com/spreadsheets/d/1iHgL9ziZ2IGhYXUqNmphgbUkfjfzTH4RVpBgq80A2Wc/edit?usp=sharing")


# Attach the bibliometric record to each coded article and split the
# "Causal Path" answer (";" removed, space-separated) into a list-column.
fdata <- left_join(form, asreview2, by = c("Article-ID" = "UT (Unique WOS ID)")) %>%
  mutate(path = strsplit(gsub(";", "", `Causal Path`), " "))

# Descriptive statistics by eligibility and core status.
fdata |>
  group_by(Elligibility, CorePaper) |>
  summarise(
    n = n(),
    minYear = min(`Publication Year`, na.rm = TRUE),
    maxYear = max(`Publication Year`, na.rm = TRUE),
    meanYear = mean(`Publication Year`, na.rm = TRUE),
    medYear = median(`Publication Year`, na.rm = TRUE),
    meanRefs = mean(`Cited Reference Count`),
    meanCites = mean(`Times Cited, All Databases`)
  )

## `summarise()` has grouped output by 'Elligibility'. You can override using the
## `.groups` argument.

## # A tibble: 2 × 9
## # Groups:   Elligibility [1]
##   Elligibility CorePaper     n minYear maxYear meanYear medYear meanRefs
##          <dbl>     <dbl> <int>   <dbl>   <dbl>    <dbl>   <dbl>    <dbl>
## 1            1         0    48    1991    2023    2016.    2018     59.3
## 2            1         1    32    1995    2023    2015.    2017     52.5
## # ℹ 1 more variable: meanCites <dbl>

# Histogram of the dates on which articles were coded, in 30-day bins.
# `Horodateur` is the form timestamp; only its day/month/year part is parsed.
fdata$date <- as.Date(fdata$Horodateur, format = "%d/%m/%Y")
fdata$num <- as.numeric(fdata$date)
bin <- 30
# after_stat(count) replaces the deprecated `..count..` notation.
ggplot(fdata, aes(date, after_stat(count))) +
  geom_histogram(binwidth = bin, colour = "black") +
  scale_x_date(
    # one axis break every two bins, starting at the first coding date
    breaks = seq(min(fdata$date), max(fdata$date) + 1, bin * 2),
    labels = date_format("%b-%y"),
    limits = c(as.Date("2023-03-01"), as.Date("2024-09-01"))
  ) +
  ylab("Number of articles coded per month") +
  xlab("")

Analysis

Language & case study (figure 3A)

 # Number of coded articles per publication language.
 summary(fdata$Language)

## English  French Spanish 
##      78       0       2

# Cross-tabulate core status (rows: CorePaper 0/1) against the coded study
# location(s); multi-country answers appear as their own column.
table(fdata$CorePaper, fdata$`if Location(s) where (Zone)?`)

##    
##     Argentina Australia
##   0         0         2
##   1         1         0
##    
##     Austria; Netherlands; Sweden; Spain; Norway; Estonia; UK; Hungary; Lithuania; Greece; Czech Republic; Latvia
##   0                                                                                                            0
##   1                                                                                                            1
##    
##     Brazil Canada China Finland France Germany
##   0      0      0     3       1      1       1
##   1      1      2     0       0      0       0
##    
##     Greece; Hungary; Finland; Spain; Italy; Norway; Sweden; Estonia Israel
##   0                                                               0      1
##   1                                                               1      0
##    
##     Netherlands Norway South Africa Sweden UK United States of America
##   0           0      1            1      1  2                       27
##   1           1      1            0      4  1                        9
##    
##     United States of America, France
##   0                                0
##   1                                1
##    
##     United States of America, Netherlands, France, UK, Australia, South Africa, Denmark, Canada, Brazil; Ireland; Mexico; New Zealand;
##   0                                                                                                                                  1
##   1                                                                                                                                  0
##    
##     Uruguay
##   0       0
##   1       1

Disciplinary partition (Appendix E)

  # Journal name as a factor; used as the key for the discipline lookup.
  fdata$publication <- as_factor(fdata$`Source Title`)

# Hand-made lookup table mapping each journal to a discipline.
discipline <- read_csv("data/journal_lookup.csv")

## Rows: 59 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): journal, discipline
## dbl (1): papers
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

  # Attach the discipline of each article's journal.
  fdata <- fdata |>
    left_join(discipline, by = c("publication" = "journal"))

  # Fix the display order of the disciplines.
  discipline_levels <- c(
    "SOC", "ECON", "URBREG", "GEOG",
    "DEMO", "CRIMEPID", "PHYMAT", "OTHER"
  )
  fdata$discipline <- factor(fdata$discipline, levels = discipline_levels)

  summary(fdata$discipline)

##      SOC     ECON   URBREG     GEOG     DEMO CRIMEPID   PHYMAT    OTHER 
##       18       16       15        8        6        5        3        9

  # Numeric publication year, used as the x axis of the plots below.
  fdata$year <- as.numeric(fdata$`Publication Year`)

Distribution by disciplines (Figure 2A)

  # Stacked histogram of publication years, coloured by discipline.
  fdata |>
    ggplot(aes(year, fill = discipline)) +
    geom_histogram() +
    labs(y = "Number of eligible article published")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Conceptual understanding

# 0/1 indicators of how each paper conceptualises segregation, derived from
# the form answer. A paper can be flagged for several concepts at once.
seg_concept <- form$`Concept for "segregation"`

fdata$SegAsResidential <- as.factor(as.numeric(
  grepl("Residential segregation", seg_concept, fixed = TRUE)
))
fdata$SegAsConcentration <- as.factor(as.numeric(
  grepl("Concentration", seg_concept, fixed = TRUE)
))

# "Other" = neither of the two concepts above.
fdata$SegAsOther <- as.factor(as.numeric(
  fdata$SegAsResidential == 0 & fdata$SegAsConcentration == 0
))

summary(fdata$SegAsResidential)

##  0  1 
## 16 64

# Count of papers without (0) / with (1) the "Concentration" concept.
summary(fdata$SegAsConcentration)

##  0  1 
## 70 10

  # Count of papers using neither the residential nor the concentration concept (1).
  summary(fdata$SegAsOther)

##  0  1 
## 71  9

  # 0/1 indicators of how each paper conceptualises inequality, derived from
  # the form answer.
  ineq_concept <- form$`Concept for "inequality"`

  fdata$IneqAsDist <- as.factor(as.numeric(
    grepl("Skewed distribution", ineq_concept, fixed = TRUE)
  ))
  fdata$IneqAsConcentration <- as.factor(as.numeric(
    grepl("Economic concentration (top income shares)", ineq_concept, fixed = TRUE)
  ))

  # "Other" = neither of the two concepts above.
  fdata$IneqAsOther <- as.factor(as.numeric(
    fdata$IneqAsDist == 0 & fdata$IneqAsConcentration == 0
  ))

  summary(fdata$IneqAsDist)

##  0  1 
## 33 47

  # Count of papers without (0) / with (1) the "Economic concentration" concept.
  summary(fdata$IneqAsConcentration)

##  0  1 
## 71  9

  # Count of papers using neither the distribution nor the concentration concept (1).
  summary(fdata$IneqAsOther)

##  0  1 
## 48 32

  # 0/1 indicators of which economic resource each paper considers, derived
  # from the form answer.
  econ_concept <- form$`Concept for "Economic"`

  # "Income" or "income" anywhere in the answer.
  fdata$EconAsIncome <- as.factor(as.numeric(grepl("[Ii]ncome", econ_concept)))
  fdata$EconAsWealth <- as.factor(as.numeric(
    grepl("Wealth", econ_concept, fixed = TRUE)
  ))

  # A term followed by ", " means that something else is listed after it.
  wealth_then_more <- grepl("Wealth, ", econ_concept, fixed = TRUE)
  income_then_more <- grepl("Income, ", econ_concept, fixed = TRUE)
  no_income <- fdata$EconAsIncome == 0
  no_wealth <- fdata$EconAsWealth == 0

  # "Other" = an additional resource is listed next to income and/or wealth,
  # or neither income nor wealth is mentioned at all.
  fdata$EconAsOther <- as.factor(as.numeric(
    (wealth_then_more & no_income) |
      (income_then_more & no_wealth) |
      (wealth_then_more & income_then_more) |
      (no_wealth & no_income)
  ))

  summary(fdata$EconAsIncome)

##  0  1 
##  7 73

  # Count of papers without (0) / with (1) "Wealth" in the economic concept.
  summary(fdata$EconAsWealth)

##  0  1 
## 67 13

  # Count of papers flagged (1) as using another economic resource.
  summary(fdata$EconAsOther)

##  0  1 
## 65 15

Concept distribution

fdata$ID <- fdata$`Article-ID`

# IDs of the rows of `tbl` whose 0/1 indicator column `flag` equals 1.
flagged_ids <- function(tbl, flag) {
  unlist(as.list(tbl[tbl[[flag]] == 1, "ID"]))
}

# Three-set Venn diagram of concept usage; `high` is the colour of the most
# populated region and `title` the plot title.
concept_venn <- function(sets, high, title) {
  ggVennDiagram(sets, label_alpha = 0.2, edge_lty = 0, set_size = 2.7) +
    ggplot2::scale_fill_gradient(low = "white", high = high) +
    ggplot2::ggtitle(title, subtitle = "Eligible papers: n=80") +
    ggplot2::theme(
      legend.position = "right",
      legend.justification = "right",
      legend.direction = "vertical"
    )
}

# Segregation concepts
segconcept <- fdata[, c("ID", "SegAsResidential", "SegAsConcentration", "SegAsOther")]
xseg <- list(
  Residential = flagged_ids(segconcept, "SegAsResidential"),
  Concentration = flagged_ids(segconcept, "SegAsConcentration"),
  Other = flagged_ids(segconcept, "SegAsOther")
)
concept_venn(xseg, "aquamarine3", "How is segregation conceptualized?")

# Inequality concepts
ineqconcept <- fdata[, c("ID", "IneqAsDist", "IneqAsConcentration", "IneqAsOther")]
xineq <- list(
  Distribution = flagged_ids(ineqconcept, "IneqAsDist"),
  Concentration = flagged_ids(ineqconcept, "IneqAsConcentration"),
  Other = flagged_ids(ineqconcept, "IneqAsOther")
)
concept_venn(xineq, "#FFA500", "How is inequality conceptualized?")

# Economic resource concepts
econconcept <- fdata[, c("ID", "EconAsIncome", "EconAsWealth", "EconAsOther")]
xecon <- list(
  Income = flagged_ids(econconcept, "EconAsIncome"),
  Wealth = flagged_ids(econconcept, "EconAsWealth"),
  Other = flagged_ids(econconcept, "EconAsOther")
)
concept_venn(xecon, "#C77398", "How is the economic resource conceptualized?")

Query about papers using wealth as economic concept:

  # Papers whose economic concept includes wealth, with the columns needed to
  # inspect them.
  wealth_query_cols <- c(
    "Authors", "year", "Concept for \"Economic\"", "CorePaper",
    "if Location(s) where (Zone)?",
    "Methods used for assessing effects and relation between segregation and inequality"
  )
  fdata |>
    filter(EconAsWealth == 1) |>
    select(all_of(wealth_query_cols))

## # A tibble: 13 × 6
##    Authors          year Concept for "Economi…¹ CorePaper if Location(s) where…²
##    <chr>           <dbl> <chr>                      <dbl> <chr>                 
##  1 Wessel, T        2022 Income, Wealth, Socia…         0 Norway                
##  2 Fernandes, L; …  2020 Income, Wealth                 1 <NA>                  
##  3 Levy, BL         2022 Wealth                         0 United States of Amer…
##  4 De la Cruz-Vie…  2018 Wealth                         0 United States of Amer…
##  5 Hochstenbach, C  2018 Income, Wealth                 1 Netherlands           
##  6 Panagiotakopou…  2022 Income, Wealth                 0 <NA>                  
##  7 Gordon, C; Bru…  2020 Wealth, housing equity         0 United States of Amer…
##  8 ViforJ, RO; Cl…  2023 Wealth, intergenerati…         0 Australia             
##  9 Wessel, T        2016 Income, Wealth                 1 Norway                
## 10 Reardon, SF; F…  2015 Income, Wealth                 0 United States of Amer…
## 11 Fang, M; Huang…  2022 Income, Wealth, wages…         0 China                 
## 12 Bonakdar, SB; …  2023 Income, Wealth                 0 <NA>                  
## 13 Thomas, H; Man…  2018 Income, Wealth                 0 United States of Amer…
## # ℹ abbreviated names: ¹`Concept for "Economic"`,
## #   ²`if Location(s) where (Zone)?`
## # ℹ 1 more variable:
## #   `Methods used for assessing effects and relation between segregation and inequality` <chr>

Gini distribution (figure 3B)

# Latest average Gini per study zone (columns: Zone, LatestAverageGini);
# joined to the coded articles on their study location.
averageGini <- read_csv("data/country_Gini.csv")

## Rows: 21 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Zone
## dbl (1): LatestAverageGini
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Latest Gini value per country (column LatestGini), used as the world
# reference distribution in the density plots.
worldGini <- read_csv("data/country_Gini_context.csv")

## Rows: 145 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country Name, Country Code, Indicator Name, Indicator Code
## dbl (1): LatestGini
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Attach the Gini value of the study zone to every coded article.
analysisPerGiniLevel <- fdata |>
  left_join(averageGini, by = c(`if Location(s) where (Zone)?` = "Zone"))

# Density of national Gini values: world reference (pink) against the
# countries covered by `studied` (orange). `label_y` is the height of the
# "Countries studied" annotation.
gini_density <- function(studied, title, label_y) {
  ggplot() +
    geom_density(
      data = worldGini, aes(x = LatestGini),
      fill = alpha("#FF8095", 0.6), color = NA
    ) +
    geom_density(
      data = studied, aes(x = LatestAverageGini),
      fill = alpha("#FFAA00", 0.6), color = NA
    ) +
    xlab("Latest value of national Gini") +
    ylab("Density distribution") +
    ggtitle(title) +
    scale_y_continuous(limits = c(0, 0.115)) +
    ggplot2::annotate("text", x = 50, y = 0.023, label = "World",
                      color = alpha("#FF8095", 0.9)) +
    ggplot2::annotate("text", x = 50, y = label_y, label = "Countries studied",
                      color = alpha("#FFAA00", 0.9)) +
    theme_light()
}

gini_density(analysisPerGiniLevel, "Eligible articles | n = 80", 0.098)

gini_density(
  analysisPerGiniLevel[analysisPerGiniLevel$CorePaper == 1, ],
  "Core articles | n = 32",
  0.068
)

Methodology & study design

  # 0/1 indicators of the methods each paper uses, derived from the form answer.
  methods_used <- form$`Methods used for assessing effects and relation between segregation and inequality`

  # 0/1 factor: does the answer contain `label` (literal match)?
  uses_method <- function(label) {
    as.factor(as.numeric(grepl(label, methods_used, fixed = TRUE)))
  }

  # Analytical models and simulations are counted together.
  fdata$AnalyticalModel <- as.factor(as.numeric(
    grepl("analytical model", methods_used, fixed = TRUE) |
      grepl("Simulation", methods_used, fixed = TRUE)
  ))
  fdata$DiscourseAnalysis <- uses_method("Discourse analysis")
  fdata$HistoricalAnalysis <- uses_method("Historical analysis")
  fdata$IndexAnalysis <- uses_method("Index comparison")
  fdata$MapComparison <- uses_method("Map comparison")
  fdata$Simulation <- uses_method("Simulation")
  fdata$Statistical <- uses_method("Statistical regression/correlation")

  # Table used for the methodology figures.
  methodo <- fdata |>
    select(
      Authors, year, CorePaper, `if Location(s) where (Zone)?`, discipline,
      HistoricalAnalysis, AnalyticalModel, DiscourseAnalysis, IndexAnalysis,
      MapComparison, Statistical,
      `Type of analysis`, `Scale of analysis`, `Unit of analysis`,
      `Time frame of study`
    )

  summary(methodo)

##    Authors               year        CorePaper   if Location(s) where (Zone)?
##  Length:80          Min.   :1991   Min.   :0.0   Length:80                   
##  Class :character   1st Qu.:2014   1st Qu.:0.0   Class :character            
##  Mode  :character   Median :2018   Median :0.0   Mode  :character            
##                     Mean   :2015   Mean   :0.4                               
##                     3rd Qu.:2021   3rd Qu.:1.0                               
##                     Max.   :2023   Max.   :1.0                               
##                     NA's   :2                                                
##    discipline HistoricalAnalysis AnalyticalModel DiscourseAnalysis
##  SOC    :18   0:63               0:61            0:77             
##  ECON   :16   1:17               1:19            1: 3             
##  URBREG :15                                                       
##  OTHER  : 9                                                       
##  GEOG   : 8                                                       
##  DEMO   : 6                                                       
##  (Other): 8                                                       
##  IndexAnalysis MapComparison Statistical Type of analysis   Scale of analysis 
##  0:53          0:72          0:33        Length:80          Length:80         
##  1:27          1: 8          1:47        Class :character   Class :character  
##                                          Mode  :character   Mode  :character  
##                                                                               
##                                                                               
##                                                                               
##                                                                               
##  Unit of analysis   Time frame of study
##  Length:80          Length:80          
##  Class :character   Class :character   
##  Mode  :character   Mode  :character   
##                                        
##                                        
##                                        
##

Methodological distribution (Figure 4)

  # One panel of Figure 4: bar chart of the 0/1 indicator `column`, faceted by
  # discipline. The x scale is limited to the value 1, so only the papers that
  # use the method are drawn. `colour` is used for the bars and the axis title.
  method_panel <- function(column, colour, label) {
    methodo |>
      ggplot(aes(x = !!as.name(column))) +
      geom_bar(fill = colour) +
      facet_wrap(~discipline, ncol = 4) +
      xlab(label) +
      ylab("Number of articles") +
      theme(axis.title.x = element_text(colour = colour)) +
      scale_y_continuous(limits = c(0, 17)) +
      scale_x_discrete(labels = NULL, limits = c(1))
  }

  p1 <- method_panel("Statistical", "blue",
                     "Use of statistical regression models")
  p2 <- method_panel("AnalyticalModel", "orange",
                     "Use of analytical models and simulation")
  p3 <- method_panel("IndexAnalysis", "forestgreen",
                     "Use of index comparison")
  p4 <- method_panel("HistoricalAnalysis", "firebrick3",
                     "Use of historical analysis")
  p5 <- method_panel("MapComparison", "aquamarine3",
                     "Use of map comparisons")
  p6 <- method_panel("DiscourseAnalysis", "#C77398",
                     "Use of discourse analysis")

  grid.arrange(p1, p2, p3,
               p4, p5, p6, ncol = 3)

Time frame of studies (Appendix G)

# Articles whose coded time frame is a period, i.e. contains ":" or "-".
# The test uses fdata's own column rather than the separate `form` table, so
# the filter cannot get out of step with the rows being filtered.
longitudinal <- fdata |>
  filter(grepl("[-:]", `Time frame of study`)) |>
  select(Authors, year, `Time frame of study`)

# Hand-curated start/end years of the longitudinal studies, reshaped to one
# row per (reference, start|end) so each study can be drawn as a segment.
# Years become 1 January dates; the "-" before the month keeps the string a
# well-formed "YYYY-01-01" date.
data <- read_csv("data/longitudinal_studies_hand.csv") |>
  mutate(
    start = ymd(paste0(start, "-01-01")),
    end = ymd(paste0(end, "-01-01"))
  ) |>
  arrange(desc(end)) |>
  pivot_longer(
    cols = c("start", "end"),
    names_to = "date_type",
    values_to = "date"
  )

## Rows: 22 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): reference
## dbl (2): start, end
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Distribution of coefficient values (Figure 6)

  # One horizontal segment per study, from its start date to its end date,
  # keeping the order in which the references appear in `data`.
  data |>
    ggplot(aes(x = fct_inorder(reference), y = date)) +
    geom_line(size = 0.5, col = "blue") +
    labs(x = "", y = "") +
    coord_flip() +
    theme_minimal()

Units and Scales

  # 0/1 indicators of the scale at which each paper works, derived from the
  # form answer.
  scale_answer <- form$`Scale of analysis`

  fdata$nationalScale <- as.factor(as.numeric(
    grepl("National", scale_answer, fixed = TRUE)
  ))
  fdata$metroScale <- as.factor(as.numeric(
    grepl("Urban (metropolitan)", scale_answer, fixed = TRUE)
  ))

  # National scale (rows 0/1) by discipline.
  table(fdata$nationalScale, fdata$discipline)

##    
##     SOC ECON URBREG GEOG DEMO CRIMEPID PHYMAT OTHER
##   0   4    7     11    6    1        4      2     6
##   1  14    9      4    2    5        1      1     3

  # Metropolitan scale (rows 0/1) by discipline.
  table(fdata$metroScale, fdata$discipline)

##    
##     SOC ECON URBREG GEOG DEMO CRIMEPID PHYMAT OTHER
##   0  14   10      8    3    6        5      2     3
##   1   4    6      7    5    0        0      1     6

  # 0/1 indicators of the unit of analysis of each paper, derived from the
  # form answer.
  unit_answer <- form$`Unit of analysis`

  fdata$indUnit <- as.factor(as.numeric(
    grepl("Individual", unit_answer, fixed = TRUE)
  ))
  fdata$houseUnit <- as.factor(as.numeric(
    grepl("Household", unit_answer, fixed = TRUE)
  ))
  fdata$neighUnit <- as.factor(as.numeric(
    grepl("Neighbourhood", unit_answer, fixed = TRUE)
  ))

  summary(fdata$indUnit)

##  0  1 
## 47 33

  # Count of papers without (0) / with (1) households as a unit of analysis.
  summary(fdata$houseUnit)

##  0  1 
## 54 26

  # Count of papers without (0) / with (1) neighbourhoods as a unit of analysis.
  summary(fdata$neighUnit)

##  0  1 
## 68 12

SLR Results

Causal pathways

# Causal Paths
  
  # Plot the network of causal pathways for a selection of papers.
  #
  # Arguments:
  #   data    table of coded papers. The function reads the columns CorePaper,
  #           `Article-ID`, path (list-column of pathway ids), Authors and
  #           `Publication Year`.
  #   nodes   vertex table handed to graph_from_data_frame(); its `type` and
  #           `concepts` columns drive vertex colour, size and label.
  #   links   edge table handed to graph_from_data_frame(); its `id` column is
  #           joined to the pathway ids and its `type` column drives edge colour.
  #   ID      "all"  -> papers with CorePaper == 0,
  #           "core" -> papers with CorePaper == 1,
  #           anything else -> the paper(s) whose `Article-ID` equals ID.
  #   legend  if TRUE, add the "Type of concept" and "Type of relation" legends.
  #
  # The function is called for its plotting side effect.
  # NOTE(review): `l` (the layout) and plot.igraph2() are not defined in this
  # function; they are presumably created earlier in the notebook — confirm.
  fishgraph <- function(data, nodes, links, ID = "all", legend = TRUE) {
    # Select the papers and build the plot title.
    if (ID == "all") {
      fdata <- data %>%
        filter(CorePaper == 0)
      pastetext <- paste0("Distribution of effects in non-core articles | n = ",
                          nrow(fdata[!is.na(fdata$path), ]))
    } else if (ID == "core") {
      fdata <- data %>%
        filter(CorePaper == 1)
      pastetext <- paste0("Distribution of effects in core articles | n = ",
                          nrow(fdata[!is.na(fdata$path), ]))
    } else {
      fdata <- data %>%
        filter(`Article-ID` == ID)
      pastetext <- paste0("Article = ", fdata$Authors,
                          ", ", fdata$`Publication Year`)
    }

    # Long format: one row per (paper index, pathway id).
    dt_list <- map(fdata$path, as.data.table)
    dt <- as.data.frame(rbindlist(dt_list, fill = TRUE, idcol = TRUE))
    colnames(dt) <- c("ID", "path")
    dt$path <- as.numeric(dt$path)

    # Number of occurrences of each pathway id, restricted to ids 1..37.
    freq <- dt %>%
      group_by(path) %>%
      count(.)
    freq <- freq[freq$path %in% 1:37, ]
    # `f` is the raw count. To plot shares instead, use
    # freq$f <- freq$n / sum(freq$n) * 100 (computed after the filter above).
    freq$f <- freq$n
    freq$id <- freq$path
    links <- left_join(links, freq, by = "id")

    net <- graph_from_data_frame(d = links, vertices = nodes, directed = TRUE)

    # Edge styling: one colour per relation type; edges with no observation
    # (f is NA after the join) are drawn fully transparent.
    ecolors <- c("mediumvioletred", "gray75", "gray45",
                 "cornflowerblue", "mediumaquamarine")
    alphaecolors <- alpha(ecolors, 0.6)
    edge_type <- as.factor(E(net)$type)
    observed <- !is.na(E(net)$f)
    E(net)$color <- ifelse(observed, alphaecolors[edge_type], alpha("white", 0))
    E(net)$width <- E(net)$f
    E(net)$weight <- E(net)$f
    # Only edges with f > 3 get a label.
    # NOTE(review): `f` holds raw counts here, yet the label appends "%" —
    # confirm whether counts or percentages are intended.
    E(net)$label <- ifelse(E(net)$f > 3, paste0(round(E(net)$f, 0), "%"), NA)
    E(net)$label.color <- ifelse(observed, ecolors[edge_type], alpha("white", 0))

    # Vertex styling: colour and size follow the concept type.
    vcolors <- c("tomato", "orange", "black")
    vsizes <- c(4, 2, 1)
    vertex_type <- as.factor(V(net)$type)
    V(net)$label <- V(net)$concepts
    V(net)$color <- vcolors[vertex_type]
    V(net)$size <- vsizes[vertex_type] * 5
    V(net)$label.cex <- sqrt(vsizes[vertex_type]) / 2
    # Seven label angles are given, so this presumes seven vertices in a
    # fixed order — TODO confirm against `nodes`.
    V(net)$label.degree <- c(pi, 2 * pi, -1.5, -1.5, pi / 2, pi / 2, pi / 2)
    V(net)$frame.color <- "white"

    plot.igraph2(net, edge.arrow.size = .6, edge.curved = 0.4, layout = l,
                 vertex.label.dist = 2,
                 vertex.label.font = 2, vertex.label.color = "black",
                 edge.label.cex = 0.75,
                 edge.label.font = 2,
                 main = pastetext
    )

    if (legend) {
      legend(x = -1.5, y = -1.1, levels(as.factor(nodes$type)), pch = 21,
             col = "white", pt.bg = vcolors, pt.cex = 2, cex = .8, bty = "n",
             ncol = 1, title = "Type of concept")
      legend(x = 0.5, y = -1.1, levels(as.factor(links$type)), pch = 21,
             col = "white", pt.bg = alphaecolors, pt.cex = 2, cex = .8, bty = "n",
             ncol = 1, title = "Type of relation")
    }
  }

Direction of causality

   # Long format of the coded pathways: one row per (paper index, pathway id).
   # With idcol = TRUE, ID is the position of the paper in fdata$path.
   dt_list <- map(fdata$path, as.data.table)
   dt <- as.data.frame(rbindlist(dt_list, fill = TRUE, idcol = TRUE))
   colnames(dt) <- c("ID", "path")
   dt$path <- as.numeric(dt$path)

   # Pathway ids grouped by direction; the groups feed the IneqToSeg,
   # SegToIneq and MedToMed sets built below.
   itos <- c(13, 2, 14, 3, 15, 4,
             1, 16, 5, 17, 6)
   stoi <- c(18, 8, 19, 9, 20, 10,
             7, 21, 11, 22, 12)
   mtom <- 23:37

   # For every paper (by position in fdata$path): 1 if at least one of its
   # pathways belongs to `path_ids`, 0 otherwise. Papers are enumerated from
   # the data instead of a fixed 1:80 range, so the flags always have one
   # entry per row of fdata.
   has_direction <- function(path_ids) {
     papers_hit <- unique(dt$ID[dt$path %in% path_ids])
     as.numeric(seq_along(dt_list) %in% papers_hit)
   }

   i_dir <- data.frame(
     ItoS = has_direction(itos),
     StoI = has_direction(stoi),
     MtoM = has_direction(mtom)
   )

   fdata$ItoS <- i_dir$ItoS
   fdata$StoI <- i_dir$StoI
   fdata$MtoM <- i_dir$MtoM

   # IDs of the papers of `tab` flagged 1 in column `flag`.
   ids_flagged <- function(tab, flag) {
     unlist(as.list(tab[tab[[flag]] == 1, "ID"]))
   }

   # Sets of paper IDs per direction, for papers with CorePaper == 0 ...
   directions <- fdata[fdata$CorePaper == 0, c("ID", "ItoS", "StoI", "MtoM")]
   xdir <- list(
     IneqToSeg = ids_flagged(directions, "ItoS"),
     SegToIneq = ids_flagged(directions, "StoI"),
     MedToMed = ids_flagged(directions, "MtoM"))

   # ... and for papers with CorePaper == 1.
   direction_core <- fdata[fdata$CorePaper == 1, c("ID", "ItoS", "StoI", "MtoM")]
   xdircore <- list(
     IneqToSeg = ids_flagged(direction_core, "ItoS"),
     SegToIneq = ids_flagged(direction_core, "StoI"),
     MedToMed = ids_flagged(direction_core, "MtoM"))

Directionality

# Pathway networks: first with the default ID = "all", then for core papers.
fishgraph(fdata, nodes, links, legend = FALSE)

fishgraph(fdata, nodes, links, ID = "core", legend = FALSE)

# Venn diagram of the directions of causality in `sets` (a named list of
# paper-ID vectors), with the shared greyscale styling and the given subtitle.
plot_direction_venn <- function(sets, subtitle) {
  ggVennDiagram(sets, label_alpha = 0.4, edge_lty = 0.1, set_size = 3.7) +
    ggplot2::scale_fill_gradient(low = "white", high = "black") +
    ggplot2::ggtitle("Direction of causality assumed",
                     subtitle = subtitle) +
    ggplot2::theme(legend.position = "right",
                   legend.justification = "right",
                   legend.direction = "vertical")
}

# NOTE(review): the paper counts in the subtitles are hard-coded; confirm they
# still match the number of rows behind `xdir` and `xdircore`.
plot_direction_venn(xdir, "Eligible papers: n=48")

plot_direction_venn(xdircore, "Eligible papers: n=32")

Actors’ wordcloud (Appendix H)

# Draw a word cloud of the values stored in a list-column.
#
# Arguments:
#   data          table of papers; must contain the column named by
#                 `variableName` (and CorePaper when core = TRUE).
#   variableName  name (string) of the list-column whose values are counted.
#   core          if TRUE, keep only rows with CorePaper == 1.
#   seednb        seed set right before drawing, so the layout is reproducible.
#
# Called for its plotting side effect.
answerWordcloud <- function(data, variableName, core = FALSE, seednb = 123) {
  if (core) {
    data <- data %>%
      filter(CorePaper == 1)
  }

  # Long format: one row per (row index, value).
  objectList <- map(data[[variableName]], as.data.table)
  allobjects <- as.data.frame(rbindlist(objectList, fill = TRUE, idcol = TRUE))
  colnames(allobjects) <- c("ID", "var")

  # Frequency of each value, most frequent first. The filter removes the
  # literal string "NA"; missing values are removed too, because filter()
  # drops rows where the condition evaluates to NA.
  words <- allobjects %>%
    count(var, sort = TRUE) %>%
    filter(var != "NA")

  set.seed(seednb) # for reproducibility
  wordcloud(words = words$var, freq = words$n, min.freq = 1,
            max.words = 200, random.order = FALSE, rot.per = 0.35,
            colors = brewer.pal(8, "Paired"))
}

# Split the actors answer into a list-column with one actor per element.
# The column header documents ";" as the separator, so split on ";" alone and
# trim surrounding whitespace: "a; b", "a;b" and "a ; b" all give "a" and "b".
descdata <- fdata %>%
  mutate(actors = lapply(
    strsplit(`Actors involved (separated by ";")`, ";", fixed = TRUE),
    trimws
  ))

Wordcloud of actors (Appendix H)

# Actors word cloud for all papers, then for core papers only.
answerWordcloud(descdata, "actors")

answerWordcloud(descdata, "actors", core = TRUE)

Estimated coefficients (Appendix I)

# Studies flagged "yes" in the "Comparable specifications?" legend.
comparable_refs <- c("Reardon & Bischoff, 2011",
                     "Mutgan & Mijs, 2023",
                     "Simpson et al., 2023")

# Order of the studies on the axis (factor levels of `Reference`).
reference_levels <- c("Mutgan & Mijs, 2023",
                      "Simpson et al., 2023",
                      "Hu & Liang, 2022",
                      "Owens, 2016",
                      "Reardon & Bischoff, 2011",
                      "Watson, 2009",
                      "Telles, 1995")

# Read the coefficients and add the error-bar bounds (coefficient plus or
# minus one standard error), the comparability flag and the ordered factor.
quant_summary_coeff <- read.csv("data/quant_summary_coeff.csv") |>
  mutate(
    low = Coeff - `Standard.Error`,
    high = Coeff + `Standard.Error`,
    `Comparable\nspecifications?` = ifelse(Ref %in% comparable_refs,
                                           "yes", "no"),
    Reference = factor(Ref, levels = reference_levels)
  )



# Point estimate and error bar per study, coloured by comparability;
# coord_flip() puts the studies on the vertical axis.
quant_summary_coeff |>
  ggplot(aes(x = Reference,
             y = Coeff,
             colour = `Comparable\nspecifications?`)) +
  geom_point() +
  geom_errorbar(aes(ymin = low, ymax = high), width = 0.2) +
  scale_colour_manual(values = c("aquamarine3", "orange")) +
  geom_hline(yintercept = 0, colour = "black") +
  ylab("Estimated values of coefficient in the study") +
  coord_flip()