Companion notebook of the Social Forces 2025 article: “Economic inequality and economic segregation: a systematic review of causal pathways” by Clémentine Cottineau-Mugadza within the ERC project SEGUE.
Graph to follow the evolution of manual screening (Appendix A)
#' Plot and save the distribution of manually screened records by category.
#'
#' Keeps the rows of `data` whose `CAT_title` is one of the letters A to H,
#' prints the number of "A" records and of records with `Core == 1`, draws one
#' bar per category and saves the figure as "histevol_<ncoded>.png" in the
#' working directory.
#'
#' @param data A data frame with (at least) the columns `CAT_title` and `Core`.
#' @return The value returned by `ggsave()`.
globalplot <- function(data) {
  # Fixed colour per category, so a category keeps its colour even when
  # another one is absent from `data`.
  category_fills <- c(
    A = "goldenrod1", B = "grey25", C = "grey40", D = "goldenrod3",
    E = "grey75", F = "grey85", G = "grey95", H = "white"
  )
  categorised <- data %>%
    filter(CAT_title %in% LETTERS[1:8])
  ncoded <- nrow(categorised)
  print(paste0("# of A: ", nrow(filter(categorised, CAT_title == "A"))))
  print(paste0("# of Core: ", nrow(filter(categorised, Core == 1))))
  # geom_bar() needs exactly one fill per bar drawn, i.e. per category present.
  present <- sort(unique(as.character(categorised$CAT_title)))
  # The share of coded records is computed against the data passed in, not
  # against a global object.
  # NOTE(review): evolhist() saves to the same "histevol_<ncoded>.png" name, so
  # calling both for the same data overwrites this figure.
  p <- categorised %>%
    ggplot(aes(CAT_title)) +
    geom_bar(fill = unname(category_fills[present])) +
    ggtitle(paste0("n=", ncoded, " | ",
                   round(ncoded / nrow(data) * 100, 1), "%"))
  print(p)
  ggsave(paste0("histevol_", ncoded, ".png"), plot = p)
}
#' Plot how the mix of screening categories evolves along the sample.
#'
#' Keeps the rows of `data` whose `CAT_title` is one of the letters A to H,
#' groups them into consecutive bins of `binsize` records, prints the number of
#' records coded since the last "A", draws the share of each category per bin,
#' prints the underlying bin-by-category table and saves the figure as
#' "histevol_<ncoded>.png" in the working directory.
#'
#' @param data A data frame with (at least) the column `CAT_title`.
#' @param binsize Number of consecutive records per bin.
#' @return The value returned by `ggsave()`.
evolhist <- function(data, binsize) {
  # Named palette: colours are matched to categories by name, so they do not
  # shift when a category is absent.
  category_fills <- c(
    A = "goldenrod1", B = "grey25", C = "grey40", D = "goldenrod3",
    E = "grey75", F = "grey85", G = "grey95", H = "white"
  )
  categorised <- data %>%
    filter(CAT_title %in% LETTERS[1:8])
  ncoded <- nrow(categorised)
  # Row names are used as the position of each record in the screening order.
  # NOTE(review): assumes the filtered data carries row names 1..ncoded - confirm.
  row_position <- as.numeric(rownames(categorised))
  # Records located after the last complete bin fall outside the breaks and
  # get an NA bin.
  categorised$n <- cut(row_position, seq(0, ncoded, by = binsize))
  lastA <- sort(row_position[categorised$CAT_title == "A"],
                decreasing = TRUE)[1]
  print(paste0("# since last A: ", ncoded - lastA))
  # Turn the interval label "(lower,upper]" into its numeric midpoint.
  sumplot <- categorised %>%
    mutate(Bin = gsub("\\(|\\]", "", n)) %>%
    separate(Bin, sep = ",", into = c("lower", "upper")) %>%
    mutate(across(lower:upper, as.numeric)) %>%
    mutate(`Sample` = (upper + lower) / 2)
  # The share of coded records is computed against the data passed in, not
  # against a global object.
  p <- ggplot(data = sumplot, aes(x = Sample, fill = CAT_title)) +
    geom_histogram(position = position_fill(reverse = TRUE),
                   stat = "count", colour = "white", width = binsize) +
    scale_fill_manual(values = category_fills) +
    labs(x = "Sample reviewed", y = "Frequency") +
    ggtitle(paste0("n=", ncoded, " | ",
                   round(ncoded / nrow(data) * 100, 1), "%"))
  print(p)
  print(table(sumplot$Sample, sumplot$CAT_title))
  ggsave(paste0("histevol_", ncoded, ".png"), plot = p)
}
Distribution of records by category (Appendix A)
# Load the manually categorised records and plot their distribution by category.
wos_cat <- read.csv2(file = "data/wos_list2_CAT_CC.csv", sep = ";")
globalplot(data = wos_cat)
## [1] "# of A: 101"
## [1] "# of Core: 5"
## Saving 7 x 5 in image
evolhist(data=wos_cat, binsize = 500)
## [1] "# since last A: 1"
##
## A B C D E F G H
## 250 57 36 220 15 30 87 32 23
## 750 6 16 272 7 41 97 38 23
## 1250 4 13 112 7 22 166 94 82
## 1750 6 13 65 17 18 190 109 82
## 2250 4 11 74 13 24 180 106 88
## 2750 5 13 64 7 20 173 109 109
## 3250 6 20 70 7 35 177 89 96
## 3750 1 7 56 11 24 153 100 148
## 4250 1 6 54 13 36 169 96 125
## 4750 4 11 59 7 16 158 78 167
## 5250 3 12 61 21 26 166 76 135
## Saving 7 x 5 in image
Random sampling of title examples for each category
#' Draw random example titles for each screening category.
#'
#' For each category A to H, samples up to `n` titles without replacement from
#' the rows of `data` in that category. Categories holding fewer than `n`
#' titles are padded with `NA` instead of raising an error.
#'
#' @param data A data frame with the columns `CAT_title` and `Article.Title`.
#' @param n Number of titles to draw per category.
#' @return A data frame with `n` rows and one character column per category
#'   (named "A" to "H").
randomtitles <- function(data, n) {
  categories <- LETTERS[1:8]
  draw_titles <- function(category) {
    # as.character() also prevents sample() from treating a single numeric
    # value as the range 1:x.
    pool <- as.character(data$Article.Title[data$CAT_title %in% category])
    picked <- sample(pool, min(n, length(pool)), replace = FALSE)
    c(picked, rep(NA_character_, n - length(picked)))
  }
  columns <- lapply(categories, draw_titles)
  names(columns) <- categories
  data.frame(columns, stringsAsFactors = FALSE, check.names = FALSE)
}
titlextab <- randomtitles(wos_cat,10)
head(titlextab)
## A
## 1 Neighbourhood effects and beyond: Explaining the paradoxes of inequality in the changing American metropolis
## 2 The Income-Inequality Relationship within US Metropolitan Areas 1980-2016
## 3 Black-white income inequality and metropolitan socioeconomic structure
## 4 Sociocultural, economic and ethnic homogeneity in residential mobility and spatial sorting among couples
## 5 Expanding homes and increasing inequalities: US housing development and the residential segregation of the affluent
## 6 Inequality, residential segregation by income, and mortality in US cities
## B
## 1 Neighborhoods on the Rise: A Typology of Neighborhoods Experiencing Socioeconomic Ascent
## 2 Microeconomic model of residential location incorporating life cycle and social expectations
## 3 The Alibaba effect: Spatial consumption inequality and the welfare gains from e-commerce
## 4 Housing Decisions Among Low-Income Hispanic Households in Chicago
## 5 Small-Area Incomes: Their Spatial Variability and the Relative Efficacy of Proxy, Geodemographic, Imputed and Model-Based Estimates
## 6 How economic segregation affects children's educational attainment
## C
## 1 ON INTERGENERATIONAL IMMOBILITY: EVIDENCE THAT ADULT CREDIT HEALTH REFLECTS THE CHILDHOOD ENVIRONMENT
## 2 Which Income Inequality Influences Which Health Indicators? Analysis of the Income Inequality Hypothesis with Market and Disposable Gini Indicators
## 3 Racial economic subordination and white gain in the U.S. South
## 4 Untangling the Complexity of the Association between Contracting and Local Fiscal Performance and Income Inequality in Terms of Competing Values
## 5 Generalizing the Inequality Process' gamma model of particle wealth statistics
## 6 FOSSIL FUEL SUBSIDIES, INCOME INEQUALITY, AND POVERTY: EVIDENCE FROM DEVELOPING COUNTRIES
## D
## 1 Migration, ethnicity, and inequality: Homeownership in Israel
## 2 Observed trends in the magnitude of socioeconomic and area-based inequalities in use of caesarean section in Ethiopia: a cross-sectional study
## 3 REGULATION AND THE GEOGRAPHY OF INEQUALITY
## 4 An analysis of the nutrition status of neighboring Indigenous and non-Indigenous populations in Kanungu District, southwestern Uganda: Close proximity, distant health realities
## 5 Social inequality and urban regeneration in Barcelona city centre: reconsidering success
## 6 Inequality, neighbourhoods and welfare of the poor
## E
## 1 Making do: Religious segregation and everyday water struggles
## 2 The interaction of segregation and suburbanization in an agent-based model of residential location
## 3 Through the bridges: the Black Cultural Association in Sao Paulo, urban planning and the contours of the white city
## 4 Birth outcomes among urban African-American women: A multilevel analysis of the role of racial residential segregation
## 5 Housing Liberalisation and Gentrification: The Social Effects of Tenure Conversions in Amsterdam
## 6 'Now the German comes': The ethnic effect of gentrification in Berlin
## F
## 1 Trends in Inequality in Food Consumption and Calorie Intake in India: Evidence from the Last Three Decades, 1983-2012
## 2 Trade liberalisation, poverty and inequality in South Africa: A computable general equilibrium-microsimulation analysis
## 3 Channels of Inequality of Opportunity: The Role of Education and Occupation in Europe
## 4 What effect does inequality have on residents' sense of safety? Exploring the mediating processes of social capital and civic engagement
## 5 Poverty alleviation, inequality and welfare in rural China
## 6 Socioeconomic inequalities in women's access to health care: has Ecuadorian health reform been successful?
## G
## 1 Cost Distortions and Structural Imbalances in China
## 2 Some alternative geo-economics for Europe's regions
## 3 Global Apartheid. Development and Underdevelopment after Globalization
## 4 DOES UNDERNUTRITION RESPOND TO INCOMES AND PRICES - DOMINANCE TESTS FOR INDONESIA
## 5 Taylor linearization sampling errors and design effects for poverty measures and other complex statistics
## 6 Labor Market Segmentation in Urumqi, Xinjiang: Exposing Labor Market Segments and Testing the Relationship between Migration and Segmentation
## H
## 1 Unpacking democracy: The effects of different democratic qualities on climate change performance over time
## 2 Contextualizing the COVID-19 Era in Puerto Rico: Compounding Disasters and Parallel Pandemics
## 3 Structural factors and black interracial homicide: A new examination of the causal process
## 4 International socioeconomic inequality drives trade patterns in the global wildlife market
## 5 Does the median voter model explain the size of government?: Evidence from the states
## 6 Subjective well-being in the new China: religion, social capital, and social status
AI-supported screening
# First ASReview round: cross-tabulate the inclusion flag against the manual
# title category.
asreview1 <- read_excel(
  "data/asreview_dataset_all_economic-inequality-and-urban-economic-segregation-segue-1stRound.xlsx"
)
conting <- table(asreview1$included, asreview1$CAT_title)
print(conting)
##
## A B C D E F G H
## 0 0 48 73 18 77 44 14 25
## 1 82 21 1 4 6 1 1 0
prop.table(conting,2)
##
## A B C D E F
## 0 0.00000000 0.69565217 0.98648649 0.81818182 0.92771084 0.97777778
## 1 1.00000000 0.30434783 0.01351351 0.18181818 0.07228916 0.02222222
##
## G H
## 0 0.93333333 1.00000000
## 1 0.06666667 0.00000000
summary(asreview1)
## record_id Title CAT_title Abstract
## Min. : 0 Length:20369 Length:20369 Length:20369
## 1st Qu.: 5092 Class :character Class :character Class :character
## Median :10184 Mode :character Mode :character Mode :character
## Mean :10184
## 3rd Qu.:15276
## Max. :20368
##
## Authors Source Title Journal Abbreviation Publication Year
## Length:20369 Length:20369 Length:20369 Min. :1985
## Class :character Class :character Class :character 1st Qu.:2012
## Mode :character Mode :character Mode :character Median :2017
## Mean :2015
## 3rd Qu.:2020
## Max. :2023
## NA's :567
## Volume Issue Start Page End Page
## Min. : 1.00 Min. : 1 Length:20369 Length:20369
## 1st Qu.: 18.00 1st Qu.: 2 Class :character Class :character
## Median : 36.00 Median : 3 Mode :character Mode :character
## Mean : 57.09 Mean : 1080
## 3rd Qu.: 63.00 3rd Qu.: 5
## Max. :2224.00 Max. :45271
## NA's :1231 NA's :4387
## DOI Language Author Keywords Cited Reference Count
## Length:20369 Length:20369 Length:20369 Min. : 0.00
## Class :character Class :character Class :character 1st Qu.: 32.00
## Mode :character Mode :character Mode :character Median : 47.00
## Mean : 52.14
## 3rd Qu.: 65.00
## Max. :637.00
##
## Times Cited, All Databases UT (Unique WOS ID) Reviewed exported_notes_1
## Min. : 0.00 Length:20369 Min. :1 Mode:logical
## 1st Qu.: 1.00 Class :character 1st Qu.:1 NA's:20369
## Median : 6.00 Mode :character Median :1
## Mean : 21.64 Mean :1
## 3rd Qu.: 19.00 3rd Qu.:1
## Max. :4884.00 Max. :1
## NA's :20366
## included asreview_ranking
## Min. :0.000 Min. : 1
## 1st Qu.:0.000 1st Qu.: 5093
## Median :0.000 Median :10185
## Mean :0.195 Mean :10185
## 3rd Qu.:0.000 3rd Qu.:15277
## Max. :1.000 Max. :20369
## NA's :19465
Aggregation
# Records flagged as included, tagged with their source ("WOS").
asreviewWOS <- read_excel("data/asreview_dataset_all_assessing.xlsx") |>
  filter(included == 1) |>
  mutate(source = "WOS")
names(asreviewWOS)
## [1] "record_id" "Title"
## [3] "CAT_title" "Abstract"
## [5] "Authors" "Source Title"
## [7] "Journal Abbreviation" "Publication Year"
## [9] "Volume" "Issue"
## [11] "Start Page" "End Page"
## [13] "DOI" "Language"
## [15] "Author Keywords" "Cited Reference Count"
## [17] "Times Cited, All Databases" "UT (Unique WOS ID)"
## [19] "Elligibility" "CorePaper"
## [21] "included" "asreview_ranking"
## [23] "reason_inelligibility" "source"
# One free-text citation string per included WOS record.
fulltextlist <- asreviewWOS |>
  mutate(cit = paste0(Authors, ", ", `Publication Year`, ", ", Title, ", ",
                      `Source Title`, ", ", Volume, ", ", Issue,
                      ", ", `Start Page`, "-", `End Page`)) |>
  select(cit)
#write_excel_csv(fulltextlist, "data/citation_elligible.txt")
# Descriptive statistics per eligibility / core-paper group.
asreviewWOS |>
  group_by(Elligibility, CorePaper) |>
  summarise(
    n = n(),
    minYear = min(`Publication Year`, na.rm = TRUE),
    maxYear = max(`Publication Year`, na.rm = TRUE),
    meanYear = mean(`Publication Year`, na.rm = TRUE),
    medYear = median(`Publication Year`, na.rm = TRUE),
    meanRefs = mean(`Cited Reference Count`),
    meanCites = mean(`Times Cited, All Databases`),
    # NOTE(review): despite its name, maxRef is the maximum of the citation
    # count, not of the reference count - confirm which one is intended.
    maxRef = max(`Times Cited, All Databases`, na.rm = TRUE)
  )
## `summarise()` has grouped output by 'Elligibility'. You can override using the
## `.groups` argument.
## # A tibble: 3 × 10
## # Groups: Elligibility [2]
## Elligibility CorePaper n minYear maxYear meanYear medYear meanRefs
## <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0 109 1992 2023 2014. 2016. 52.9
## 2 1 0 46 1991 2023 2016. 2018 57.4
## 3 1 1 30 1995 2022 2015. 2017 51.4
## # ℹ 2 more variables: meanCites <dbl>, maxRef <dbl>
Merging sources (Figure 2B)
# Included records from the other sources; `source` is taken from the first
# five characters of their unique ID.
expertSources <- read_excel(
  "data/asreview_dataset_all_assessing_othersources.xlsx"
) |>
  filter(included == 1) |>
  mutate(source = substr(`unique ID`, 1, 5))
# Columns are renamed by position to the WOS names so the two tables can be
# stacked.
# NOTE(review): this presumes both files list their columns in the same order.
expertSources <- setNames(expertSources, colnames(asreviewWOS))
asreview2 <- rbind(asreviewWOS, expertSources)
asreview2[["Language"]] <- as.factor(asreview2[["Language"]])
summary(asreview2)
## record_id Title CAT_title Abstract
## Min. : 0 Length:189 Length:189 Length:189
## 1st Qu.: 47 Class :character Class :character Class :character
## Median : 94 Mode :character Mode :character Mode :character
## Mean : 1007
## 3rd Qu.: 141
## Max. :40003
##
## Authors Source Title Journal Abbreviation Publication Year
## Length:189 Length:189 Length:189 Min. :1991
## Class :character Class :character Class :character 1st Qu.:2011
## Mode :character Mode :character Mode :character Median :2017
## Mean :2015
## 3rd Qu.:2020
## Max. :2023
## NA's :3
## Volume Issue Start Page End Page
## Min. : 1.00 Min. : 1 Length:189 Length:189
## 1st Qu.: 35.00 1st Qu.: 2 Class :character Class :character
## Median : 53.00 Median : 3 Mode :character Mode :character
## Mean : 81.41 Mean : 1823
## 3rd Qu.: 79.50 3rd Qu.: 5
## Max. :2007.00 Max. :45271
## NA's :7 NA's :34
## DOI Language Author Keywords Cited Reference Count
## Length:189 English:183 Length:189 Min. : 11.00
## Class :character French : 1 Class :character 1st Qu.: 35.00
## Mode :character Spanish: 5 Mode :character Median : 49.00
## Mean : 54.44
## 3rd Qu.: 68.00
## Max. :188.00
##
## Times Cited, All Databases UT (Unique WOS ID) Elligibility
## Min. : 0.00 Length:189 Min. :0.0000
## 1st Qu.: 2.00 Class :character 1st Qu.:0.0000
## Median : 13.00 Mode :character Median :0.0000
## Mean : 59.45 Mean :0.4233
## 3rd Qu.: 37.00 3rd Qu.:1.0000
## Max. :4884.00 Max. :1.0000
##
## CorePaper included asreview_ranking reason_inelligibility
## Min. :0.0000 Min. :1 Min. : 1 Length:189
## 1st Qu.:0.0000 1st Qu.:1 1st Qu.: 47 Class :character
## Median :0.0000 Median :1 Median : 93 Mode :character
## Mean :0.1693 Mean :1 Mean : 93
## 3rd Qu.:0.0000 3rd Qu.:1 3rd Qu.:139
## Max. :1.0000 Max. :1 Max. :185
## NA's :4
## source
## Length:189
## Class :character
## Mode :character
##
##
##
##
# Descriptive statistics per eligibility / core-paper group, all sources merged.
asreview2 |>
  group_by(Elligibility, CorePaper) |>
  summarise(
    n = n(),
    minYear = min(`Publication Year`, na.rm = TRUE),
    maxYear = max(`Publication Year`, na.rm = TRUE),
    meanYear = mean(`Publication Year`, na.rm = TRUE),
    medYear = median(`Publication Year`, na.rm = TRUE),
    meanRefs = mean(`Cited Reference Count`),
    meanCites = mean(`Times Cited, All Databases`)
  )
## `summarise()` has grouped output by 'Elligibility'. You can override using the
## `.groups` argument.
## # A tibble: 3 × 9
## # Groups: Elligibility [2]
## Elligibility CorePaper n minYear maxYear meanYear medYear meanRefs
## <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0 109 1992 2023 2014. 2016. 52.9
## 2 1 0 48 1991 2023 2016. 2018 59.3
## 3 1 1 32 1995 2023 2015. 2017 52.5
## # ℹ 1 more variable: meanCites <dbl>
Appendix B & C
# Citation strings of the eligible papers (Elligibility == 1).
eligiblelist <- asreview2 |>
  filter(Elligibility == 1) |>
  transmute(
    cit = paste0(
      Authors, ", ", `Publication Year`, ", ", Title, ", ",
      `Source Title`, ", ", Volume, ", ", Issue,
      ", ", `Start Page`, "-", `End Page`
    )
  )
head(eligiblelist)
## # A tibble: 6 × 1
## cit
## <chr>
## 1 Fernandes, L; Tempere, J, 2020, Effect of segregation on inequality in kineti…
## 2 Watson, T, 2009, INEQUALITY AND THE MEASUREMENT OF RESIDENTIAL SEGREGATION BY…
## 3 Tammaru, T; Marcinczak, S; Aunap, R; van Ham, M; Janssen, H, 2020, Relationsh…
## 4 Owens, A, 2018, Income Segregation between School Districts and Inequality in…
## 5 Lobmayer, P; Wilkinson, RG, 2002, Inequality, residential segregation by inco…
## 6 Yabe, T; Ukkusuri, SV, 2020, Effects of income inequality on evacuation, reen…
#write_excel_csv(eligiblelist, "data/citation_scoping.txt")
# Citation strings of the core papers (CorePaper == 1).
corelist <- asreview2 |>
  filter(CorePaper == 1) |>
  transmute(
    cit = paste0(
      Authors, ", ", `Publication Year`, ", ", Title, ", ",
      `Source Title`, ", ", Volume, ", ", Issue,
      ", ", `Start Page`, "-", `End Page`
    )
  )
head(corelist)
## # A tibble: 6 × 1
## cit
## <chr>
## 1 Fernandes, L; Tempere, J, 2020, Effect of segregation on inequality in kineti…
## 2 Watson, T, 2009, INEQUALITY AND THE MEASUREMENT OF RESIDENTIAL SEGREGATION BY…
## 3 Tammaru, T; Marcinczak, S; Aunap, R; van Ham, M; Janssen, H, 2020, Relationsh…
## 4 Scarpa, S, 2015, The impact of income inequality on economic residential segr…
## 5 Rodriguez, GM, 2020, Socioeconomic Inequality and Housing Segregation, CUADER…
## 6 Chen, WH; Myles, J; Picot, G, 2012, Why Have Poorer Neighbourhoods Stagnated …
#write_excel_csv(corelist, "data/citation_core.txt")
Theoretical framework of causal pathways
# Theoretical framework: directed graph of concepts (nodes) and causal
# relations (links), drawn without edge labels.
# Node table: one row per concept, with its type and its plotting coordinates
# (lon, lat).
nodes <- data.frame(id = LETTERS[1:7],
concepts = c("Economic inequality","Economic segregation",
"Labour ineq.",
"School ineq.","Housing ineq.",
"Other ineq. (health, crime...)",
"Other"),
type = c("Main concept","Main concept",
"Mediator", "Mediator",
"Mediator","Mediator",
"Other"),
"lon" = c(1, 3, 2, 2, 2, 2, 2),
"lat" = c(2.5, 2.5, 4.5, 4, 3.5, 2, 1.5))
# Link table: 37 directed relations (from -> to), each with an id and a type.
links <- data.frame( from = c("A","C", "D", "E", "F",
"G", "B", "C","D", "E",
"F", "G", "A", "A", "A",
"A", "A", "B", "B", "B",
"B", "B", "D", "C", "D",
"E", "E", "C", "G", "C",
"G", "G", "C", "D", "E",
"F", "G"),
to = c("B", "B","B", "B", "B",
"B", "A", "A", "A", "A",
"A", "A", "C", "D", "E",
"F", "G", "C", "D", "E",
"F", "G", "C", "D", "E",
"D", "C", "E", "C", "G",
"E", "F", "G", "G", "G",
"G", "D"),
id = 1:37,
type = c("Direct", "Mediated", "Mediated","Mediated","Mediated",
"External", "Direct", "Mediated", "Mediated","Mediated",
"Mediated", "External", "On mediators", "On mediators", "On mediators",
"On mediators", "External", "On mediators", "On mediators", "On mediators",
"On mediators", "External", "Inter-Mediator", "Inter-Mediator", "Inter-Mediator",
"Inter-Mediator", "Inter-Mediator", "Inter-Mediator", "External", "External",
"External", "External","External", "External","External",
"External", "External"))
net <- graph_from_data_frame(d=links, vertices=nodes, directed=T)
# Edge styling. as.factor() orders the link types alphabetically (Direct,
# External, Inter-Mediator, Mediated, On mediators); ecolors and esizes are
# indexed in that order.
ecolors <- c("mediumvioletred", "gray75", "gray45", "cornflowerblue", "mediumaquamarine")
alphaecolors <- alpha(ecolors, 0.5)
esizes <- c(5, 1, 2, 4, 3)
E(net)$color <- alphaecolors[as.factor(E(net)$type)]
E(net)$width <- esizes[as.factor(E(net)$type)]
# E(net)$label <- E(net)$id
# E(net)$label.color <- ecolors[as.factor(E(net)$type)]
E(net)$weight <- as.numeric(esizes[as.factor(E(net)$type)]) * 5
# Vertex styling, indexed by the alphabetical order of the node types
# (Main concept, Mediator, Other).
vcolors <- c("tomato", "orange", "black")
vsizes <- c(4, 2, 1)
V(net)$label <- V(net)$concepts
V(net)$color <- vcolors[as.factor(V(net)$type)]
V(net)$size <- vsizes[as.factor(V(net)$type)]*5
V(net)$label.cex <- sqrt(vsizes[as.factor(V(net)$type)]) / 2
V(net)$label.degree <- c(pi/2,pi/2, -1.5, -1.5, pi/2, pi/2, pi/2)
V(net)$frame.color <- "white"
# plot.igraph2 and igraph.Arrows2 are not defined in this section; their
# enclosing environment is set to the igraph namespace so that they resolve
# igraph's internal functions.
environment(plot.igraph2) <- asNamespace('igraph')
environment(igraph.Arrows2) <- asNamespace('igraph')
# Fixed layout taken from the lon/lat columns of the node table.
l <- layout.norm(as.matrix(nodes[,c("lon", "lat")]))
# NOTE(review): edge.arrow.size is passed twice below (.4 and a per-edge
# vector); which value is used depends on plot.igraph2 - confirm.
plot.igraph2(net, edge.arrow.size=.4, edge.curved=0.55, layout=l,
vertex.label.dist=2,
vertex.label.font=2, vertex.label.color="black",
#edge.label.color="gray20",
edge.label.cex=0.75,
edge.label.font=2,
edge.arrow.size=2* (as.numeric(E(net)$weight)/
as.numeric(max(E(net)$weight))/2),
edge.arrow.width=2* (as.numeric(E(net)$weight)/
max(as.numeric(E(net)$weight))),
main = "Direct, indirect and retro effects \n of economic inequality on economic segregation")
# Legends list the node and link types in the same alphabetical level order as
# the colour vectors.
legend(x=-1.5, y=-1.1, levels(as.factor(nodes$type)), pch=21,
col="white", pt.bg=vcolors, pt.cex=2, cex=.8, bty="n", ncol=1, title="Type of concept")
legend(x=0.5, y=-1.1, levels(as.factor(links$type)), pch=21,
col="white", pt.bg=alphaecolors, pt.cex=2, cex=.8, bty="n", ncol=1, title="Type of relation")
Coding framework for causal pathways analysis (Appendix D)
# Coding framework: directed graph of concepts (nodes) and causal relations
# (links), drawn with each edge labelled by its link id.
# Node table: one row per concept, with its type and its plotting coordinates
# (lon, lat).
nodes <- data.frame(id = LETTERS[1:7],
concepts = c("Economic inequality","Economic segregation",
"Labour ineq.",
"School ineq.","Housing ineq.",
"Other ineq. (health, crime...)",
"Other"),
type = c("Main concept","Main concept",
"Mediator", "Mediator",
"Mediator","Mediator",
"Other"),
"lon" = c(1, 3, 2, 2, 2, 2, 2),
"lat" = c(2.5, 2.5, 4.5, 4, 3.5, 2, 1.5))
# Link table: 37 directed relations (from -> to), each with an id and a type.
links <- data.frame( from = c("A","C", "D", "E", "F",
"G", "B", "C","D", "E",
"F", "G", "A", "A", "A",
"A", "A", "B", "B", "B",
"B", "B", "D", "C", "D",
"E", "E", "C", "G", "C",
"G", "G", "C", "D", "E",
"F", "G"),
to = c("B", "B","B", "B", "B",
"B", "A", "A", "A", "A",
"A", "A", "C", "D", "E",
"F", "G", "C", "D", "E",
"F", "G", "C", "D", "E",
"D", "C", "E", "C", "G",
"E", "F", "G", "G", "G",
"G", "D"),
id = 1:37,
type = c("Direct", "Mediated", "Mediated","Mediated","Mediated",
"External", "Direct", "Mediated", "Mediated","Mediated",
"Mediated", "External", "On mediators", "On mediators", "On mediators",
"On mediators", "External", "On mediators", "On mediators", "On mediators",
"On mediators", "External", "Inter-Mediator", "Inter-Mediator", "Inter-Mediator",
"Inter-Mediator", "Inter-Mediator", "Inter-Mediator", "External", "External",
"External", "External","External", "External","External",
"External", "External"))
net <- graph_from_data_frame(d=links, vertices=nodes, directed=T)
# Edge styling. as.factor() orders the link types alphabetically (Direct,
# External, Inter-Mediator, Mediated, On mediators); ecolors and esizes are
# indexed in that order.
ecolors <- c("mediumvioletred", "gray75", "gray45", "cornflowerblue", "mediumaquamarine")
alphaecolors <- alpha(ecolors, 0.5)
esizes <- c(5, 1, 2, 4, 3)
E(net)$color <- alphaecolors[as.factor(E(net)$type)]
E(net)$width <- esizes[as.factor(E(net)$type)]
# Each edge is labelled with its link id, coloured like its relation type
# (opaque version of the edge colour).
E(net)$label <- E(net)$id
E(net)$label.color <- ecolors[as.factor(E(net)$type)]
E(net)$weight <- as.numeric(esizes[as.factor(E(net)$type)]) * 5
# Vertex styling, indexed by the alphabetical order of the node types
# (Main concept, Mediator, Other).
vcolors <- c("tomato", "orange", "black")
vsizes <- c(4, 2, 1)
V(net)$label <- V(net)$concepts
V(net)$color <- vcolors[as.factor(V(net)$type)]
V(net)$size <- vsizes[as.factor(V(net)$type)]*5
V(net)$label.cex <- sqrt(vsizes[as.factor(V(net)$type)]) / 2
V(net)$label.degree <- c(pi/2,pi/2, -1.5, -1.5, pi/2, pi/2, pi/2)
V(net)$frame.color <- "white"
# plot.igraph2 and igraph.Arrows2 are not defined in this section; their
# enclosing environment is set to the igraph namespace so that they resolve
# igraph's internal functions.
environment(plot.igraph2) <- asNamespace('igraph')
environment(igraph.Arrows2) <- asNamespace('igraph')
# Fixed layout taken from the lon/lat columns of the node table.
l <- layout.norm(as.matrix(nodes[,c("lon", "lat")]))
# NOTE(review): edge.arrow.size is passed twice below (.4 and a per-edge
# vector); which value is used depends on plot.igraph2 - confirm.
plot.igraph2(net, edge.arrow.size=.4, edge.curved=0.55, layout=l,
vertex.label.dist=2,
vertex.label.font=2, vertex.label.color="black",
#edge.label.color="gray20",
edge.label.cex=0.75,
edge.label.font=2,
edge.arrow.size=2* (as.numeric(E(net)$weight)/
as.numeric(max(E(net)$weight))/2),
edge.arrow.width=2* (as.numeric(E(net)$weight)/
max(as.numeric(E(net)$weight))),
main = "Direct, indirect and retro effects \n of economic inequality on economic segregation")
# Legends list the node and link types in the same alphabetical level order as
# the colour vectors.
legend(x=-1.5, y=-1.1, levels(as.factor(nodes$type)), pch=21,
col="white", pt.bg=vcolors, pt.cex=2, cex=.8, bty="n", ncol=1, title="Type of concept")
legend(x=0.5, y=-1.1, levels(as.factor(links$type)), pch=21,
col="white", pt.bg=alphaecolors, pt.cex=2, cex=.8, bty="n", ncol=1, title="Type of relation")
Merging coding results with bibliometric information
# Download the coding form from Google Sheets. The URL must not contain any
# whitespace (the stray space after "https:" made it invalid).
form <- gsheet2tbl("https://docs.google.com/spreadsheets/d/1iHgL9ziZ2IGhYXUqNmphgbUkfjfzTH4RVpBgq80A2Wc/edit?usp=sharing")
# Attach the bibliometric record of each coded article, then split the coded
# causal path into its space-separated steps after removing semicolons.
fdata <- left_join(form, asreview2,
                   by = c("Article-ID" = "UT (Unique WOS ID)")) %>%
  mutate(path = strsplit(gsub(";", "", `Causal Path`), " "))
# Descriptive statistics per eligibility / core-paper group for the coded papers.
fdata |>
  group_by(Elligibility, CorePaper) |>
  summarise(
    n = n(),
    minYear = min(`Publication Year`, na.rm = TRUE),
    maxYear = max(`Publication Year`, na.rm = TRUE),
    meanYear = mean(`Publication Year`, na.rm = TRUE),
    medYear = median(`Publication Year`, na.rm = TRUE),
    meanRefs = mean(`Cited Reference Count`),
    meanCites = mean(`Times Cited, All Databases`)
  )
## `summarise()` has grouped output by 'Elligibility'. You can override using the
## `.groups` argument.
## # A tibble: 2 × 9
## # Groups: Elligibility [1]
## Elligibility CorePaper n minYear maxYear meanYear medYear meanRefs
## <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 48 1991 2023 2016. 2018 59.3
## 2 1 1 32 1995 2023 2015. 2017 52.5
## # ℹ 1 more variable: meanCites <dbl>
# Parse the form timestamp column ("Horodateur") as a day/month/year date and
# keep its numeric representation.
fdata$date <- as.Date(fdata$Horodateur, format = "%d/%m/%Y")
fdata$num <- as.numeric(fdata$date)
# Histogram bin width, in days.
bin <- 30
# after_stat(count) replaces the deprecated ..count.. notation.
ggplot(fdata, aes(date, after_stat(count))) +
  geom_histogram(binwidth = bin, colour = "black") +
  scale_x_date(
    # One axis break every two bins, from the first to the last coding date.
    breaks = seq(min(fdata$date), max(fdata$date) + 1, bin * 2),
    labels = date_format("%b-%y"),
    limits = c(as.Date("2023-03-01"), as.Date("2024-09-01"))
  ) +
  ylab("Number of articles coded per month") + xlab("")
Language & case study (figure 3A)
summary(fdata$Language)
## English French Spanish
## 78 0 2
table(fdata$CorePaper, fdata$`if Location(s) where (Zone)?`)
##
## Argentina Australia
## 0 0 2
## 1 1 0
##
## Austria; Netherlands; Sweden; Spain; Norway; Estonia; UK; Hungary; Lithuania; Greece; Czech Republic; Latvia
## 0 0
## 1 1
##
## Brazil Canada China Finland France Germany
## 0 0 0 3 1 1 1
## 1 1 2 0 0 0 0
##
## Greece; Hungary; Finland; Spain; Italy; Norway; Sweden; Estonia Israel
## 0 0 1
## 1 1 0
##
## Netherlands Norway South Africa Sweden UK United States of America
## 0 0 1 1 1 2 27
## 1 1 1 0 4 1 9
##
## United States of America, France
## 0 0
## 1 1
##
## United States of America, Netherlands, France, UK, Australia, South Africa, Denmark, Canada, Brazil; Ireland; Mexico; New Zealand;
## 0 1
## 1 0
##
## Uruguay
## 0 0
## 1 1
Disciplinary partition (Appendix E)
# Journal of each coded paper as a factor, and the journal -> discipline lookup.
fdata[["publication"]] <- as_factor(fdata[["Source Title"]])
discipline <- read_csv(file = "data/journal_lookup.csv")
## Rows: 59 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): journal, discipline
## dbl (1): papers
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach the discipline of each journal and fix the display order of disciplines.
fdata <- fdata |>
  left_join(discipline, by = c("publication" = "journal"))
fdata[["discipline"]] <- factor(
  fdata[["discipline"]],
  levels = c("SOC", "ECON", "URBREG", "GEOG", "DEMO", "CRIMEPID",
             "PHYMAT", "OTHER")
)
summary(fdata$discipline)
## SOC ECON URBREG GEOG DEMO CRIMEPID PHYMAT OTHER
## 18 16 15 8 6 5 3 9
fdata$year <- as.numeric(fdata$`Publication Year`)
Distribution by disciplines (Figure 2A)
# Publication years of the coded papers, stacked by discipline.
ggplot(data = fdata, mapping = aes(x = year, fill = discipline)) +
  geom_histogram() +
  labs(y = "Number of eligible article published")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Conceptual understanding
# Flag how each coded paper conceptualises segregation (1 = yes, 0 = no).
# The coded answers are read from fdata itself rather than from `form`, so the
# flags stay aligned with fdata's rows whatever the joins applied to it.
seg_concept_text <- fdata$`Concept for "segregation"`
fdata$SegAsResidential <- as.factor(ifelse(
  grepl("Residential segregation", seg_concept_text, fixed = TRUE), 1, 0
))
fdata$SegAsConcentration <- as.factor(ifelse(
  grepl("Concentration", seg_concept_text, fixed = TRUE), 1, 0
))
# "Other": neither of the two concepts above is mentioned.
fdata$SegAsOther <- as.factor(ifelse(
  fdata$SegAsResidential == 0 & fdata$SegAsConcentration == 0, 1, 0
))
summary(fdata$SegAsResidential)
## 0 1
## 16 64
summary(fdata$SegAsConcentration)
## 0 1
## 70 10
summary(fdata$SegAsOther)
## 0 1
## 71 9
# Flag how each coded paper conceptualises inequality (1 = yes, 0 = no).
# The coded answers are read from fdata itself rather than from `form`, so the
# flags stay aligned with fdata's rows whatever the joins applied to it.
ineq_concept_text <- fdata$`Concept for "inequality"`
fdata$IneqAsDist <- as.factor(ifelse(
  grepl("Skewed distribution", ineq_concept_text, fixed = TRUE), 1, 0
))
fdata$IneqAsConcentration <- as.factor(ifelse(
  grepl("Economic concentration (top income shares)", ineq_concept_text,
        fixed = TRUE), 1, 0
))
# "Other": neither of the two concepts above is mentioned.
fdata$IneqAsOther <- as.factor(ifelse(
  fdata$IneqAsDist == 0 & fdata$IneqAsConcentration == 0, 1, 0
))
summary(fdata$IneqAsDist)
## 0 1
## 33 47
summary(fdata$IneqAsConcentration)
## 0 1
## 71 9
summary(fdata$IneqAsOther)
## 0 1
## 48 32
# Flag which economic resource each coded paper considers (1 = yes, 0 = no).
# The coded answers are read from fdata itself rather than from `form`, so the
# flags stay aligned with fdata's rows whatever the joins applied to it.
econ_concept_text <- fdata$`Concept for "Economic"`
fdata$EconAsIncome <- as.factor(ifelse(
  grepl("Income", econ_concept_text, fixed = TRUE) |
    grepl("income", econ_concept_text, fixed = TRUE), 1, 0
))
fdata$EconAsWealth <- as.factor(ifelse(
  grepl("Wealth", econ_concept_text, fixed = TRUE), 1, 0
))
# "Wealth, " / "Income, " (with the trailing separator) mean that another item
# is listed after that resource.
wealth_then_more <- grepl("Wealth, ", econ_concept_text, fixed = TRUE)
income_then_more <- grepl("Income, ", econ_concept_text, fixed = TRUE)
# "Other" is flagged when an item follows wealth without any income mention,
# when an item follows income without any wealth mention, when items follow
# both, or when neither income nor wealth is mentioned.
fdata$EconAsOther <- as.factor(ifelse(
  (wealth_then_more & fdata$EconAsIncome == 0) |
    (income_then_more & fdata$EconAsWealth == 0) |
    (wealth_then_more & income_then_more) |
    (fdata$EconAsWealth == 0 & fdata$EconAsIncome == 0),
  1, 0
))
summary(fdata$EconAsIncome)
## 0 1
## 7 73
summary(fdata$EconAsWealth)
## 0 1
## 67 13
summary(fdata$EconAsOther)
## 0 1
## 65 15
Concept distribution
fdata$ID <- fdata$`Article-ID`

# IDs of the rows of `tbl` whose indicator column `flag` equals 1.
ids_flagged <- function(tbl, flag) {
  unlist(as.list(tbl[tbl[[flag]] == 1, "ID"]))
}

# Venn diagram of the three ID sets in `sets`, shaded from white to `high`.
concept_venn <- function(sets, high, title) {
  ggVennDiagram(sets, label_alpha = 0.2, edge_lty = 0, set_size = 2.7) +
    ggplot2::scale_fill_gradient(low = "white", high = high) +
    ggplot2::ggtitle(title, subtitle = "Eligible papers: n=80") +
    ggplot2::theme(legend.position = 'right',
                   legend.justification = 'right',
                   legend.direction = 'vertical')
}

# Segregation concepts.
segconcept <- fdata[, c("ID", "SegAsResidential", "SegAsConcentration",
                        "SegAsOther")]
xseg <- list(
  Residential = ids_flagged(segconcept, "SegAsResidential"),
  Concentration = ids_flagged(segconcept, "SegAsConcentration"),
  Other = ids_flagged(segconcept, "SegAsOther")
)
concept_venn(xseg, "aquamarine3", "How is segregation conceptualized?")

# Inequality concepts.
ineqconcept <- fdata[, c("ID", "IneqAsDist", "IneqAsConcentration",
                         "IneqAsOther")]
xineq <- list(
  Distribution = ids_flagged(ineqconcept, "IneqAsDist"),
  Concentration = ids_flagged(ineqconcept, "IneqAsConcentration"),
  Other = ids_flagged(ineqconcept, "IneqAsOther")
)
concept_venn(xineq, "#FFA500", "How is inequality conceptualized?")

# Economic resource concepts.
econconcept <- fdata[, c("ID", "EconAsIncome", "EconAsWealth", "EconAsOther")]
xecon <- list(
  Income = ids_flagged(econconcept, "EconAsIncome"),
  Wealth = ids_flagged(econconcept, "EconAsWealth"),
  Other = ids_flagged(econconcept, "EconAsOther")
)
concept_venn(xecon, "#C77398", "How is the economic resource conceptualized?")
Query about papers using wealth as economic concept:
fdata |>
filter(EconAsWealth == 1) |>
select(Authors, year, `Concept for "Economic"`,CorePaper, `if Location(s) where (Zone)?`,
`Methods used for assessing effects and relation between segregation and inequality`)
## # A tibble: 13 × 6
## Authors year Concept for "Economi…¹ CorePaper if Location(s) where…²
## <chr> <dbl> <chr> <dbl> <chr>
## 1 Wessel, T 2022 Income, Wealth, Socia… 0 Norway
## 2 Fernandes, L; … 2020 Income, Wealth 1 <NA>
## 3 Levy, BL 2022 Wealth 0 United States of Amer…
## 4 De la Cruz-Vie… 2018 Wealth 0 United States of Amer…
## 5 Hochstenbach, C 2018 Income, Wealth 1 Netherlands
## 6 Panagiotakopou… 2022 Income, Wealth 0 <NA>
## 7 Gordon, C; Bru… 2020 Wealth, housing equity 0 United States of Amer…
## 8 ViforJ, RO; Cl… 2023 Wealth, intergenerati… 0 Australia
## 9 Wessel, T 2016 Income, Wealth 1 Norway
## 10 Reardon, SF; F… 2015 Income, Wealth 0 United States of Amer…
## 11 Fang, M; Huang… 2022 Income, Wealth, wages… 0 China
## 12 Bonakdar, SB; … 2023 Income, Wealth 0 <NA>
## 13 Thomas, H; Man… 2018 Income, Wealth 0 United States of Amer…
## # ℹ abbreviated names: ¹​`Concept for "Economic"`,
## # ²​`if Location(s) where (Zone)?`
## # ℹ 1 more variable:
## # `Methods used for assessing effects and relation between segregation and inequality` <chr>
Gini distribution (figure 3B)
averageGini <- read_csv("data/country_Gini.csv")
## Rows: 21 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Zone
## dbl (1): LatestAverageGini
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
worldGini <- read_csv("data/country_Gini_context.csv")
## Rows: 145 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country Name, Country Code, Indicator Name, Indicator Code
## dbl (1): LatestGini
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach the Gini value of the studied zone to each coded paper.
analysisPerGiniLevel <- fdata |>
  left_join(averageGini, by = c(`if Location(s) where (Zone)?` = "Zone"))

# Density of national Gini values worldwide, overlaid with the density of the
# Gini values attached to the papers in `studied`. `studied_label_y` is the
# height of the "Countries studied" annotation.
gini_density_plot <- function(studied, title, studied_label_y) {
  ggplot() +
    geom_density(data = worldGini, aes(x = LatestGini),
                 fill = alpha("#FF8095", 0.6), color = NA) +
    geom_density(data = studied, aes(x = LatestAverageGini),
                 fill = alpha("#FFAA00", 0.6), color = NA) +
    xlab("Latest value of national Gini") +
    ylab("Density distribution") +
    ggtitle(title) +
    scale_y_continuous(limits = c(0, 0.115)) +
    ggplot2::annotate("text", x = 50, y = 0.023, label = "World",
                      color = alpha("#FF8095", 0.9)) +
    ggplot2::annotate("text", x = 50, y = studied_label_y,
                      label = "Countries studied",
                      color = alpha("#FFAA00", 0.9)) +
    theme_light()
}

# All coded papers, then core papers only.
gini_density_plot(analysisPerGiniLevel, "Eligible articles | n = 80", 0.098)
gini_density_plot(
  analysisPerGiniLevel[analysisPerGiniLevel$CorePaper == 1, ],
  "Core articles | n = 32",
  0.068
)
Methodology & study design
# Answers to the form question on the methods used by each article. Every flag
# below is a 0/1 factor telling whether a method label appears in that answer.
# NOTE(review): the answers are read from `form` but the flags are stored on
# `fdata`; this assumes both hold the same articles in the same order - confirm.
methodsUsed <- form$`Methods used for assessing effects and relation between segregation and inequality`

# TRUE where the answer contains `label` verbatim (no regex interpretation).
mentionsMethod <- function(label) {
  grepl(label, methodsUsed, fixed = TRUE)
}

# Turn a logical vector into a 0/1 factor.
asFlag <- function(hit) {
  as.factor(ifelse(hit, 1, 0))
}

# Analytical models and simulations are counted together in this flag.
fdata$AnalyticalModel <- asFlag(
  mentionsMethod("analytical model") | mentionsMethod("Simulation")
)
fdata$DiscourseAnalysis <- asFlag(mentionsMethod("Discourse analysis"))
fdata$HistoricalAnalysis <- asFlag(mentionsMethod("Historical analysis"))
fdata$IndexAnalysis <- asFlag(mentionsMethod("Index comparison"))
fdata$MapComparison <- asFlag(mentionsMethod("Map comparison"))
fdata$Simulation <- asFlag(mentionsMethod("Simulation"))
fdata$Statistical <- asFlag(
  mentionsMethod("Statistical regression/correlation")
)

# Methodological description of each article (the separate Simulation flag is
# not kept here).
methodo <- select(
  fdata,
  Authors, year, CorePaper, `if Location(s) where (Zone)?`, discipline,
  HistoricalAnalysis, AnalyticalModel, DiscourseAnalysis, IndexAnalysis,
  MapComparison, Statistical,
  `Type of analysis`, `Scale of analysis`, `Unit of analysis`,
  `Time frame of study`
)
summary(methodo)
## Authors year CorePaper if Location(s) where (Zone)?
## Length:80 Min. :1991 Min. :0.0 Length:80
## Class :character 1st Qu.:2014 1st Qu.:0.0 Class :character
## Mode :character Median :2018 Median :0.0 Mode :character
## Mean :2015 Mean :0.4
## 3rd Qu.:2021 3rd Qu.:1.0
## Max. :2023 Max. :1.0
## NA's :2
## discipline HistoricalAnalysis AnalyticalModel DiscourseAnalysis
## SOC :18 0:63 0:61 0:77
## ECON :16 1:17 1:19 1: 3
## URBREG :15
## OTHER : 9
## GEOG : 8
## DEMO : 6
## (Other): 8
## IndexAnalysis MapComparison Statistical Type of analysis Scale of analysis
## 0:53 0:72 0:33 Length:80 Length:80
## 1:27 1: 8 1:47 Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Unit of analysis Time frame of study
## Length:80 Length:80
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
Methodological distribution (Figure 4)
# Bar chart of one method flag of `methodo`, facetted by discipline.
#   mapping : aes() mapping the 0/1 method flag to x
#   colour  : bar fill, also used for the x-axis title
#   label   : x-axis title
# The x scale is limited to the value 1, i.e. (presumably) only the articles
# using the method are drawn in each facet.
methodPanel <- function(mapping, colour, label) {
  ggplot(methodo, mapping) +
    geom_bar(fill = colour) +
    facet_wrap(~discipline, ncol = 4) +
    xlab(label) +
    ylab("Number of articles") +
    theme(axis.title.x = element_text(colour = colour)) +
    scale_y_continuous(limits = c(0, 17)) +
    scale_x_discrete(labels = NULL, limits = c(1))
}

p1 <- methodPanel(aes(x = Statistical), "blue",
                  "Use of statistical regression models")
p2 <- methodPanel(aes(x = AnalyticalModel), "orange",
                  "Use of analytical models and simulation")
p3 <- methodPanel(aes(x = IndexAnalysis), "forestgreen",
                  "Use of index comparison")
p4 <- methodPanel(aes(x = HistoricalAnalysis), "firebrick3",
                  "Use of historical analysis")
p5 <- methodPanel(aes(x = MapComparison), "aquamarine3",
                  "Use of map comparisons")
p6 <- methodPanel(aes(x = DiscourseAnalysis), "#C77398",
                  "Use of discourse analysis")

# Two rows of three panels.
grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 3)
Time frame of studies (Appendix G)
# Articles whose time-frame answer contains ":" or "-".
# NOTE(review): the filter reads `form` while filtering `fdata`; this assumes
# both hold the same articles in the same order - confirm.
longitudinal <- fdata |>
  filter(
    grepl(":", form$`Time frame of study`, fixed = TRUE) |
      grepl("-", form$`Time frame of study`, fixed = TRUE)
  ) |>
  select(Authors, year, `Time frame of study`)

# Start and end years of the longitudinal studies, reshaped to one row per
# (reference, date_type) so that each study can be drawn as a line.
# `start` and `end` are read as numeric years; they are turned into 1 January
# of that year through an explicit ISO "YYYY-01-01" string rather than relying
# on lenient parsing of "YYYY01-01".
data <- read_csv("data/longitudinal_studies_hand.csv") |>
  mutate(
    start = ymd(paste0(start, "-01-01")),
    end = ymd(paste0(end, "-01-01"))
  ) |>
  arrange(desc(end)) |>
  pivot_longer(
    cols = c("start", "end"),
    names_to = "date_type",
    values_to = "date"
  )
## Rows: 22 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): reference
## dbl (2): start, end
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Distribution of coefficient values (Figure 6)
# Time span of each study: references on the (flipped) x axis, in their order
# of appearance in `data`, and dates along the other axis.
data |>
  ggplot(aes(x = fct_inorder(reference), y = date)) +
  geom_line(size = 0.5, col = "blue") +
  labs(x = "", y = "") +
  coord_flip() +
  theme_minimal()
Units and Scales
# 0/1 factors telling whether the scale-of-analysis answer mentions a given
# scale (matched verbatim).
scaleAnswers <- form$`Scale of analysis`
isNational <- grepl("National", scaleAnswers, fixed = TRUE)
isMetropolitan <- grepl("Urban (metropolitan)", scaleAnswers, fixed = TRUE)

fdata$nationalScale <- as.factor(ifelse(isNational, 1, 0))
fdata$metroScale <- as.factor(ifelse(isMetropolitan, 1, 0))

# National-scale flag (rows) by discipline (columns).
table(fdata[["nationalScale"]], fdata[["discipline"]])
##
## SOC ECON URBREG GEOG DEMO CRIMEPID PHYMAT OTHER
## 0 4 7 11 6 1 4 2 6
## 1 14 9 4 2 5 1 1 3
# Metropolitan-scale flag (rows) by discipline (columns).
table(fdata[["metroScale"]], fdata[["discipline"]])
##
## SOC ECON URBREG GEOG DEMO CRIMEPID PHYMAT OTHER
## 0 14 10 8 3 6 5 2 3
## 1 4 6 7 5 0 0 1 6
# 0/1 factors telling whether the unit-of-analysis answer mentions a given
# unit (matched verbatim).
unitAnswers <- form$`Unit of analysis`
isIndividual <- grepl("Individual", unitAnswers, fixed = TRUE)
isHousehold <- grepl("Household", unitAnswers, fixed = TRUE)
isNeighbourhood <- grepl("Neighbourhood", unitAnswers, fixed = TRUE)

fdata$indUnit <- as.factor(ifelse(isIndividual, 1, 0))
fdata$houseUnit <- as.factor(ifelse(isHousehold, 1, 0))
fdata$neighUnit <- as.factor(ifelse(isNeighbourhood, 1, 0))

# Number of articles without (0) / with (1) individuals as unit of analysis.
summary(fdata[["indUnit"]])
## 0 1
## 47 33
# Number of articles without (0) / with (1) households as unit of analysis.
summary(fdata[["houseUnit"]])
## 0 1
## 54 26
# Number of articles without (0) / with (1) neighbourhoods as unit of analysis.
summary(fdata[["neighUnit"]])
## 0 1
## 68 12
Causal pathways
# Causal Paths
# Plot the network of causal paths found in a set of articles.
#
# Arguments:
#   data   : article table with a CorePaper column and a `path` column holding,
#            for each article, the codes of the paths it describes
#   nodes  : vertex table passed to graph_from_data_frame() (uses its `type`
#            and `concepts` attributes)
#   links  : edge table passed to graph_from_data_frame() (needs an `id` column
#            matching the path codes, and a `type` column)
#   ID     : "all" plots the non-core articles (CorePaper == 0), "core" the
#            core articles (CorePaper == 1); any other value is matched
#            against the `Article-ID` column to plot a single article
#   legend : draw the concept-type and relation-type legends?
#
# Edge width, weight and label are the number of times each path (codes 1 to
# 37) occurs in the selected articles. Labels are only drawn for paths
# occurring more than 3 times.
#
# The layout `l` and the plotting function plot.igraph2() are not defined here
# and must exist in the calling environment.
fishgraph <- function(data, nodes, links, ID = "all", legend = TRUE) {
  # Select the articles and build the plot title.
  if (ID == "all") {
    fdata <- data %>%
      filter(CorePaper == 0)
    pastetext <- paste0(
      "Distribution of effects in non-core articles | n = ",
      dim(fdata[!is.na(fdata$path), ])[1]
    )
  } else if (ID == "core") {
    fdata <- data %>%
      filter(CorePaper == 1)
    pastetext <- paste0(
      "Distribution of effects in core articles | n = ",
      dim(fdata[!is.na(fdata$path), ])[1]
    )
  } else {
    fdata <- data %>%
      filter(`Article-ID` == ID)
    pastetext <- paste0("Article = ", fdata$Authors,
                        ", ", fdata$`Publication Year`)
  }

  # One row per (article, path code).
  dt_list <- map(fdata$path, as.data.table)
  dt <- as.data.frame(rbindlist(dt_list, fill = TRUE, idcol = TRUE))
  colnames(dt) <- c("ID", "path")
  dt$path <- as.numeric(dt$path)

  # Number of occurrences of each valid path code; `f` is a raw count.
  freq <- dt %>%
    count(path) %>%
    filter(path %in% 1:37)
  freq$f <- freq$n
  freq$id <- as.numeric(freq$path)
  links <- left_join(links, freq, by = "id")

  net <- graph_from_data_frame(d = links, vertices = nodes, directed = TRUE)

  # Edges: coloured by relation type, invisible when the path never occurs.
  ecolors <- c("mediumvioletred", "gray75", "gray45",
               "cornflowerblue", "mediumaquamarine")
  alphaecolors <- alpha(ecolors, 0.6)
  edge_type <- as.factor(E(net)$type)
  edge_used <- !is.na(E(net)$f)
  E(net)$color <- ifelse(edge_used, alphaecolors[edge_type], alpha("white", 0))
  E(net)$width <- E(net)$f
  E(net)$weight <- E(net)$f
  # `f` is a count, not a share, so the label carries no "%" sign.
  E(net)$label <- ifelse(E(net)$f > 3, as.character(round(E(net)$f, 0)), NA)
  E(net)$label.color <- ifelse(edge_used, ecolors[edge_type],
                               alpha("white", 0))

  # Vertices: colour, size and label size depend on the concept type.
  vcolors <- c("tomato", "orange", "black")
  vsizes <- c(4, 2, 1)
  vertex_type <- as.factor(V(net)$type)
  V(net)$label <- V(net)$concepts
  V(net)$color <- vcolors[vertex_type]
  V(net)$size <- vsizes[vertex_type] * 5
  V(net)$label.cex <- sqrt(vsizes[vertex_type]) / 2
  # One label position per vertex (the graph is expected to have 7 vertices).
  V(net)$label.degree <- c(pi, 2 * pi, -1.5, -1.5, pi / 2, pi / 2, pi / 2)
  V(net)$frame.color <- "white"

  plot.igraph2(net, edge.arrow.size = .6, edge.curved = 0.4, layout = l,
               vertex.label.dist = 2,
               vertex.label.font = 2, vertex.label.color = "black",
               edge.label.cex = 0.75,
               edge.label.font = 2,
               main = pastetext
  )

  if (legend == TRUE) {
    legend(x = -1.5, y = -1.1, levels(as.factor(nodes$type)), pch = 21,
           col = "white", pt.bg = vcolors, pt.cex = 2, cex = .8, bty = "n",
           ncol = 1, title = "Type of concept")
    legend(x = 0.5, y = -1.1, levels(as.factor(links$type)), pch = 21,
           col = "white", pt.bg = alphaecolors, pt.cex = 2, cex = .8,
           bty = "n", ncol = 1, title = "Type of relation")
  }
}
Direction of causality
# One row per (article, path code); ID is the position of the article in fdata.
dt_list <- map(fdata$path, as.data.table)
dt <- as.data.frame(rbindlist(dt_list, fill = TRUE, idcol = TRUE))
colnames(dt) <- c("ID", "path")
dt$path <- as.numeric(dt$path)

# Path codes grouped by direction of causality: inequality to segregation,
# segregation to inequality, and mediator to mediator.
itos <- c(13, 2, 14, 3, 15, 4,
          1, 16, 5, 17, 6)
stoi <- c(18, 8, 19, 9, 20, 10,
          7, 21, 11, 22, 12)
mtom <- 23:37

# For each article (row of fdata), 1 if at least one of its paths belongs to
# the direction, 0 otherwise. Sized by nrow(fdata) instead of a hard-coded 80.
article_rows <- seq_len(nrow(fdata))
i_dir <- data.frame(
  ItoS = as.numeric(article_rows %in% dt$ID[dt$path %in% itos]),
  StoI = as.numeric(article_rows %in% dt$ID[dt$path %in% stoi]),
  MtoM = as.numeric(article_rows %in% dt$ID[dt$path %in% mtom])
)
fdata$ItoS <- i_dir$ItoS
fdata$StoI <- i_dir$StoI
fdata$MtoM <- i_dir$MtoM

# Sets of article IDs per direction, for the non-core articles...
directions <- fdata[fdata$CorePaper == 0, c("ID", "ItoS", "StoI", "MtoM")]
xdir <- list(
  IneqToSeg = unlist(as.list(directions[directions$ItoS == 1, "ID"])),
  SegToIneq = unlist(as.list(directions[directions$StoI == 1, "ID"])),
  MedToMed = unlist(as.list(directions[directions$MtoM == 1, "ID"])))

# ... and for the core articles.
direction_core <- fdata[fdata$CorePaper == 1, c("ID", "ItoS", "StoI", "MtoM")]
xdircore <- list(
  IneqToSeg = unlist(as.list(direction_core[direction_core$ItoS == 1, "ID"])),
  SegToIneq = unlist(as.list(direction_core[direction_core$StoI == 1, "ID"])),
  MedToMed = unlist(as.list(direction_core[direction_core$MtoM == 1, "ID"])))
Directionality
# Path networks, without legends: default selection first, then core articles.
fishgraph(fdata, nodes, links, legend = FALSE)
fishgraph(fdata, nodes, links, ID = "core", legend = FALSE)

# Venn diagram of the article sets per direction of causality.
#   sets     : named list of article-ID vectors
#   subtitle : plot subtitle
causalityVenn <- function(sets, subtitle) {
  ggVennDiagram(sets, label_alpha = 0.4, edge_lty = 0.1, set_size = 3.7) +
    ggplot2::scale_fill_gradient(low = "white", high = "black") +
    ggplot2::ggtitle("Direction of causality assumed", subtitle = subtitle) +
    ggplot2::theme(
      legend.position = "right",
      legend.justification = "right",
      legend.direction = "vertical"
    )
}

causalityVenn(xdir, "Eligible papers: n=48")
# NOTE(review): `xdircore` holds the core papers, yet the subtitle reads
# "Eligible papers" - confirm the intended wording.
causalityVenn(xdircore, "Eligible papers: n=32")
Actors’ wordcloud (Appendix H)
# Draw a wordcloud of the values found in a list column.
#
# Arguments:
#   data         : article table
#   variableName : name (string) of the column holding, for each article, a
#                  vector of answers
#   core         : if TRUE, keep only the core articles (CorePaper == 1)
#   seednb       : seed set before drawing, so that the layout is reproducible
#                  (note that this resets the global random number generator)
answerWordcloud <- function(data, variableName, core = FALSE, seednb = 123) {
  if (core) {
    data <- data %>%
      filter(CorePaper == 1)
  }

  # One row per (article, answer).
  objectList <- map(data[[variableName]], as.data.table)
  allobjects <- as.data.frame(rbindlist(objectList, fill = TRUE, idcol = TRUE))
  colnames(allobjects) <- c("ID", "var")

  # Frequency of each answer; the filter drops the literal string "NA" as well
  # as missing values (for which the comparison is NA).
  words <- allobjects %>%
    count(var, sort = TRUE) %>%
    filter(var != "NA")

  set.seed(seednb) # for reproducibility
  wordcloud(words = words$var, freq = words$n, min.freq = 1,
            max.words = 200, random.order = FALSE, rot.per = 0.35,
            colors = brewer.pal(8, "Paired"))
}
# Split the "; "-separated actors answer into a list column holding one
# character vector per article.
descdata <- mutate(
  fdata,
  actors = strsplit(x = `Actors involved (separated by ";")`, split = "; ")
)
Wordcloud of actors (Appendix H)
# Actors named in all articles, then in the core articles only.
answerWordcloud(descdata, variableName = "actors")
answerWordcloud(descdata, variableName = "actors", core = TRUE)
Estimated coefficients (Appendix I)
# References flagged "yes" in the "Comparable specifications?" legend.
comparableRefs <- c("Reardon & Bischoff, 2011", "Mutgan & Mijs, 2023",
                    "Simpson et al., 2023")
# Order of the references along the axis.
referenceOrder <- c("Mutgan & Mijs, 2023", "Simpson et al., 2023",
                    "Hu & Liang, 2022", "Owens, 2016",
                    "Reardon & Bischoff, 2011", "Watson, 2009", "Telles, 1995")

# Estimated coefficients with an interval of +/- one standard error.
quant_summary_coeff <- read.csv("data/quant_summary_coeff.csv") |>
  mutate(
    low = Coeff - `Standard.Error`,
    high = Coeff + `Standard.Error`,
    `Comparable\nspecifications?` = ifelse(Ref %in% comparableRefs,
                                           "yes", "no"),
    Reference = factor(Ref, levels = referenceOrder)
  )

# Point estimate and error bar per reference, with a reference line at zero.
quant_summary_coeff |>
  ggplot(aes(x = Reference,
             y = Coeff,
             colour = `Comparable\nspecifications?`)) +
  geom_point() +
  geom_errorbar(aes(ymin = low, ymax = high), width = 0.2) +
  scale_colour_manual(values = c("aquamarine3", "orange")) +
  geom_hline(yintercept = 0, colour = "black") +
  ylab("Estimated values of coefficient in the study") +
  coord_flip()