Skip to content

Commit

Permalink
Add ESS 10 data (#69)
Browse files Browse the repository at this point in the history
* Update import for ESS Round 10

+ add ESS 10 data file
+ add progress bar for reading ESS data files
+ make minor revisions to code comments

* Use auto-format R code in RStudio

* Update documentation

* Add ESS 10 parties to ESS full party list

* Link Bulgaria in ESS link file

* Revise warning messages

+ fix spelling errors
+ use single quotes for variable names

* Modernize Tidyverse-R code

+ use Base-R pipe
+ use Base-R anonymous function
+ use conflicted package
+ use '.by' for single grouping
+ suppress read_csv() warning message
+ specify join keys
+ solve ggplot2 guides() warning

* Fix coding error first ESS party id

* Fix variable name mistake

* Revise finding different prtv/prtc ids

* Change order conflict resolution

* Update ESS to latest version

* Update ESS readme

* Update ESS prtc readme

* Use Base-R pipe

* Use utf-8 encoding for all Stata files

+ set utf-8 as function default
+ update list of Stata files used (exclude encoding)
+ use Base-R shorthand anonymous function

* Update data with correct encoding

* Correct DOI referencing

* Make code more readable

+ remove unused encoding parameter
+ use better function parameter name
+ correct one code comment for ESS-10

* Update data ESS round 10

* Update data ESS round 10

+ Minor fixes

* Add check for duplicates

* Keep entries without PF ID

* Update data ESS round 10

* Update data ESS round 10

* Update data ESS round 10

* Update data ESS round 10

* Update data ESS round 10

* Use conflicted coherently in all scripts

* Add minor revisions to code documentation

* Use first in many-to-many joins

* Revise 01-ess-prt-raw.R

+ correct for multiple prtv* variables
+ DEU & LTU
+ add last three characters to 'ess_id'

* Update essprtv plot

* Update readme.md

* Update harmonized file

* Update harmonized file

* Update readme.md

+ Change references

* Update prtc data

* Update prtv data

* Update data

* Use 'styler' to format code

* Use '.default' argument in 'case_when()'

* Add new line for first function argument

- 'styler' package auto-reformatted arguments previously
- checked 'styler' auto-reformat of changes

---------

Co-authored-by: pbederke <42916617+pbederke@users.noreply.github.com>
  • Loading branch information
hdigital and pbederke authored Aug 19, 2023
1 parent 8436080 commit 3919162
Show file tree
Hide file tree
Showing 11 changed files with 11,182 additions and 6,918 deletions.
2,510 changes: 1,501 additions & 1,009 deletions import/essprtc/essprtc.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion import/essprtc/readme.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# European Social Survey (ESS) · Rounds 1-9
# European Social Survey (ESS) · Rounds 1-10

+ __prtc*__ -- "Which party feel closer to, [...]"

Expand Down
136 changes: 83 additions & 53 deletions import/essprtv/01-ess-prt-raw.R
Original file line number Diff line number Diff line change
@@ -1,51 +1,63 @@
library(conflicted)
conflicts_prefer(dplyr::filter, .quiet = TRUE)

library(tidyverse)
library(readstata13)


ess_dta_path <- "source__ESS/" # path of ESS rounds Stata data
ess_dta_path <- "source__ESS/" # path of ESS rounds Stata data


## ESS 1-9 ----
## ESS waves ----

# List of information for Round 1 - 9
# data file information for ESS Rounds
ess_dta_files <-
tribble(
~data, ~encoding,
"ESS1e06_6.dta", "CP1252",
"ESS2e03_6.dta", "CP1252",
"ESS3e03_7.dta", "CP1252",
"ESS4e04_5.dta", "CP1252",
"ESS5e03_4.dta", "CP1252",
"ESS6e02_4.dta", "CP1252",
"ESS7e02_2.dta", "CP1252",
"ESS8e02_1.dta", "CP1252",
"ESS9e01_2.dta", "UTF-8"
c(
"ESS1e06_6.dta",
"ESS2e03_6.dta",
"ESS3e03_7.dta",
"ESS4e04_5.dta",
"ESS5e03_4.dta",
"ESS6e02_5.dta",
"ESS7e02_2.dta",
"ESS8e02_2.dta",
"ESS9e03_1.dta",
"ESS10.dta",
"ESS10SC.dta"
)

# Function to get party name and party ID
get_ess_parties <- function(data, encoding) {
data_path <- paste0(ess_dta_path, data)

party <-
read.dta13(data_path, fromEncoding = encoding) %>%
select(cntry, essround, starts_with(c("prtv", "prtc"))) %>%
pivot_longer(c(-cntry, -essround), names_to = "variable", values_to = "party")

party_id <-
read.dta13(data_path, convert.factors = FALSE, fromEncoding = encoding) %>%
select(cntry, essround, starts_with(c("prtv", "prtc"))) %>%
pivot_longer(c(-cntry, -essround), names_to = "variable", values_to = "party_id") %>%
# Get party name and party ID for one ESS Stata file.
#
# ess_dta -- file name of the Stata file, appended to 'ess_dta_path'
#
# Returns a long table with one row per respondent and 'prtv*'/'prtc*'
# variable: 'cntry', 'essround', 'variable', 'party' and 'party_id'.
get_ess_parties <- function(ess_dta) {
  data_path <- paste0(ess_dta_path, ess_dta)

  # read the file and stack all 'prtv*'/'prtc*' columns into long format;
  # '...' is forwarded to read.dta13()
  read_parties_long <- function(value_name, ...) {
    ess_raw <- read.dta13(data_path, ...)
    ess_prt <- select(ess_raw, cntry, essround, starts_with(c("prtv", "prtc")))

    pivot_longer(
      ess_prt,
      c(-cntry, -essround),
      names_to = "variable",
      values_to = value_name
    )
  }

  # first pass with read.dta13() defaults gives the 'party' values
  party <- read_parties_long("party")

  # second pass without factor conversion gives the 'party_id' values; both
  # passes read the same file in the same way, so the rows line up one-to-one
  party_codes <- read_parties_long("party_id", convert.factors = FALSE)
  party[["party_id"]] <- party_codes[["party_id"]]

  party
}

# party name and party ID for round 1-9 -- time intense so avoiding rereading
if(! exists("ess_prt_raw")) {
# party name and party ID for ESS rounds -- reading is time-intensive, so skip it if 'ess_prt_raw' already exists
if (!exists("ess_prt_raw")) {
ess_prt_raw <-
pmap(ess_dta_files, ~ get_ess_parties(..1, ..2)) %>%
map(ess_dta_files, \(.x) get_ess_parties(.x), .progress = TRUE) |>
bind_rows()
}

Expand All @@ -54,35 +66,53 @@ if(! exists("ess_prt_raw")) {

# combine and create 'ess_id'
ess_prt_out <-
ess_prt_raw %>%
drop_na(party) %>%
distinct() %>%
mutate(ess_id = paste(cntry, essround, party_id, substr(variable, 4, 4), sep = "-")) %>%
ess_prt_raw |>
drop_na(party) |>
distinct() |>
mutate(
ess_id = case_when(
cntry %in% c("DE", "LT") & str_detect(variable, "prtv") ~ paste(
cntry,
essround,
party_id,
substr(variable, 4, 4),
str_sub(variable, -3, -1),
sep = "-"
),
.default = paste(
cntry,
essround,
party_id,
substr(variable, 4, 4),
sep = "-"
)
)
) |>
arrange(cntry, essround, variable, party_id)

write_csv(ess_prt_out, "01-ess-prt-raw.csv", na = "")


## Data issues ESS ----

print("ESS variables with multipe prtv* variables per county -- duplicate 'ess_id'")
ess_prt_out %>%
filter(str_detect(variable, "prtv.+\\d")) %>%
pull(variable) %>%
unique() %>%
# Report the 'prtv*' variables whose name has at least one character and a
# digit after "prtv" -- the message below flags them as the source of
# duplicate 'ess_id' values.
print("ESS variables with multiple 'prtv*' variables per country -- duplicate 'ess_id'")

# Print explicitly: a bare top-level expression is only auto-printed in
# interactive/Rscript runs and would be silently dropped under source().
ess_prt_out |>
  filter(str_detect(variable, "prtv.+\\d")) |>
  pull(variable) |>
  unique() |>
  paste(collapse = ", ") |>
  print()


# find parties with different ids in prtv/prtc

prt_check <-
ess_prt_out %>%
select(ess_id, variable, party) %>%
mutate(variable = substr(variable, 1, 4)) %>%
distinct(ess_id, variable, .keep_all = TRUE) %>%
pivot_wider(names_from = variable, values_from = party)

prt_check_diff <-
prt_check %>%
drop_na() %>%
filter(prtv != prtc)
# Entries whose party differs between the 'prtv' and 'prtc' variables.
# 'ess_id_vc' is the 'ess_id' without a trailing '-v'/'-c' marker, so both
# entries of one party fall into the same row after widening.
prt_vc_different <-
  ess_prt_out |>
  # collapse variable names to their four-letter stem
  mutate(variable = substr(variable, 1, 4)) |>
  mutate(ess_id_vc = str_remove(ess_id, "-v$|-c$")) |>
  # keep the first entry per 'ess_id' and variable stem
  distinct(ess_id, variable, .keep_all = TRUE) |>
  select(ess_id_vc, variable, party) |>
  pivot_wider(
    id_cols = ess_id_vc,
    names_from = variable,
    values_from = party
  ) |>
  # only rows with both a 'prtv' and a 'prtc' entry can be compared
  drop_na() |>
  filter(prtv != prtc)
Loading

0 comments on commit 3919162

Please sign in to comment.