Sex | sex |
---|---|
Female | 0 |
Male | 1 |
7 Extract Demographic
1 Read Preprocessed File
We read output data from the previous section.
2 Demographics and Behavioral parameters
2.1 Age and Sex
`age_years` will be mapped from the column `Age`.
`sex` is grouped as follows:
Code
# Harmonise age and sex for cohort B.
# Adds `age_years` (integer copy of `Age`) and `sex` (factor: "0" = Female,
# "1" = Male) next to their source columns, then validates both.
age_gender_data <- cohort_B_data |>
  dplyr::select(c("cohort_unique_id",
                  "Age",
                  "Sex")) |>
  # Age must hold whole numbers (NA allowed) before the integer cast below.
  # The check reads this table's own `Age` column; it previously pointed at
  # `cohort_A_data[["age"]]`, which validated a different cohort's data.
  pointblank::col_vals_expr(
    expr = ~ harmonisation::is_integer_vector(
      .data[["Age"]],
      allow_na = TRUE)
  ) |>
  dplyr::mutate(
    # Convert age to type integer
    age_years = as.integer(.data[["Age"]]),
    # Convert categorical columns to factors
    sex = dplyr::case_when(
      .data[["Sex"]] == "Female" ~ "0",
      .data[["Sex"]] == "Male" ~ "1",
      .default = NA_character_
    ),
    `Sex` = forcats::fct_relevel(
      .data[["Sex"]],
      c("Female", "Male")
    ),
    sex = forcats::fct_relevel(
      .data[["sex"]],
      c("0", "1"))
  ) |>
  # Keep each harmonised column directly after its source column.
  dplyr::relocate(
    "sex",
    .after = "Sex"
  ) |>
  dplyr::relocate(
    "age_years",
    .after = "Age"
  ) |>
  pointblank::col_vals_in_set(
    columns = "sex",
    set = c("0", "1")
  ) |>
  # Accept ages in (0, 100]; missing ages pass.
  pointblank::col_vals_between(
    columns = "age_years",
    left = 0,
    right = 100,
    inclusive = c(FALSE, TRUE),
    na_pass = TRUE
  )
Code
# Display the table only when requested via `params$show_table` and when
# rendering to HTML.
if (params$show_table && knitr::is_html_output()) {
  age_gender_data |>
    harmonisation::reactable_with_download_csv_button()
}
Remove unnecessary columns so that we can merge with the other fields.
Code
# Drop the raw source columns; only the harmonised columns are merged later.
age_gender_data <- age_gender_data |>
  dplyr::select(-c("Age", "Sex"))
2.2 Height, Weight, BMI and BSA
`height_cm` will be mapped from the column `Height`.
`weight_kg` will be mapped from the column `Weight`.
`bsa_m2` (in m^2) will be calculated as $\sqrt{\dfrac{\text{Height (cm)} \times \text{Weight (kg)}}{3600}}$.
`bmi` will be calculated as $\dfrac{\text{Weight (kg)}}{(\text{Height (m)})^2}$.
All values are then rounded to two decimal places.
Code
# Harmonise height and weight for cohort B and derive BSA and BMI.
# BSA and BMI are computed from the unrounded height/weight; every value is
# rounded to 2 digits afterwards.
body_measurement_data <- cohort_B_data |>
  dplyr::select(c("cohort_unique_id",
                  "Height", "Weight")) |>
  dplyr::mutate(
    height_cm = .data[["Height"]],
    weight_kg = .data[["Weight"]],
    # BSA in m^2: sqrt(height (cm) * weight (kg) / 3600)
    bsa_m2 = sqrt((.data[["height_cm"]] * .data[["weight_kg"]]) / 3600),
    bsa_m2 = harmonisation::round_to_nearest_digit(.data[["bsa_m2"]], digits = 2),
    # BMI: weight (kg) / height (m)^2
    bmi = .data[["weight_kg"]] / ((.data[["height_cm"]] / 100)^2),
    bmi = harmonisation::round_to_nearest_digit(.data[["bmi"]], digits = 2),
    # Round the inputs last so the derived values use full precision.
    height_cm = harmonisation::round_to_nearest_digit(.data[["height_cm"]], digits = 2),
    weight_kg = harmonisation::round_to_nearest_digit(.data[["weight_kg"]], digits = 2)
  ) |>
  # Accept BMI in [10, 50]; missing BMI passes.
  pointblank::col_vals_between(
    columns = "bmi",
    left = 10,
    right = 50,
    inclusive = c(TRUE, TRUE),
    na_pass = TRUE
  )
Code
# Display the table only when requested via `params$show_table` and when
# rendering to HTML.
if (params$show_table && knitr::is_html_output()) {
  body_measurement_data |>
    harmonisation::reactable_with_download_csv_button()
}
Remove unnecessary columns so that we can merge with the other fields.
Code
# Drop the raw source columns; only the harmonised columns are merged later.
body_measurement_data <- body_measurement_data |>
  dplyr::select(-c("Height", "Weight"))
2.3 Smoking History
smoke_current
is grouped as follows:
Smoke History | smoke_current |
---|---|
non-smoker | 0 |
past smoker | 0 |
current smoker | 1 |
NA | -1 |
smoke_past
is grouped as follows:
Smoke History | smoke_past |
---|---|
non-smoker | 0 |
past smoker | 1 |
current smoker | 0 |
NA | -1 |
We check that only the following combinations can occur:

- `smoke_current` as 1 and `smoke_past` as 0 for current smokers
- `smoke_current` as 0 and `smoke_past` as 1 for past smokers
- `smoke_current` as 0 and `smoke_past` as 0 for non-smokers
- `smoke_current` as -1 and `smoke_past` as -1 for unknown
Code
# Harmonise smoking history for cohort B into two indicator factors.
# `smoke_current` and `smoke_past` are "1"/"0" flags, with "-1" when
# `Smoke History` is missing. Any other unexpected label becomes NA.
smoking_data <- cohort_B_data |>
  dplyr::select(c("cohort_unique_id",
                  "Smoke History")) |>
  dplyr::mutate(
    smoke_current = dplyr::case_when(
      is.na(.data[["Smoke History"]]) ~ "-1",
      .data[["Smoke History"]] == "non-smoker" ~ "0",
      .data[["Smoke History"]] == "past smoker" ~ "0",
      .data[["Smoke History"]] == "current smoker" ~ "1",
      .default = NA_character_
    ),
    smoke_current = forcats::fct_relevel(
      .data[["smoke_current"]],
      c("0", "1")),
    smoke_past = dplyr::case_when(
      is.na(.data[["Smoke History"]]) ~ "-1",
      .data[["Smoke History"]] == "non-smoker" ~ "0",
      .data[["Smoke History"]] == "past smoker" ~ "1",
      .data[["Smoke History"]] == "current smoker" ~ "0",
      .default = NA_character_
    ),
    smoke_past = forcats::fct_relevel(
      .data[["smoke_past"]],
      c("0", "1")),
    `Smoke History` = forcats::fct(
      .data[["Smoke History"]]
    )
  ) |>
  pointblank::col_vals_in_set(
    columns = c("smoke_current", "smoke_past"),
    set = c("0", "1", "-1")
  ) |>
  # Only these (smoke_current, smoke_past) pairs are valid:
  # ("1", "0") current, ("-1", "-1") unknown, ("0", "0" or "1") non/past.
  # The unknown case compares against the string "-1"; the previous `-"1"`
  # applied unary minus to a string, which is an error in R.
  pointblank::col_vals_expr(
    expr = pointblank::expr(
      (.data[["smoke_current"]] == "1" & .data[["smoke_past"]] == "0") |
        (.data[["smoke_current"]] == "-1" & .data[["smoke_past"]] == "-1") |
        (.data[["smoke_current"]] == "0" & .data[["smoke_past"]] %in% c("0", "1"))
    )
  )
Code
# Display the table only when requested via `params$show_table` and when
# rendering to HTML.
if (params$show_table && knitr::is_html_output()) {
  smoking_data |>
    harmonisation::reactable_with_download_csv_button()
}
Remove unnecessary columns so that we can merge with the other fields.
Code
# Drop the raw source column; only the harmonised columns are merged later.
smoking_data <- smoking_data |>
  dplyr::select(-c("Smoke History"))
2.4 Chest Pain
2.4.1 Shortness of Breath
have_sob
is grouped as follows:
Dyspnea | have_sob |
---|---|
no | 0 |
yes | 1 |
Code
# Harmonise shortness of breath for cohort B.
# `have_sob` is "0" for Dyspnea == "no" and "1" for "yes"; anything else
# (including missing) becomes NA.
shortness_of_breath_data <- cohort_B_data |>
  dplyr::select(c("cohort_unique_id", "Dyspnea")) |>
  dplyr::mutate(
    have_sob = dplyr::case_when(
      .data[["Dyspnea"]] == "no" ~ "0",
      .data[["Dyspnea"]] == "yes" ~ "1",
      .default = NA_character_
    ),
    have_sob = forcats::fct_relevel(
      as.character(.data[["have_sob"]]),
      c("0", "1")),
    Dyspnea = forcats::fct_relevel(
      as.character(.data[["Dyspnea"]]),
      c("no", "yes"))
  ) |>
  # NOTE(review): "-1" is accepted here although the mapping above never
  # produces it — confirm whether missing Dyspnea should map to "-1".
  pointblank::col_vals_in_set(
    columns = c("have_sob"),
    set = c("0", "1", "-1")
  )
Code
# Display the table only when requested via `params$show_table` and when
# rendering to HTML.
if (params$show_table && knitr::is_html_output()) {
  shortness_of_breath_data |>
    harmonisation::reactable_with_download_csv_button()
}
Remove unnecessary columns so that we can merge with the other fields.
Code
# Drop the raw source column; only the harmonised column is merged later.
shortness_of_breath_data <- shortness_of_breath_data |>
  dplyr::select(-c("Dyspnea"))
2.4.2 Have chest pain or not
have_chest_pain
is grouped as follows:
Chest Pain Character | have_chest_pain |
---|---|
no chest pain | 0 |
typical, atypical or nonanginal | 1 |
Code
# Harmonise presence of chest pain for cohort B.
# `have_chest_pain` is "0" for "no chest pain" and "1" for typical, atypical
# or nonanginal pain; anything else (including missing) becomes NA.
have_chest_pain_data <- cohort_B_data |>
  dplyr::select(c("cohort_unique_id", "Chest Pain Character")) |>
  dplyr::mutate(
    have_chest_pain = dplyr::case_when(
      .data[["Chest Pain Character"]] %in% c("no chest pain") ~ "0",
      .data[["Chest Pain Character"]] %in% c("typical", "atypical", "nonanginal") ~ "1",
      .default = NA_character_
    ),
    have_chest_pain = forcats::fct_relevel(
      .data[["have_chest_pain"]],
      c("0", "1")
    ),
    `Chest Pain Character` = forcats::fct_relevel(
      as.character(.data[["Chest Pain Character"]]),
      c("no chest pain", "typical", "atypical", "nonanginal")
    )
  ) |>
  pointblank::col_vals_in_set(
    columns = c("have_chest_pain"),
    set = c("0", "1")
  )
Code
# Display the table only when requested via `params$show_table` and when
# rendering to HTML.
if (params$show_table && knitr::is_html_output()) {
  have_chest_pain_data |>
    harmonisation::reactable_with_download_csv_button()
}
Remove unnecessary columns so that we can merge with the other fields.
Code
# Drop the raw source column; only the harmonised column is merged later.
have_chest_pain_data <- have_chest_pain_data |>
  dplyr::select(-c("Chest Pain Character"))
2.4.3 Symptomatic or Asymptomatic
symptoms
is grouped as follows:
have_sob | have_chest_pain | symptoms |
---|---|---|
-1 | -1 | -1 |
0 | 0 | 0 |
0 or 1 | 1 | 1 |
1 | 0 | 2 |
Code
# Derive `symptoms` from the harmonised `have_sob` and `have_chest_pain`.
# "0" = neither, "1" = chest pain (with or without shortness of breath),
# "2" = shortness of breath only, "-1" = both unknown.
# The joins error on unmatched or duplicated `cohort_unique_id` values.
symptoms_data <- cohort_B_data |>
  dplyr::select(c("cohort_unique_id")) |>
  dplyr::inner_join(shortness_of_breath_data,
                    by = dplyr::join_by("cohort_unique_id"),
                    unmatched = "error",
                    relationship = "one-to-one") |>
  dplyr::inner_join(have_chest_pain_data,
                    by = dplyr::join_by("cohort_unique_id"),
                    unmatched = "error",
                    relationship = "one-to-one") |>
  dplyr::mutate(
    symptoms = dplyr::case_when(
      (.data[["have_chest_pain"]] == "-1" &
         .data[["have_sob"]] == "-1"
      ) ~ "-1",
      (.data[["have_chest_pain"]] == "0" &
         .data[["have_sob"]] == "0"
      ) ~ "0",
      (.data[["have_chest_pain"]] == "1" &
         .data[["have_sob"]] %in% c("0", "1")
      ) ~ "1",
      (.data[["have_chest_pain"]] == "0" &
         .data[["have_sob"]] == "1"
      ) ~ "2",
      .default = NA_character_
    ),
    symptoms = forcats::fct_relevel(
      .data[["symptoms"]],
      c("0", "1", "2"))
  ) |>
  pointblank::col_vals_in_set(
    columns = c("symptoms"),
    set = c("0", "1", "2")
  )
Code
# Display the table only when requested via `params$show_table` and when
# rendering to HTML.
if (params$show_table && knitr::is_html_output()) {
  symptoms_data |>
    harmonisation::reactable_with_download_csv_button()
}
Remove unnecessary columns so that we can merge with the other fields.
Code
# Drop the input columns; they are merged later from their own tables.
symptoms_data <- symptoms_data |>
  dplyr::select(-c("have_chest_pain", "have_sob"))
2.4.4 Chest Pain Type
chest_pain_type
is grouped as follows:
Dyspnea | Chest Pain Character | chest_pain_type |
---|---|---|
no | no chest pain | 0 |
no or yes | typical | 1 |
no or yes | atypical | 2 |
no or yes | nonanginal | 3 |
yes | no chest pain | 4 |
Code
# Derive `chest_pain_type` from `Chest Pain Character` and `Dyspnea`.
# "0" = no chest pain and no dyspnea, "1" = typical, "2" = atypical,
# "3" = nonanginal, "4" = no chest pain but dyspnea; otherwise NA.
chest_pain_type_data <- cohort_B_data |>
  dplyr::select(c("cohort_unique_id", "Chest Pain Character", "Dyspnea")) |>
  dplyr::mutate(
    chest_pain_type = dplyr::case_when(
      (.data[["Chest Pain Character"]] == "no chest pain" &
         .data[["Dyspnea"]] == "no"
      ) ~ "0",
      (.data[["Chest Pain Character"]] == "typical" &
         .data[["Dyspnea"]] %in% c("no", "yes")
      ) ~ "1",
      (.data[["Chest Pain Character"]] == "atypical" &
         .data[["Dyspnea"]] %in% c("no", "yes")
      ) ~ "2",
      (.data[["Chest Pain Character"]] == "nonanginal" &
         .data[["Dyspnea"]] %in% c("no", "yes")
      ) ~ "3",
      (.data[["Chest Pain Character"]] == "no chest pain" &
         .data[["Dyspnea"]] == "yes"
      ) ~ "4",
      .default = NA_character_
    ),
    `Chest Pain Character` = forcats::fct_relevel(
      as.character(.data[["Chest Pain Character"]]),
      c("no chest pain", "typical", "atypical", "nonanginal")
    ),
    `Dyspnea` = forcats::fct_relevel(
      as.character(.data[["Dyspnea"]]),
      c("no", "yes")
    ),
    chest_pain_type = forcats::fct_relevel(
      .data[["chest_pain_type"]],
      c("0", "1", "2", "3"))
  ) |>
  dplyr::relocate(
    "Chest Pain Character",
    .after = "cohort_unique_id"
  ) |>
  pointblank::col_vals_in_set(
    columns = c("chest_pain_type"),
    set = c("0", "1", "2", "3", "4")
  )
Code
# Display the table only when requested via `params$show_table` and when
# rendering to HTML.
if (params$show_table && knitr::is_html_output()) {
  chest_pain_type_data |>
    harmonisation::reactable_with_download_csv_button()
}
Remove unnecessary columns so that we can merge with the other fields.
Code
# Drop the raw source columns; only the harmonised column is merged later.
chest_pain_type_data <- chest_pain_type_data |>
  dplyr::select(-c("Dyspnea", "Chest Pain Character"))
2.5 Combine Demographics
We combine all the data to give `demo_behave_data`.
Code
# Merge every harmonised demographic/behavioural table on `cohort_unique_id`.
# Each join errors on unmatched or duplicated ids, so the row set stays that
# of `cohort_B_data`.
join_specification <- dplyr::join_by("cohort_unique_id")

demo_behave_data <- cohort_B_data |>
  dplyr::select(c("cohort_unique_id")) |>
  dplyr::inner_join(age_gender_data,
                    by = join_specification,
                    unmatched = "error",
                    relationship = "one-to-one") |>
  dplyr::inner_join(body_measurement_data,
                    by = join_specification,
                    unmatched = "error",
                    relationship = "one-to-one") |>
  dplyr::inner_join(smoking_data,
                    by = join_specification,
                    unmatched = "error",
                    relationship = "one-to-one") |>
  # The chest-pain fields live in four separate tables. The earlier join on
  # `chest_pain_data` referenced an object not created in the sections above;
  # joining the four tables directly yields the columns checked below.
  dplyr::inner_join(shortness_of_breath_data,
                    by = join_specification,
                    unmatched = "error",
                    relationship = "one-to-one") |>
  dplyr::inner_join(have_chest_pain_data,
                    by = join_specification,
                    unmatched = "error",
                    relationship = "one-to-one") |>
  dplyr::inner_join(symptoms_data,
                    by = join_specification,
                    unmatched = "error",
                    relationship = "one-to-one") |>
  dplyr::inner_join(chest_pain_type_data,
                    by = join_specification,
                    unmatched = "error",
                    relationship = "one-to-one") |>
  dplyr::relocate(c("bsa_m2", "bmi"),
                  .after = "sex")

# All harmonised columns must be present ...
testthat::expect_true(
  pointblank::has_columns(
    demo_behave_data,
    columns = c(
      "age_years", "sex",
      "height_cm", "weight_kg", "bsa_m2", "bmi",
      "smoke_current", "smoke_past",
      "have_sob", "have_chest_pain",
      "symptoms", "chest_pain_type"
    )
  )
)

# ... and nothing else: the 12 columns above plus `cohort_unique_id`.
testthat::expect_equal(
  ncol(demo_behave_data), 13
)
3 Write Preprocessed File
We output data to be used for the next section.
Code
# Write the combined table to the preprocessing folder as an fst file.
# The folder components come from the document's `params`.
demo_behave_data |>
  fst::write_fst(
    path = here::here(
      params$analysis_folder,
      params$harmonisation_folder,
      params$preprocessing_folder,
      "02_demographic_data.fst")
  )