Setup
library(tidyverse)
library(arrow)
library(assertr)
source("0-context.R")
library(tidyverse)
library(arrow)
library(assertr)
source("0-context.R")
The SALURBAL TMPDAILY v2 (Dataset) was used to operationalize desired SALURBAL temperature daily. Note that this dataset contains both unweighted and population weighted daily city level temperature.
## Connect to CCUH database
= "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/salurbal_TMPDAILY__v2/salurbal_TMPDAILY__v2.parquet" |>
db_tmpdaily_v2 ::open_dataset()
arrow
## Query
<- db_tmpdaily_v2 %>%
df_daily_temp_salurbal filter(geo == 'L1AD', iso2%in% c("BR",'GT','PA')) %>%
select(geo, salid, year, iso2, date, TEMPX, TEMPPW) %>%
collect()
## Preview
glimpse(df_daily_temp_salurbal)
Rows: 1,212,018
Columns: 7
$ geo <chr> "L1AD", "L1AD", "L1AD", "L1AD", "L1AD", "L1AD", "L1AD", "L1AD",…
$ salid <chr> "102101", "102101", "102101", "102101", "102101", "102101", "10…
$ year <chr> "2000", "2000", "2000", "2000", "2000", "2000", "2000", "2000",…
$ iso2 <chr> "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR…
$ date <date> 2000-01-01, 2000-01-02, 2000-01-03, 2000-01-04, 2000-01-05, 20…
$ TEMPX <dbl> 25.49197, 24.39721, 25.11459, 25.42047, 24.88278, 24.32958, 25.…
$ TEMPPW <dbl> 25.43646, 24.91790, 25.52230, 25.73837, 25.04037, 24.69586, 25.…
Okay looks good.
We will pull population weighted will pull from the CCUH PRISM County10 zonal statics dataset.
## Connect to data model
="//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH_read_only_access/PRISM/prism_zonal_stats_v1/clean/county10.parquet" |>
db_prism ::open_dataset()
arrow
## DSPH Neighborhood counts
<- db_prism %>%
df_daily_temp_phl filter(state == 'PA',
== 'tmean') %>%
measure collect() %>%
filter(geoid == '42101') %>%
pivot_wider(names_from = measure, values_from = value)
## Preview
glimpse(df_daily_temp_phl)
Rows: 6,939
Columns: 6
$ geoid <chr> "42101", "42101", "42101", "42101", "42101", "42101", "42101", "…
$ date <date> 2023-01-01, 2023-01-02, 2023-01-03, 2023-01-04, 2023-01-05, 202…
$ geo <chr> "county10", "county10", "county10", "county10", "county10", "cou…
$ state <chr> "PA", "PA", "PA", "PA", "PA", "PA", "PA", "PA", "PA", "PA", "PA"…
$ year <int> 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023…
$ tmean <dbl> 10.1386881, 8.5863771, 8.8885345, 12.2293072, 14.8626919, 11.253…
We will get the population weighted averages from our datawarehouse.
Now let’s harmonize into a single dataset.
## SALURBAL
= df_daily_temp_salurbal %>%
df_salurbal left_join(xwalk_l2 %>% select(salid = salid1, salid1_name) %>% distinct())%>%
mutate( city_geo = 'L1AD') %>%
select(iso2, city_geo, city_geoid = salid, city_name = salid1_name,
temperature = TEMPX)
geo, date,
## Philadelphia
= df_daily_temp_phl %>%
df_usa mutate(iso2 = 'US',
city_geo = 'county10',
city_geoid = '42101',
city_name = 'Philadelphia County') %>%
select(iso2, city_geo, city_geoid , city_name ,
temperature = tmean)
geo, date,
## Harmonized
= bind_rows(df_salurbal, df_usa)
df_city_temp %>% arrow::write_parquet(context$df_city_temp) df_city_temp
Let’s preview
%>%
df_city_temp group_by(iso2) %>%
slice(1:2)
iso2 | city_geo | city_geoid | city_name | geo | date | temperature |
---|---|---|---|---|---|---|
BR | L1AD | 102101 | Rio Branco | L1AD | 2000-01-01 | 25.491968 |
BR | L1AD | 102101 | Rio Branco | L1AD | 2000-01-02 | 24.397211 |
GT | L1AD | 203101 | Escuintla | L1AD | 2000-01-01 | 20.972955 |
GT | L1AD | 203101 | Escuintla | L1AD | 2000-01-02 | 22.858728 |
PA | L1AD | 206101 | Panama City | L1AD | 2000-01-01 | 23.834009 |
PA | L1AD | 206101 | Panama City | L1AD | 2000-01-02 | 24.368616 |
US | county10 | 42101 | Philadelphia County | county10 | 2023-01-01 | 10.138688 |
US | county10 | 42101 | Philadelphia County | county10 | 2023-01-02 | 8.586377 |
Let’s do a quick plot of a single city in eachc ountry.
= df_city_temp %>% group_by(iso2) %>% slice(1) %>% pull(city_geoid)
vec__geoid %>%
df_city_temp mutate(iso2_geo = glue::glue("{iso2} ({city_geo})")) %>%
filter(city_geoid %in% vec__geoid) %>%
ggplot(aes(x = date, y = temperature))+
geom_line() +
facet_wrap(~iso2_geo) +
labs(title = 'Daily temperature for a city in each country')+
theme_minimal()
```{r}
df_city_temp = "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/_manuscripts/ccuh-server-MS1/v1.0/df_city_temp.parquet" |>
arrow::open_dataset() %>%
dplyr::collect()
```