MS1 - City Temperature datasets

Published

February 13, 2025

Setup
library(tidyverse)
library(arrow)
library(assertr)

source("0-context.R")

Components

SALURBAL

The SALURBAL TMPDAILY v2 (Dataset) was used to operationalize desired SALURBAL temperature daily. Note that this dataset contains both unweighted and population weighted daily city level temperature.

## Connect to CCUH database
db_tmpdaily_v2 = "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/salurbal_TMPDAILY__v2/salurbal_TMPDAILY__v2.parquet" |> 
   arrow::open_dataset() 

## Query
df_daily_temp_salurbal <- db_tmpdaily_v2  %>% 
  filter(geo == 'L1AD', iso2%in% c("BR",'GT','PA')) %>% 
  select(geo, salid, year, iso2, date, TEMPX, TEMPPW) %>% 
  collect() 

## Preview
glimpse(df_daily_temp_salurbal)
Rows: 1,212,018
Columns: 7
$ geo    <chr> "L1AD", "L1AD", "L1AD", "L1AD", "L1AD", "L1AD", "L1AD", "L1AD",…
$ salid  <chr> "102101", "102101", "102101", "102101", "102101", "102101", "10…
$ year   <chr> "2000", "2000", "2000", "2000", "2000", "2000", "2000", "2000",…
$ iso2   <chr> "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR…
$ date   <date> 2000-01-01, 2000-01-02, 2000-01-03, 2000-01-04, 2000-01-05, 20…
$ TEMPX  <dbl> 25.49197, 24.39721, 25.11459, 25.42047, 24.88278, 24.32958, 25.…
$ TEMPPW <dbl> 25.43646, 24.91790, 25.52230, 25.73837, 25.04037, 24.69586, 25.…

Okay looks good.

USA

We will pull population weighted will pull from the CCUH PRISM County10 zonal statics dataset.

## Connect to data model 
db_prism ="//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH_read_only_access/PRISM/prism_zonal_stats_v1/clean/county10.parquet" |> 
  arrow::open_dataset() 



## DSPH Neighborhood counts
df_daily_temp_phl <- db_prism  %>% 
  filter(state == 'PA',
         measure == 'tmean') %>% 
  collect() %>% 
  filter(geoid == '42101') %>% 
  pivot_wider(names_from = measure, values_from = value)

## Preview
glimpse(df_daily_temp_phl)
Rows: 6,939
Columns: 6
$ geoid <chr> "42101", "42101", "42101", "42101", "42101", "42101", "42101", "…
$ date  <date> 2023-01-01, 2023-01-02, 2023-01-03, 2023-01-04, 2023-01-05, 202…
$ geo   <chr> "county10", "county10", "county10", "county10", "county10", "cou…
$ state <chr> "PA", "PA", "PA", "PA", "PA", "PA", "PA", "PA", "PA", "PA", "PA"…
$ year  <int> 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023…
$ tmean <dbl> 10.1386881, 8.5863771, 8.8885345, 12.2293072, 14.8626919, 11.253…

We will get the population weighted averages from our datawarehouse.

Harmonized

Now let’s harmonize into a single dataset.

Counts

## SALURBAL
df_salurbal = df_daily_temp_salurbal %>% 
  left_join(xwalk_l2 %>% select(salid = salid1, salid1_name) %>% distinct())%>%
  mutate( city_geo = 'L1AD') %>%
  select(iso2, city_geo, city_geoid = salid, city_name = salid1_name,
         geo,  date, temperature = TEMPX)


## Philadelphia
df_usa = df_daily_temp_phl %>% 
  mutate(iso2 = 'US', 
         city_geo = 'county10',
         city_geoid = '42101', 
         city_name = 'Philadelphia County') %>% 
  select(iso2, city_geo, city_geoid , city_name ,
         geo,  date, temperature = tmean)


## Harmonized
df_city_temp = bind_rows(df_salurbal, df_usa)
df_city_temp %>% arrow::write_parquet(context$df_city_temp)

Let’s preview

df_city_temp %>% 
  group_by(iso2) %>% 
  slice(1:2) 
iso2 city_geo city_geoid city_name geo date temperature
BR L1AD 102101 Rio Branco L1AD 2000-01-01 25.491968
BR L1AD 102101 Rio Branco L1AD 2000-01-02 24.397211
GT L1AD 203101 Escuintla L1AD 2000-01-01 20.972955
GT L1AD 203101 Escuintla L1AD 2000-01-02 22.858728
PA L1AD 206101 Panama City L1AD 2000-01-01 23.834009
PA L1AD 206101 Panama City L1AD 2000-01-02 24.368616
US county10 42101 Philadelphia County county10 2023-01-01 10.138688
US county10 42101 Philadelphia County county10 2023-01-02 8.586377

Let’s do a quick plot of a single city in eachc ountry.

vec__geoid = df_city_temp %>% group_by(iso2) %>% slice(1) %>% pull(city_geoid)
df_city_temp %>% 
  mutate(iso2_geo = glue::glue("{iso2} ({city_geo})")) %>% 
  filter(city_geoid %in% vec__geoid) %>% 
  ggplot(aes(x = date, y = temperature))+
  geom_line() +
  facet_wrap(~iso2_geo) +
  labs(title = 'Daily temperature for a city in each country')+
  theme_minimal()

Access

```{r}
df_city_temp =  "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/_manuscripts/ccuh-server-MS1/v1.0/df_city_temp.parquet" |> 
  arrow::open_dataset() %>% 
  dplyr::collect()
```