# Install usethis first if it is not already available:
# install.packages("usethis")

# Download and unpack the workshop project from GitHub
usethis::use_course("https://github.com/r-journalism/nicar-2024-tidycensus/archive/master.zip")

# Run this in the console of RStudio
file.edit("03_common_census_queries.R")
Common Census queries
You can follow along with the `03_common_census_queries.R` file in the
nicar-2024-tidycensus project folder that you downloaded in the intro link.
The repo containing the data and scripts for this section is on GitHub. To install those files, run the lines of code below.
To follow along with this walkthrough, simply run the lines of code in the gray boxes in the R console. Be sure to run them in order. If you run into an error, it may be because you skipped running some preceding lines of code.
Load libraries
library(tidyverse)
library(tidycensus)
Example of iterating with loops
Here’s a basic “for loop” which includes setting the limits for the loop to 10.
# Walk i through 1, 2, ..., 10 and print the current value on each pass
for (i in 1:10) {
  print(i)
}
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
Explaining loops in R
Manually, this would have looked like `print(1)`, then `print(2)`, then `print(3)`, and so on, one by one.
Each pass of the loop steps `i` through the established limits (`1:10`).
A loop is a way to run the same code many times with slight variations to a value or values in the code. Everything that should be repeated goes between the `{` and the `}`.
Multiple years of Census data
- Set up a way to append new data to the original data
# Collect county-level median home values for several ACS years into one
# long table with a `year` column identifying each pull.
big_census_data <- tibble() # creates a blank data frame

for (i in 2020:2022) {
  median_df <- get_acs( # temporary dataframe
    geography = "county",
    variables = "B25077_001", # median home values
    year = i
  ) |>
    mutate(year = i) # so we can identify which year

  # appends the temporary dataframe to the permanent one
  big_census_data <- bind_rows(big_census_data, median_df) |>
    arrange(GEOID, year)
}
Getting data from the 2016-2020 5-year ACS
Getting data from the 2017-2021 5-year ACS
Getting data from the 2018-2022 5-year ACS
big_census_data
# A tibble: 9,664 × 6
GEOID NAME variable estimate moe year
<chr> <chr> <chr> <dbl> <dbl> <int>
1 01001 Autauga County, Alabama B25077_001 161200 6652 2020
2 01001 Autauga County, Alabama B25077_001 164900 6871 2021
3 01001 Autauga County, Alabama B25077_001 191800 7996 2022
4 01003 Baldwin County, Alabama B25077_001 211600 5853 2020
5 01003 Baldwin County, Alabama B25077_001 226600 5984 2021
6 01003 Baldwin County, Alabama B25077_001 266000 6916 2022
7 01005 Barbour County, Alabama B25077_001 86500 9981 2020
8 01005 Barbour County, Alabama B25077_001 89500 11054 2021
9 01005 Barbour County, Alabama B25077_001 102700 11171 2022
10 01007 Bibb County, Alabama B25077_001 96400 13625 2020
# ℹ 9,654 more rows
Quickly calculate percent change
library(tidyr)
# Percent change in median home value between 2020 and 2022, one row per
# county, sorted from largest increase to largest decrease.
home_value_change <- big_census_data |>
  ungroup() |>
  filter(year != 2021) |> # keep only the two endpoint years
  select(NAME, estimate, year) |>
  # one column per year so the two estimates sit side by side
  pivot_wider(names_from = "year", values_from = "estimate") |>
  mutate(change = round((`2022` - `2020`) / `2020` * 100, 2)) |>
  arrange(desc(change))
home_value_change
# A tibble: 3,230 × 4
NAME `2020` `2022` change
<chr> <dbl> <dbl> <dbl>
1 Jackson County, South Dakota 58600 108400 85.0
2 Crockett County, Texas 102200 181300 77.4
3 Real County, Texas 146900 258000 75.6
4 De Baca County, New Mexico 130000 219200 68.6
5 Keya Paha County, Nebraska 65000 109000 67.7
6 Daggett County, Utah 152500 252400 65.5
7 Stewart County, Georgia 44100 72500 64.4
8 Gem County, Idaho 198700 324700 63.4
9 Shackelford County, Texas 110000 178500 62.3
10 Sterling County, Texas 75000 121000 61.3
# ℹ 3,220 more rows
Looping through states to get tracts
# Tract-level queries are made one state at a time here, so loop over the
# states of interest and stack the results into a single table.
state_names <- c("DC", "MD", "VA") # Get a list of state names or abbreviations

tract_data <- tibble()

# seq_along() is safe even if state_names is empty (1:length() would yield 1, 0)
for (i in seq_along(state_names)) {
  tract_df <- get_acs(
    geography = "tract",
    variables = "B25077_001",
    year = 2022,
    state = state_names[i] # Swap out the state name in the array
  )

  tract_data <- bind_rows(tract_data, tract_df)
}
Getting data from the 2018-2022 5-year ACS
Getting data from the 2018-2022 5-year ACS
Getting data from the 2018-2022 5-year ACS
tract_data
# A tibble: 3,879 × 5
GEOID NAME variable estimate moe
<chr> <chr> <chr> <dbl> <dbl>
1 11001000101 Census Tract 1.01; District of Columbia… B25077_… 635000 368769
2 11001000102 Census Tract 1.02; District of Columbia… B25077_… 1382800 636468
3 11001000201 Census Tract 2.01; District of Columbia… B25077_… NA NA
4 11001000202 Census Tract 2.02; District of Columbia… B25077_… 1385700 124023
5 11001000300 Census Tract 3; District of Columbia; D… B25077_… 1110900 40872
6 11001000400 Census Tract 4; District of Columbia; D… B25077_… 1620800 494698
7 11001000501 Census Tract 5.01; District of Columbia… B25077_… 1131400 180121
8 11001000502 Census Tract 5.02; District of Columbia… B25077_… 1168900 532289
9 11001000600 Census Tract 6; District of Columbia; D… B25077_… 1426300 221155
10 11001000702 Census Tract 7.02; District of Columbia… B25077_… 365400 22326
# ℹ 3,869 more rows
Exporting to a csv
# Save the combined tract table to disk; na = "" is the string written for
# missing values, so NA estimates come out as blank cells
write_csv(tract_data, "tract_data.csv", na = "")
Get a list of state names and/or abbreviations
- Pull a list of state names from the depths of R with `state.name`
- Pull a list of state abbreviations from the depths of R with `state.abb`
- Combine them into a dataframe and don’t forget to add in DC to make a name/abbreviation relationship file
# Build a state name/abbreviation lookup table. The built-in state.name and
# state.abb vectors cover the 50 states only, so DC is appended to both in
# the same position to keep the pairs aligned.
state_names <- c(state.name, "District of Columbia")
state_abbs <- c(state.abb, "DC")

state_df <- data.frame(state_names, state_abbs)

state_df
state_names state_abbs
1 Alabama AL
2 Alaska AK
3 Arizona AZ
4 Arkansas AR
5 California CA
6 Colorado CO
7 Connecticut CT
8 Delaware DE
9 Florida FL
10 Georgia GA
11 Hawaii HI
12 Idaho ID
13 Illinois IL
14 Indiana IN
15 Iowa IA
16 Kansas KS
17 Kentucky KY
18 Louisiana LA
19 Maine ME
20 Maryland MD
21 Massachusetts MA
22 Michigan MI
23 Minnesota MN
24 Mississippi MS
25 Missouri MO
26 Montana MT
27 Nebraska NE
28 Nevada NV
29 New Hampshire NH
30 New Jersey NJ
31 New Mexico NM
32 New York NY
33 North Carolina NC
34 North Dakota ND
35 Ohio OH
36 Oklahoma OK
37 Oregon OR
38 Pennsylvania PA
39 Rhode Island RI
40 South Carolina SC
41 South Dakota SD
42 Tennessee TN
43 Texas TX
44 Utah UT
45 Vermont VT
46 Virginia VA
47 Washington WA
48 West Virginia WV
49 Wisconsin WI
50 Wyoming WY
51 District of Columbia DC
Common census queries
- Diversity scores for counties
- Poverty quantiles for counties
- Population by age groups and gender by state
- Check it out in `03_common_census_queries.R`