What you are trying to achieve is called a spatial join. Functions like sf::st_intersection()
or st_join()
are useful for this purpose. In your case, st_join()
is much faster and better as you are only interested in one-to-one, point-to-polygon relationships.
One issue you had with your code is that you needed to use st_transform()
to project your MD object to the same CRS as datsf. If you try to do that with st_set_crs()
, all it does is treat the coordinate values literally. This is why it throws a warning if you do so.
If you use st_transform()
, it will throw a warning as the CRS of your data are not planar. You can safely ignore this waring in this instance. This is another advantage of using st_join()
, as it can handle both geographic and projected CRS.
Here's a step-by-step approach to achieve your desired outcome:
library(sf)
library(dplyr)
EIA_Data <- read.table(text = "Latitude Longitude
39.18207 -76.53715
39.18207 -76.53715
39.17810 -76.52680
39.17810 -76.52680
39.17810 -76.52680
39.17810 -76.52680
39.44284 -76.22176
39.44284 -76.22176
39.44284 -76.22176
39.44284 -76.22176
39.44284 -76.22176", header = TRUE)
# Add unique id for illustrative purposes
EIA_Data <- mutate(EIA_Data, id = 1:n())
# Create sf points object from EIA_Data
datsf <- st_as_sf(EIA_Data, coords = c("Longitude", "Latitude"), crs = 4326)
MD <- tidycensus::get_acs(state = "MD", geography = "tract",
variables = "B19013_001", geometry = TRUE)
# Project MD to WGS84/EPSG:4326
MD <- st_transform(MD, 4326)
# Spatially join data and subset columns
datsf <- datsf %>%
st_join(., MD) %>%
select(id, NAME)
datsf
# Simple feature collection with 11 features and 2 fields
# Geometry type: POINT
# Dimension: XY
# Bounding box: xmin: -76.53715 ymin: 39.1781 xmax: -76.22176 ymax: 39.44284
# Geodetic CRS: WGS 84
# First 10 features:
# id NAME geometry
# 1 1 Census Tract 7301.02; Anne Arundel County; Maryland POINT (-76.53715 39.18207)
# 2 2 Census Tract 7301.02; Anne Arundel County; Maryland POINT (-76.53715 39.18207)
# 3 3 Census Tract 7301.02; Anne Arundel County; Maryland POINT (-76.5268 39.1781)
# 4 4 Census Tract 7301.02; Anne Arundel County; Maryland POINT (-76.5268 39.1781)
# 5 5 Census Tract 7301.02; Anne Arundel County; Maryland POINT (-76.5268 39.1781)
# 6 6 Census Tract 7301.02; Anne Arundel County; Maryland POINT (-76.5268 39.1781)
# 7 7 Census Tract 3024; Harford County; Maryland POINT (-76.22176 39.44284)
# 8 8 Census Tract 3024; Harford County; Maryland POINT (-76.22176 39.44284)
# 9 9 Census Tract 3024; Harford County; Maryland POINT (-76.22176 39.44284)
# 10 10 Census Tract 3024; Harford County; Maryland POINT (-76.22176 39.44284)
# Return result as df
EIA_Data <- datsf %>%
mutate(Latitude = st_coordinates(.)[,2],
Longitude = st_coordinates(.)[,1]) %>%
st_drop_geometry() %>%
select(-c(id))
EIA_Data
# NAME Latitude Longitude
# 1 Census Tract 7301.02; Anne Arundel County; Maryland 39.18207 -76.53715
# 2 Census Tract 7301.02; Anne Arundel County; Maryland 39.18207 -76.53715
# 3 Census Tract 7301.02; Anne Arundel County; Maryland 39.17810 -76.52680
# 4 Census Tract 7301.02; Anne Arundel County; Maryland 39.17810 -76.52680
# 5 Census Tract 7301.02; Anne Arundel County; Maryland 39.17810 -76.52680
# 6 Census Tract 7301.02; Anne Arundel County; Maryland 39.17810 -76.52680
# 7 Census Tract 3024; Harford County; Maryland 39.44284 -76.22176
# 8 Census Tract 3024; Harford County; Maryland 39.44284 -76.22176
# 9 Census Tract 3024; Harford County; Maryland 39.44284 -76.22176
# 10 Census Tract 3024; Harford County; Maryland 39.44284 -76.22176
# 11 Census Tract 3024; Harford County; Maryland 39.44284 -76.22176
A simplified version combining the above code:
# Recreate datsf (use previously projected MD object)
EIA_Data <- read.table(text = "Latitude Longitude
39.18207 -76.53715
39.18207 -76.53715
39.17810 -76.52680
39.17810 -76.52680
39.17810 -76.52680
39.17810 -76.52680
39.44284 -76.22176
39.44284 -76.22176
39.44284 -76.22176
39.44284 -76.22176
39.44284 -76.22176", header = TRUE)
datsf <- st_as_sf(EIA_Data, coords = c("Longitude", "Latitude"), crs = 4326)
EIA_Data <- st_join(datsf, MD) %>%
mutate(Latitude = st_coordinates(.)[,2],
Longitude = st_coordinates(.)[,1]) %>%
st_drop_geometry() %>%
select(c(NAME, Latitude, Longitude))
joined_df <- sf::st_join(datsf, MD)
. You can also read more from the author of thetidycensus
package here