R works with ESRI Shape file

ESRI Shape files are very important in Atmospheric Chemistry Modelling, as a lot of variables are reported/saved on county, state or country level. Very often we need to convert a shape file into rasters to achieve the "emission inventory". GIS software, like ArcGIS which I used a lot previously, can solve this kind of problem very easily and quickly. Those software, however, are not free. Instead, R provides an alternative solution without much cost except several lines of scripts.

Here shows a simple R function I wrote using RGDAL and RASTER packages. It gets area wighted value in user-defined grids over a specific region. It handles county-level values and saves the output as NetCDF file.

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Given a specific region, Counties and value of each county
# Get area wighted value in each cell
# And save output as a NetCDF file
# Need to install six packages: rgdal, raster, ncdf, Imap, fields, maps
# To intsall a package, try:
# install.packages("PACKAGE"), e.g, install.packages("rgdal")
# Lei Zhu, 02/23/14
#+++++++++++++++++++++++++++++++++++++++++++
# Input
#+++++++++++++++++++++++++++++++++++++++++++
# Define a region you want to extract, in degree
Lon_left  <- -74
Lon_right <- -69
Lat_low   <- 41
Lat_up    <- 43
# A list of counties
CountyName_Vector <- c("Franklin","Hampden","Dukes","Cheshire")
# A list of states that the above counties belong to
StateName_Vector  <- c("Massachusetts","Massachusetts","Massachusetts","New Hampshire")
# Give the county based values, eg, NOx emissions
Value_Vector      <- c(200, 300, 1000, 500)
# The output resolution you want
Resolution <- 0.005
# Input shape file direction
ShapeFile_dir <- "data/US_Shape"
# Output shape file direction
Output_dir <- "data/US_Shape"
Output_name <- "test.nc"
#+++++++++++++++++++++++++++++++++++++++++++
# Call the function
#+++++++++++++++++++++++++++++++++++++++++++
Shape2nc(CountyName_Vector, StateName_Vector, Value_Vector,Lon_left, Lon_right, Lat_low, Lat_up, Resolution, ShapeFile_dir, Output_dir, Output_name)
#+++++++++++++++++++++++++++++++++++++++++++
# Functions below
#+++++++++++++++++++++++++++++++++++++++++++
# =============================================================================
# Shape2nc
# Given a specific region, Counties and value of each county
# Get area wighted value in each cell
# And save output as a NetCDF file
# =============================================================================
Shape2nc <- function(CountyName_Vector,StateName_Vector,Value_Vector,
  Lon_left,Lon_right,Lat_low,Lat_up,Resolution,
  ShapeFile_dir,Output_dir,Output_name){
# Load libraries
library(rgdal); library(raster); library(ncdf); library(Imap); library(fields); library(maps)
# Check input vectors have the same length
temp <- c(length(CountyName_Vector),length(StateName_Vector),length(Value_Vector))
if(temp[1]!=temp[2]){
stop("CountyName_Vector and StateName_Vector have different length")
}
if(temp[1]!=temp[3]){
stop("CountyName_Vector and Value_Vector have different length")
}
# Read in the shape files
# Change the layer names if needed
Counties <- readOGR(dsn=paste(ShapeFile_dir,"Counties",sep="/"),layer ="gz_2010_us_050_00_5m")
States <- readOGR(dsn=paste(ShapeFile_dir,"States",sep="/"),layer ="gz_2010_us_040_00_5m")
# Vector to save county id
County_id_save <- c()
for(i in 1:length(CountyName_Vector)){
County_id <- which(Counties$NAME==CountyName_Vector[i])
if(identical(County_id,integer(0))){
print(CountyName_Vector[i])
stop("County name NOT found")
}
State_id <- which(States$NAME==StateName_Vector[i])
if(identical(State_id,integer(0))){
print(StateName_Vector[i])
stop("County name NOT found")
}
match_id <- which(Counties$STATE[County_id]==States$STATE[State_id])
County_id_save <- c(County_id_save,County_id[match_id])
}
# Extract the shape file within a certain area
Region <- extent(Lon_left,Lon_right,Lat_low,Lat_up) # Lon_left, Lon_right, Lat_low, Lat_up
Ranges <- abs(apply(as.matrix(bbox(Region)),1,diff)) # in degree
# Rasterize the shapefile within the region specified
Layer_extracted <- raster(Region,ncol=Ranges[1]/Resolution, nrow=Ranges[2]/Resolution)
Raster_extracted <- rasterize(Counties,Layer_extracted)
# Convert the extracted raster into a matrix
County_name_extracted <- as.matrix(Raster_extracted)
NRows <- dim(County_name_extracted)[1]
NCols <- dim(County_name_extracted)[2]
# Array to save values
County_value_extracted <- array(NA,dim=c(NRows,NCols))
# Calculate the value in each cell based on its area
for(row in 1:NRows){
for(col in 1:NCols){
temp_match_id <- which(County_id_save==County_name_extracted[row,col])
if(!identical(temp_match_id,integer(0))){ # Find the county id in County_id_save
# Areas in shape file saved in square miles
County_area <- Counties$CENSUSAREA[County_id_save[temp_match_id]]*2.59 #km2
lat_temp <- Lat_up-(row-1)*Resolution-0.5*Resolution
lon_temp <- Lon_left+(col-1)*Resolution+0.5*Resolution
Cell_area <-earth.dist(lon_temp,lat_temp,lon_temp+Resolution),lat_temp)*earth.dist(lon_temp,lat_temp,lon_temp,(lat_temp+Resolution))
# NOTICE: A raster file is saved from high latitude to low latitude, 
# BE CAREFUL!
County_value_extracted[(NRows+1-row),col] <- Cell_area/County_area*Value_Vector[temp_match_id]
}
}
}
# Plot the extracted value
# Define the netcdf coordinate variables
Lat <- seq((Lat_low +0.5*Resolution),(Lat_low +0.5*Resolution)+(NRows-1)*Resolution,by=Resolution)
Lon <- seq((Lon_left+0.5*Resolution),(Lon_left+0.5*Resolution)+(NCols-1)*Resolution,by=Resolution)
image.plot(Lon,Lat,t(County_value_extracted),horizontal=T,legend.shrink=1,axis.args = list(cex.axis = 1.2),legend.width=1,legend.mar=4,xlab='',ylab='',midpoint=T,main=paste("Value extracted",sep=""))
map('state',add=T,lwd=2) # State map
States_name <- unique(StateName_Vector)
for(state in 1:length(States_name)){
map('county',States_name[state],add=T,fill =F,col="gray40") # County map
}
# Save ncdf
dim_lat <- dim.def.ncdf("LAT","degree",as.double(Lat))
dim_lon <- dim.def.ncdf("LON","degree",as.double(Lon))
varz = var.def.ncdf("Value in each cell","NOT defined", list(dim_lat,dim_lon),-9999,longname="Value")
nc.ex = create.ncdf(paste(Output_dir,Output_name,sep="/"), varz )
put.var.ncdf(nc.ex,varz,County_value_extracted)
close.ncdf(nc.ex)
print(paste("NetCDF file saved at: ",Output_dir,"/",Output_name,sep=""))
} # end function Shape2nc
# =============================================================================
# earth.dist
# Calculate distance in kilometers between two points on the Earth surface
# =============================================================================
earth.dist <- function (long1,lat1,long2,lat2){
rad <- pi/180
a1 <- lat1*rad
a2 <- long1*rad
b1 <- lat2*rad
b2 <- long2*rad
dlon <- b2-a2
dlat <- b1-a1
a <- (sin(dlat/2))^2 + cos(a1)*cos(b1)*(sin(dlon/2))^2
c <- 2*atan2(sqrt(a), sqrt(1 - a))
R <- 6378.145 # km
d <- R*c
return(d)
} # end function earth.dist

Here is the output raster plot.

Handle shape file with R at county level

For county and state shape files, see the attached zip. Original R script is also attached.

us_shape.zip3.35 MB
script.zip2 KB