# Read data from a web site

It turns out that I need to get some met data from a web site. A cool thing about this web site is that it stores all the data in a date-formatted way, which means I can easily loop through the period I want. But the question is how to read data from a web site. I tried XML packages, which may be overkill for the problem I have. Long story short, read.table or read.delim can solve this problem very nicely.

#####################################################################

# Read Houston met data from a web site that serves one page per day
# (address = Prefix + "year/month/day/" + Subfix), and collect the mean
# midday wind speed (m/s) of every day in `wind_speed`.
# For all data MJJAS (May-September), 2005-2008
# Lei Zhu, 01/24/14

# Prefix and Subfix
# NOTE(review): the definition of `Prefix` (the base URL, ending in "/") is
# not present in this listing, so it has to be supplied before the script is
# run. Fail early with a clear message instead of "object not found" deep
# inside the loop.
if (!exists("Prefix")) {
  stop("`Prefix` (base URL of the met data site, ending in \"/\") ",
       "must be defined before running this script.", call. = FALSE)
}
Subfix <- "DailyHistory.html?format=1"

# Period to process and the hour window (24-hour clock, inclusive) to average
years <- 2005:2008
months <- 5:9
first_hour <- 12L
last_hour <- 13L

# Number of days in a given month: the day before the first of next month.
days_in_month <- function(year, month) {
  first_day <- as.Date(sprintf("%04d-%02d-01", year, month))
  next_first <- seq(first_day, by = "month", length.out = 2)[2]
  as.integer(format(next_first - 1, "%d"))
}

# Convert 12-hour clock strings ("12:53 AM", "1:53 PM") to the hour on a
# 24-hour clock. Vectorised; strings that cannot be parsed give NA.
hour_24 <- function(time_str) {
  time_str <- trimws(as.character(time_str))
  hour <- as.integer(sub(":.*$", "", time_str))
  is_am <- grepl("AM$", time_str)
  is_pm <- grepl("PM$", time_str)
  # which() drops NA so unparsable rows are simply left as NA
  midnight <- which(is_am & hour == 12L)    # 12:53 AM --> 0
  afternoon <- which(is_pm & hour != 12L)   # 12:53 PM --> 12; 1:53 PM --> 13
  hour[midnight] <- 0L
  hour[afternoon] <- hour[afternoon] + 12L
  hour
}

# Wind speeds (m/s) of the records of one day whose hour lies in
# [first_hour, last_hour]. Records reported as "Calm", as -9999 (presumably
# a missing-data marker) or without a usable number are dropped.
# Returns numeric(0) when nothing qualifies.
midday_wind_ms <- function(data_temp, first_hour, last_hour) {
  hour <- hour_24(data_temp$TimeCDT)
  speed_chr <- as.character(data_temp$Wind.SpeedMPH)
  speed_chr[speed_chr == "Calm"] <- NA   # avoid a coercion warning below
  speed_mph <- as.numeric(speed_chr)
  keep <- !is.na(hour) & hour >= first_hour & hour <= last_hour &
    !is.na(speed_mph) & speed_mph != -9999
  0.44704 * speed_mph[keep]   # mph -> m/s
}

# MJJAS, 2005-2008, daily: [year, month, day]; days that do not exist or
# have no usable data stay NA
wind_speed <- array(NA_real_, dim = c(length(years), length(months), 31))

for (year in years) {
  for (month in months) {
    ndays <- days_in_month(year, month)
    # Loop day
    for (day in seq_len(ndays)) {
      print(paste0("Processing: ", year, "/", month, "/", day))
      # Met data web
      address <- paste0(Prefix, year, "/", month, "/", day, "/", Subfix)
      # Read in table
      # NOTE(review): the original read call is missing from this listing;
      # a comma-separated table with a header row is assumed (the columns
      # TimeCDT and Wind.SpeedMPH are used below) -- confirm against the
      # format the site actually serves.
      data_temp <- tryCatch(
        read.table(address, header = TRUE, sep = ","),
        error = function(e) {
          warning("Could not read ", address, ": ", conditionMessage(e),
                  call. = FALSE)
          NULL
        }
      )
      if (is.null(data_temp)) {
        next   # leave this day as NA and keep going with the other days
      }
      # Wind speeds for the hours you want
      temp <- midday_wind_ms(data_temp, first_hour, last_hour)
      # Get the mean wind speed for the period you want
      if (length(temp) == 0) {
        print("   --- NO wind speed or only calm for the period you want!")
      } else {
        wind_speed[match(year, years), match(month, months), day] <- mean(temp)
      }
    } # Loop day
  }   # Loop month
}     # Loop year

# Get statistics
mean(wind_speed, na.rm = TRUE)
sd(as.vector(wind_speed), na.rm = TRUE)
hist(wind_speed)