v2.0.0

avri: Modifies the output form of the results. The results are output in one table only, containing the mean and sd. The data can be optionally expressed in scientific notation. trs: added the 'fun' parameter, you can choose the resampling function, such as the maximum value. prop: new function to convert the time series into a proportional time series. statdf: remove the time column from the result. The 'prop' function can be called internally to account for the statistics of proportional time series. svri: New function to select a specific function to calculate the variation. For example the maximum value of during all 1:00. dm8n_np : New function, a simplified version of the dm8n function without the drawing module. dm8n_batch: New function, batch calculate the maximum eight-hour ozone at multiple sites. geom_ts ：New function, draw time series, support point, line, area, bar. geom_ts_batch: new function, plot time series in batch. geom_avri: new function, plot the average variation. geom_avri_batch: new function, plot the average variations in batch. geom_tsw: new function, plot the wind time series. loh\ofp\vocct function results added statistics tables containing: variance, quantile, maximum and minimum values. tuv_batch: fixed an error. Updated the VOC database and optimized the matching.
tianshu129 · Jan 18, 2022 · 2e47c65 · 2e47c65
1 parent e38e8ec
commit 2e47c65
Show file tree

Hide file tree

Showing 38 changed files with 1,966 additions and 185 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: foqat
 Type: Package
 Title: Field Observation Quick Analysis Toolkit
-Version: 1.7.2
+Version: 2.0.0
 Author: Tianshu Chen
 Maintainer: Tianshu Chen <[email protected]>
 Description: Tools for quickly processing and analyzing 
@@ -21,7 +21,7 @@ BugReports: https://github.com/tianshu129/foqat/issues
 Depends: R (>= 3.5.0)
 Imports: lubridate, magrittr, dplyr, plyr, stats, stringr, utils,
         lmodel2, reshape2, ggplot2, ggplotify, gridExtra,
-        scales, rvest, xml2
+        scales, rvest, xml2, ggnewscale, patchwork
 License: GPL-3 | file LICENSE
 Encoding: UTF-8
 LazyData: true

diff --git a/NAMESPACE b/NAMESPACE
@@ -4,12 +4,21 @@ export(afp)
 export(anylm)
 export(avri)
 export(dm8n)
+export(dm8n_batch)
+export(dm8n_np)
 export(fm)
+export(geom_avri)
+export(geom_avri_batch)
+export(geom_ts)
+export(geom_ts_batch)
+export(geom_tsw)
 export(koh)
 export(loh)
 export(nsvp)
 export(ofp)
+export(prop)
 export(statdf)
+export(svri)
 export(transp)
 export(trs)
 export(tsplotp)
@@ -19,22 +28,37 @@ export(vocct)
 import(ggplot2)
 import(lubridate)
 import(magrittr)
+import(patchwork)
 importFrom(dplyr,full_join)
 importFrom(dplyr,left_join)
+importFrom(ggnewscale,new_scale_color)
+importFrom(ggnewscale,new_scale_fill)
 importFrom(ggplot2,aes)
 importFrom(ggplot2,aes_string)
 importFrom(ggplot2,as_labeller)
 importFrom(ggplot2,element_blank)
 importFrom(ggplot2,element_text)
 importFrom(ggplot2,facet_wrap)
 importFrom(ggplot2,geom_abline)
+importFrom(ggplot2,geom_area)
+importFrom(ggplot2,geom_bar)
 importFrom(ggplot2,geom_line)
 importFrom(ggplot2,geom_point)
+importFrom(ggplot2,geom_text)
 importFrom(ggplot2,ggplot)
 importFrom(ggplot2,ggplotGrob)
 importFrom(ggplot2,ggtitle)
+importFrom(ggplot2,scale_color_discrete)
 importFrom(ggplot2,scale_color_gradientn)
+importFrom(ggplot2,scale_color_manual)
 importFrom(ggplot2,scale_colour_gradientn)
+importFrom(ggplot2,scale_colour_viridis_c)
+importFrom(ggplot2,scale_fill_discrete)
+importFrom(ggplot2,scale_fill_manual)
+importFrom(ggplot2,scale_linetype_manual)
+importFrom(ggplot2,scale_shape_manual)
+importFrom(ggplot2,scale_x_datetime)
+importFrom(ggplot2,scale_y_continuous)
 importFrom(ggplot2,theme)
 importFrom(ggplot2,theme_bw)
 importFrom(ggplotify,as.ggplot)
@@ -46,6 +70,7 @@ importFrom(lmodel2,lmodel2)
 importFrom(lubridate,duration)
 importFrom(lubridate,hour)
 importFrom(lubridate,hours)
+importFrom(lubridate,is.timepoint)
 importFrom(lubridate,minute)
 importFrom(lubridate,minutes)
 importFrom(lubridate,second)
@@ -55,12 +80,15 @@ importFrom(reshape2,dcast)
 importFrom(reshape2,melt)
 importFrom(rvest,html_nodes)
 importFrom(rvest,html_text)
+importFrom(scales,hue_pal)
 importFrom(scales,pretty_breaks)
 importFrom(scales,rescale)
 importFrom(stats,aggregate)
+importFrom(stats,complete.cases)
 importFrom(stats,na.omit)
 importFrom(stats,quantile)
 importFrom(stats,sd)
+importFrom(stats,setNames)
 importFrom(stringr,str_split_fixed)
 importFrom(utils,download.file)
 importFrom(utils,read.delim)

diff --git a/R/avri.R b/R/avri.R
@@ -25,23 +25,24 @@
 #' @param wind logical value. if TRUE, please set coliwd, coliws.
 #' @param coliws numeric value, column index of wind speed in dataframe.
 #' @param coliwd numeric value, column index of wind direction (degree) in dataframe.
+#' @param sn logical value. if TRUE, the results will be presented by scientific notation (string).
 #' @return  a list with 2 dataframe (average and SD). The first column of dataframe is the serial number within the period. The
 #' average variation (or SD) start from the second column. \cr
 #' Note that when the pattern USES
 #' "ncycle" or "custom", the start time determines the start time of the first
 #' element in the average variation. For example, if the first timestamp of data is
 #' "2010-05-01 12:00:00", the resolution is 1 hour, the mode is "ncycle", and the
 #' value is 24, then the result represents diurnal variation starting from 12 o'clock.
-
+#'
 #' @export
 #' @examples
 #' avri(met, bkip = "1 hour", mode = "recipes", value = "day",
 #' st = "2017-05-01 00:00:00", wind = TRUE, coliws = 4, coliwd = 5)
-#' @importFrom dplyr full_join
+#' @importFrom dplyr full_join left_join
 #' @importFrom stats aggregate
 #' @importFrom lubridate duration
 
-avri<-function(df, bkip=NULL, mode = "recipes", value = "day", st = NULL, et = NULL, na.rm = TRUE, digits = 2, wind = FALSE, coliws = 2, coliwd = 3){
+avri<-function(df, bkip=NULL, mode = "recipes", value = "day", st = NULL, et = NULL, na.rm = TRUE, digits = 2, wind = FALSE, coliws = 2, coliwd = 3, sn=FALSE){
 
   #time resampling
   if(mode!="custom"){
@@ -83,7 +84,7 @@ avri<-function(df, bkip=NULL, mode = "recipes", value = "day", st = NULL, et = N
       mod_list=month(rs_df[,1])
     }
   }else if(mode=="ncycle"){
-    mod_list=seq(0,nrow(rs_df)-1,1)%%value
+    mod_list=seq(0,nrow(rs_df)-1,1)%%as.numeric(value)
   }else if(mode=="custom"){
     mod_list=rs_df[,1]
   }
@@ -125,18 +126,34 @@ avri<-function(df, bkip=NULL, mode = "recipes", value = "day", st = NULL, et = N
   #format average data (avoid NA)
   if(!all(is.na(results[, -1]))){
 	if(ncol(results)==2){
-		results[,-1]=do.call(rbind, lapply(results[,-1], formatC, format = "e", digits = digits))
+		if(sn==TRUE){
+			results[,-1]=do.call(rbind, lapply(results[,-1], formatC, format = "e", digits = digits))
+		}else{
+			results[,-1]=do.call(rbind, lapply(results[,-1], as.numeric))
+		}
 	}else{
-		results[,-1]=lapply(results[,-1], formatC, format = "e", digits = digits)
+		if(sn==TRUE){
+			results[,-1]=lapply(results[,-1], formatC, format = "e", digits = digits)
+		}else{
+			results[,-1]=lapply(results[,-1], as.numeric)
+		}
 	}
   }
 
   #format sd data (avoid NA)
   if(!all(is.na(results_sd[, -1]))){
 	if(ncol(results_sd)==2){
-		results_sd[,-1]=do.call(rbind, lapply(results_sd[,-1], formatC, format = "e", digits = digits))
+		if(sn==TRUE){
+			results_sd[,-1]=do.call(rbind, lapply(results_sd[,-1], formatC, format = "e", digits = digits))
+		}else{
+			results_sd[,-1]=do.call(rbind, lapply(results_sd[,-1], as.numeric))
+		}
 	}else{
-		results_sd[,-1]=lapply(results_sd[,-1], formatC, format = "e", digits = digits)
+		if(sn==TRUE){	
+			results_sd[,-1]=lapply(results_sd[,-1], formatC, format = "e", digits = digits)
+		}else{
+			results_sd[,-1]=lapply(results_sd[,-1], as.numeric)
+		}
 	}
   }
 
@@ -170,6 +187,8 @@ avri<-function(df, bkip=NULL, mode = "recipes", value = "day", st = NULL, et = N
   #output
   df_average=results
   df_sd=results_sd
-  results <- list(df_average = df_average, df_sd = df_sd)
+  names(df_average)[-1]=paste0(names(df)[-1],"_ave")
+  names(df_sd)[-1]=paste0(names(df)[-1],"_sd")
+  results=left_join(df_average, df_sd, by = names(df_average)[1])
   return(results)
 }
diff --git a/R/dm8n_batch.R b/R/dm8n_batch.R
@@ -0,0 +1,58 @@
+#' Calculate daily maximum-8-hour ozone in batch
+#'
+#' Calculates daily maximum-8-hour ozone in batch
+#'
+#' This function can calculate daily maximum-8-hour ozone in batch.
+#' @param df dataframe of time series for ozone and other related parameters.
+#' @param starthour numeric, start hour for calculating 8-hour ozone. By default, it equals to 0.
+#' @param endhour numeric, end hour for calculating 8-hour ozone. By default, it equals to 16 which means averaging ozone between 16~23.
+#' @param nh numeric. The number of effective hourly concentrations per 8-hour period.
+#' @param nc numeric. The number of effective 8-hour average concentrations per day.
+#' @param na.rm logical. Should missing values (including NaN) be omitted from the calculations?
+#' @param outputmode numeric, the format of the output, possible value: 1 or 2. See 'value' for the results of filling in 1 or 2.
+#' @return a dataframe depends on the value of
+#' 'outputMode'. Value 1 will output 1 list which incudes
+#' 1 table (maximum-8-hour ozone). Value 2 will output 
+#' 1 list which contains 4 tables (8-hour ozone, 
+#' statistics of the number of effective hourly 
+#' concentrations in each 8-hour average concentration, 
+#' statistics of the number of effective 8-hour average
+#'  concentrations in each day, maximum-8-hour ozone).
+#'
+#' @export
+#' @importFrom dplyr left_join
+
+dm8n_batch<-function(df, starthour = 0, endhour=16, nh=6, nc=14, na.rm = TRUE, outputmode = 1){	
+	xi_df=df[,c(1,2)]
+	xi=dm8n_np(xi_df, colid = 1, colio = 2, starthour = 0, endhour=16, nh=6, nc=14, na.rm = TRUE, outputmode = 2)
+	xi_D8_final=xi[["D8"]]
+	xi_D8_count_final=xi[["D8_count"]]
+	xi_D8_count_by_day_final=xi[["D8_count_by_day"]]
+	xi_DMAX8_final=xi[["DMAX8"]]
+
+	if(ncol(df)>2){
+		for(i in 3:ncol(df)){
+			xi_df=df[,c(1,i)]
+			xi=dm8n_np(xi_df, colid = 1, colio = 2, starthour = 0, endhour=16, nh=6, nc=14, na.rm = TRUE, outputmode = 2)
+			xi_D8=xi[["D8"]]
+			xi_D8_count=xi[["D8_count"]]
+			xi_D8_count_by_day=xi[["D8_count_by_day"]]
+			xi_DMAX8=xi[["DMAX8"]]
+			#left_join
+			xi_D8_final= left_join(xi_D8_final, xi_D8, by = names(xi_D8_final)[c(1,2,3)])
+			xi_D8_count_final= left_join(xi_D8_count_final, xi_D8_count, by = names(xi_D8_count_final)[c(1,2,3)])
+			xi_D8_count_by_day_final= left_join(xi_D8_count_by_day_final, xi_D8_count_by_day, by = names(xi_D8_count_by_day_final)[1])
+			xi_DMAX8_final= left_join(xi_DMAX8_final, xi_DMAX8, by = names(xi_DMAX8_final)[1])
+		}
+	}
+
+	#set out  put
+	if(outputmode==2){
+	  names(xi_D8_count_final)[c(-1,-2,-3)]=names(df)[-1]
+	  names(xi_D8_count_by_day_final)[-1]=names(df)[-1]
+	  results = list(D8=xi_D8_final, D8_count=xi_D8_count_final, D8_count_by_day=xi_D8_count_by_day_final, DMAX8=xi_DMAX8_final)
+	}else{
+	  results = xi_DMAX8_final
+	}
+
+}