From 8075d2e1f86737ef1569aeb4a8bd1d81aaf7be5c Mon Sep 17 00:00:00 2001 From: Tianshu Chen Date: Wed, 5 Aug 2020 04:14:26 -0600 Subject: [PATCH] v1.3.0 New upadate for ofp (improved output). --- DESCRIPTION | 2 +- R/ofp.R | 148 +++++++++++++++++++++++++++++++++++++++++++++------- man/ofp.Rd | 11 ++-- 3 files changed, 138 insertions(+), 23 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3015903..07715b3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: foqat Type: Package Title: Field observation quick analysis tookit -Version: 1.2.5 +Version: 1.3.0 Author: Tianshu Chen Maintainer: Tianshu Chen Description: A quick analysis toolkits for atmospheric field observation. diff --git a/R/ofp.R b/R/ofp.R index 291bcfb..fbedbfb 100644 --- a/R/ofp.R +++ b/R/ofp.R @@ -10,26 +10,35 @@ #' #' @param df dataframe contains time series. #' @param colid column index for date-time. The default value is 1. -#' @param unit unit for VOC concentration. A character vector from these options: "ugm" or "ppb". "ugm" means ug/m3. "ppb" means part per billion volumn. +#' @param unit unit for VOC concentration. A character vector from these options: "ugm" or "ppbv". "ugm" means ug/m3. "ppbv" means part per billion volumn. #' @param t Temperature, in units k, for conversion from PPB to micrograms per #' cubic meter. By default, t equals to 25 degrees Celsius. #' @param p Pressure, in kPa, for converting from PPB to micrograms per cubic #' meter. By default, p equals to 101.325 kPa. +#' @param sortd logical value. It determines whether the VOC species +#' are sorted or not. By default, sortd has value "TRUE". +#' If TRUE, VOC species in time series will be arranged according to VOC group, +#' relative molecular weight, and MIR value. +#' @param colid column index for date-time. The default value is 1. #' @return a list contains 2 tables: results for matched MIR values and OFP time series. #' @export #' @examples -#' ofp(voc, unit = "ppb") +#' ofp(voc) #' @importFrom utils URLencode #' @importFrom xml2 read_html -ofp <- function(df, unit = "ugm", t = 25, p = 101.325, colid = 1){ - +ofp <- function(df, unit = "ppbv", t = 25, p = 101.325, sortd =TRUE, colid = 1){ #set colid if(colid != 1){ df[,c(1,colid)] = df[,c(colid,1)] colnames(df)[c(1,colid)] = colnames(df)[c(colid,1)] } - + + #In case df is not a dataframe. + temp_col_name <- colnames(df) + df <- data.frame(df,stringsAsFactors = FALSE) + colnames(df) <- temp_col_name + #get VOC name by colnames of df #if read from xlsx, replace "X" and "." colnm_df = colnames(df)[2:ncol(df)] @@ -37,8 +46,9 @@ ofp <- function(df, unit = "ugm", t = 25, p = 101.325, colid = 1){ chemicalnames = gsub("\\.", "-", chemicalnames) #if i- chemicalnames = gsub("\\i-", "iso-", chemicalnames) + #build name_df - name_df = data.frame(name = chemicalnames,CAS = NA, Source = NA, Matched_Name = NA, MIR = NA, MW = NA, stringsAsFactors = FALSE) + name_df = data.frame(name = chemicalnames,CAS = NA, Source = NA, Matched_Name = NA, MIR = NA, MW = NA, Group = NA, stringsAsFactors = FALSE) #search VOC name to get CAS Number from different sources, add cas, sources, mathed_name to name_df ##firstly by NIST @@ -73,6 +83,7 @@ ofp <- function(df, unit = "ugm", t = 25, p = 101.325, colid = 1){ name_df$MIR[which(name_df$Source=="NIST"&!is.na(name_df$CAS))] = datacas$New[as.numeric(a)] name_df$Matched_Name[which(name_df$Source=="NIST"&!is.na(name_df$CAS))] = datacas$Description[as.numeric(a)] name_df$MW[which(name_df$Source=="NIST"&!is.na(name_df$CAS))] = datacas$MWt[as.numeric(a)] + name_df$Group[which(name_df$Source=="NIST"&!is.na(name_df$CAS))] = datacas$Group[as.numeric(a)] #if it is matched by CAS in NIST and matched by name in Carter paper, but it doesn't have CAS in Carter paper. @@ -86,6 +97,7 @@ ofp <- function(df, unit = "ugm", t = 25, p = 101.325, colid = 1){ name_df$MIR[as.numeric(k)] = df_null$New[1] name_df$Source[as.numeric(k)] = "CAS is found in NIST. But it only has name in Carter paper 2010" name_df$MW[as.numeric(k)] = df_null$MWt[1] + name_df$Group[as.numeric(k)] = df_null$Group[1] } } @@ -100,23 +112,121 @@ ofp <- function(df, unit = "ugm", t = 25, p = 101.325, colid = 1){ name_df$MIR[as.numeric(k)] = df_null$New[1] name_df$Source[as.numeric(k)] = "Carter paper 2010" name_df$MW[as.numeric(k)] = df_null$MWt[1] + name_df$Group[as.numeric(k)] = df_null$Group[1] } } - - #multiple df with MIR in name_df - ofp_df=df + + #set GROUP to Unknown for NA group + name_df$Group[is.na(name_df$Group)] = "Unknown" + + #set GROUP to BVOC for BVOC group + name_df$Group[name_df$CAS %in% c('80-56-8','127-91-3','78-79-5')] = "BVOC" + + #raw_order + name_df$raw_order = seq.int(nrow(name_df)) + + #set order for voc species in df and name_df + if(sortd==TRUE){ + #order by 2 columns + name_df$Group <- factor(name_df$Group, levels = c("Alkanes", "Alkenes", "BVOC", "Alkynes", "Aromatic_Hydrocarbons", "Oxygenated_Organics", "Other_Organic_Compounds", "Unknown")) + name_df = name_df[with(name_df, order(Group, MW, MIR)), ] + df[,2:ncol(df)]=df[,name_df$raw_order+1] + colnames(df)[2:ncol(df)]=colnames(df)[name_df$raw_order+1] + } + + #set concentration df, multiple df with MIR in name_df + ofp_df = df + r = 22.4*(273.15+t)*101.325/(273.15*p) if(unit=="ugm"){ - ofp_df[,2:ncol(ofp_df)] = data.frame(sapply(2:ncol(df),function(x) df[,x] * as.numeric(name_df$MIR)[x-1])) - #results - results <- list(MIR_Result = name_df, OFP_Result = ofp_df) - return(results) - }else if(unit=="ppb"){ - r = 22.4*(273.15+t)*101.325/(273.15*p) - ofp_df[,2:ncol(ofp_df)] = data.frame(sapply(2:ncol(df),function(x) df[,x] * as.numeric(name_df$MW*name_df$MIR/r)[x-1])) - #results - results <- list(MIR_Result = name_df, OFP_Result = ofp_df) - return(results) + Con_ugm = df + Con_ppbv = Con_ugm + Con_ppbv[,2:ncol(Con_ugm)] = data.frame(sapply(2:ncol(Con_ugm),function(x) Con_ugm[,x]*as.numeric(r/name_df$MW)[x-1])) + ofp_df[,2:ncol(ofp_df)] = data.frame(sapply(2:ncol(df),function(x) df[,x] * as.numeric(name_df$MIR)[x-1])) + }else if(unit=="ppbv"){ + Con_ppbv = df + Con_ugm = Con_ppbv + Con_ugm[,2:ncol(Con_ppbv)] = data.frame(sapply(2:ncol(Con_ppbv),function(x) Con_ppbv[,x]*as.numeric(name_df$MW/r)[x-1])) + ofp_df[,2:ncol(ofp_df)] = data.frame(sapply(2:ncol(df),function(x) df[,x] * as.numeric(name_df$MIR*name_df$MW/r)[x-1])) }else{ print("unit error") } + + #vector of group names + gn_list = c("Alkanes", "Alkenes", "BVOC", "Alkynes", "Aromatic_Hydrocarbons", "Oxygenated_Organics", "Other_Organic_Compounds", "Unknown") + + #generate group df + Con_ppbv_group=data.frame(Time=df[,1], Alkanes=NA, Alkenes_exclude_BVOC=NA, BVOC=NA, Alkynes=NA, Aromatic_Hydrocarbons=NA, Oxygenated_Organics=NA, Other_Organic_Compounds=NA, Unknown=NA) + Con_ugm_group=data.frame(Time=df[,1], Alkanes=NA, Alkenes_exclude_BVOC=NA, BVOC=NA, Alkynes=NA, Aromatic_Hydrocarbons=NA, Oxygenated_Organics=NA, Other_Organic_Compounds=NA, Unknown=NA) + ofp_df_group=data.frame(Time=df[,1], Alkanes=NA, Alkenes_exclude_BVOC=NA, BVOC=NA, Alkynes=NA, Aromatic_Hydrocarbons=NA, Oxygenated_Organics=NA, Other_Organic_Compounds=NA, Unknown=NA) + + #sum up columns + for(gn in 1:length(gn_list)){ + gn_sub_index = which(name_df$Group == gn_list[gn]) + if(length(gn_sub_index)!=0){ + if(length(gn_sub_index)==1){ + Con_ppbv_group[,gn+1]=Con_ppbv[,gn_sub_index+1] + Con_ugm_group[,gn+1]=Con_ugm[,gn_sub_index+1] + ofp_df_group[,gn+1]=ofp_df[,gn_sub_index+1] + }else{ + Con_ppbv_group[,gn+1]=rowSums(Con_ppbv[,gn_sub_index+1],na.rm = TRUE) + Con_ugm_group[,gn+1]=rowSums(Con_ugm[,gn_sub_index+1],na.rm = TRUE) + ofp_df_group[,gn+1]=rowSums(ofp_df[,gn_sub_index+1],na.rm = TRUE) + } + } + } + + #Con_ugm_mean + Con_ugm_mean=data.frame(species=row.names(statdf(Con_ugm)[-1,]),mean=as.numeric(as.character(statdf(Con_ugm,n = 6)[-1,1]))) + Con_ugm_mean$Proportion=Con_ugm_mean$mean/sum(as.numeric(as.character(statdf(Con_ugm,n = 6)[-1,1])),na.rm = TRUE) + Con_ugm_mean$Proportion=round(Con_ugm_mean$Proportion,4) + Con_ugm_mean=Con_ugm_mean[with(Con_ugm_mean, order(-mean)), ] + + #Con_ppbv_mean + Con_ppbv_mean=data.frame(species=row.names(statdf(Con_ppbv)[-1,]),mean=as.numeric(as.character(statdf(Con_ppbv,n = 6)[-1,1]))) + Con_ppbv_mean$Proportion=Con_ppbv_mean$mean/sum(as.numeric(as.character(statdf(Con_ppbv,n = 6)[-1,1])),na.rm = TRUE) + Con_ppbv_mean$Proportion=round(Con_ppbv_mean$Proportion,4) + Con_ppbv_mean=Con_ppbv_mean[with(Con_ppbv_mean, order(-mean)), ] + + #ofp_df_mean + ofp_df_mean=data.frame(species=row.names(statdf(ofp_df)[-1,]),mean=as.numeric(as.character(statdf(ofp_df,n = 6)[-1,1]))) + ofp_df_mean$Proportion=ofp_df_mean$mean/sum(as.numeric(as.character(statdf(ofp_df,n = 6)[-1,1])),na.rm = TRUE) + ofp_df_mean$Proportion=round(ofp_df_mean$Proportion,4) + ofp_df_mean=ofp_df_mean[with(ofp_df_mean, order(-mean)), ] + + #Con_ugm_group_mean + Con_ugm_group_mean=data.frame(species=row.names(statdf(Con_ugm_group)[-1,]),mean=as.numeric(as.character(statdf(Con_ugm_group,n = 6)[-1,1]))) + Con_ugm_group_mean$Proportion=Con_ugm_group_mean$mean/sum(as.numeric(as.character(statdf(Con_ugm_group,n = 6)[-1,1])),na.rm = TRUE) + Con_ugm_group_mean$Proportion=round(Con_ugm_group_mean$Proportion,4) + Con_ugm_group_mean=Con_ugm_group_mean[with(Con_ugm_group_mean, order(-mean)), ] + + #Con_ppbv_group_mean + Con_ppbv_group_mean=data.frame(species=row.names(statdf(Con_ppbv_group)[-1,]),mean=as.numeric(as.character(statdf(Con_ppbv_group,n = 6)[-1,1]))) + Con_ppbv_group_mean$Proportion=Con_ppbv_group_mean$mean/sum(as.numeric(as.character(statdf(Con_ppbv_group,n = 6)[-1,1])),na.rm = TRUE) + Con_ppbv_group_mean$Proportion=round(Con_ppbv_group_mean$Proportion,4) + Con_ppbv_group_mean=Con_ppbv_group_mean[with(Con_ppbv_group_mean, order(-mean)), ] + + #ofp_df_group_mean + ofp_df_group_mean=data.frame(species=row.names(statdf(ofp_df_group)[-1,]),mean=as.numeric(as.character(statdf(ofp_df_group,n = 6)[-1,1]))) + ofp_df_group_mean$Proportion=ofp_df_group_mean$mean/sum(as.numeric(as.character(statdf(ofp_df_group,n = 6)[-1,1])),na.rm = TRUE) + ofp_df_group_mean$Proportion=round(ofp_df_group_mean$Proportion,4) + ofp_df_group_mean=ofp_df_group_mean[with(ofp_df_group_mean, order(-mean)), ] + + + #results + results <- list( + Con_ugm = Con_ugm, + Con_ugm_mean = Con_ugm_mean, + Con_ugm_group = Con_ugm_group, + Con_ugm_group_mean = Con_ugm_group_mean, + Con_ppbv = Con_ppbv, + Con_ppbv_mean = Con_ppbv_mean, + Con_ppbv_group = Con_ppbv_group, + Con_ppbv_group_mean = Con_ppbv_group_mean, + MIR_Result = name_df, + OFP_Result = ofp_df, + OFP_Result_mean = ofp_df_mean, + OFP_Result_group = ofp_df_group, + OFP_Result_group_mean = ofp_df_group_mean + ) + return(results) } diff --git a/man/ofp.Rd b/man/ofp.Rd index 4717913..d329efd 100644 --- a/man/ofp.Rd +++ b/man/ofp.Rd @@ -4,12 +4,12 @@ \alias{ofp} \title{Calculate ozone formation potential} \usage{ -ofp(df, unit = "ugm", t = 25, p = 101.325, colid = 1) +ofp(df, unit = "ppbv", t = 25, p = 101.325, sortd = TRUE, colid = 1) } \arguments{ \item{df}{dataframe contains time series.} -\item{unit}{unit for VOC concentration. A character vector from these options: "ugm" or "ppb". "ugm" means ug/m3. "ppb" means part per billion volumn.} +\item{unit}{unit for VOC concentration. A character vector from these options: "ugm" or "ppbv". "ugm" means ug/m3. "ppbv" means part per billion volumn.} \item{t}{Temperature, in units k, for conversion from PPB to micrograms per cubic meter. By default, t equals to 25 degrees Celsius.} @@ -17,6 +17,11 @@ cubic meter. By default, t equals to 25 degrees Celsius.} \item{p}{Pressure, in kPa, for converting from PPB to micrograms per cubic meter. By default, p equals to 101.325 kPa.} +\item{sortd}{logical value. It determines whether the VOC species +are sorted or not. By default, sortd has value "TRUE". +If TRUE, VOC species in time series will be arranged according to VOC group, + relative molecular weight, and MIR value.} + \item{colid}{column index for date-time. The default value is 1.} } \value{ @@ -33,5 +38,5 @@ reactivity scale and hydrocarbon bin reactivities for regulatory applications. California Air Resources Board Contract, 2009, 339" (revised January 28, 2010). } \examples{ -ofp(voc, unit = "ppb") +ofp(voc) }