Skip to content

Commit

Permalink
Merge pull request #53 from tianshu129/test
Browse files Browse the repository at this point in the history
v1.3.0
  • Loading branch information
tianshu129 authored Aug 5, 2020
2 parents f5f4d55 + 8075d2e commit dc94169
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 23 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: foqat
Type: Package
Title: Field observation quick analysis toolkit
Version: 1.2.5
Version: 1.3.0
Author: Tianshu Chen
Maintainer: Tianshu Chen <[email protected]>
Description: A quick analysis toolkit for atmospheric field observation.
Expand Down
148 changes: 129 additions & 19 deletions R/ofp.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,35 +10,45 @@
#'
#' @param df dataframe containing time series.
#' @param colid column index for date-time. The default value is 1.
#' @param unit unit for VOC concentration. A character vector from these options: "ugm" or "ppb". "ugm" means ug/m3. "ppb" means parts per billion by volume.
#' @param unit unit for VOC concentration. A character vector from these options: "ugm" or "ppbv". "ugm" means ug/m3. "ppbv" means parts per billion by volume.
#' @param t Temperature, in degrees Celsius, for conversion from PPB to micrograms per
#' cubic meter. By default, t equals 25 degrees Celsius.
#' @param p Pressure, in kPa, for converting from PPB to micrograms per cubic
#' meter. By default, p equals 101.325 kPa.
#' @param sortd logical value. It determines whether the VOC species
#' are sorted or not. By default, sortd has value "TRUE".
#' If TRUE, VOC species in time series will be arranged according to VOC group,
#' relative molecular weight, and MIR value.
#' @param colid column index for date-time. The default value is 1.
#' @return a list of tables, including results for matched MIR values and OFP time series.
#' @export
#' @examples
#' ofp(voc, unit = "ppb")
#' ofp(voc)
#' @importFrom utils URLencode
#' @importFrom xml2 read_html

ofp <- function(df, unit = "ugm", t = 25, p = 101.325, colid = 1){

ofp <- function(df, unit = "ppbv", t = 25, p = 101.325, sortd =TRUE, colid = 1){
#set colid
if(colid != 1){
df[,c(1,colid)] = df[,c(colid,1)]
colnames(df)[c(1,colid)] = colnames(df)[c(colid,1)]
}


#In case df is not a dataframe.
temp_col_name <- colnames(df)
df <- data.frame(df,stringsAsFactors = FALSE)
colnames(df) <- temp_col_name

#get VOC name by colnames of df
#if read from xlsx, replace "X" and "."
colnm_df = colnames(df)[2:ncol(df)]
chemicalnames = ifelse(substr(colnm_df, 1, 1) == "X", sub("^.", "", colnm_df), colnm_df)
chemicalnames = gsub("\\.", "-", chemicalnames)
#if i-
chemicalnames = gsub("\\i-", "iso-", chemicalnames)

#build name_df
name_df = data.frame(name = chemicalnames,CAS = NA, Source = NA, Matched_Name = NA, MIR = NA, MW = NA, stringsAsFactors = FALSE)
name_df = data.frame(name = chemicalnames,CAS = NA, Source = NA, Matched_Name = NA, MIR = NA, MW = NA, Group = NA, stringsAsFactors = FALSE)

#search VOC name to get CAS Number from different sources, add cas, sources, matched_name to name_df
##firstly by NIST
Expand Down Expand Up @@ -73,6 +83,7 @@ ofp <- function(df, unit = "ugm", t = 25, p = 101.325, colid = 1){
name_df$MIR[which(name_df$Source=="NIST"&!is.na(name_df$CAS))] = datacas$New[as.numeric(a)]
name_df$Matched_Name[which(name_df$Source=="NIST"&!is.na(name_df$CAS))] = datacas$Description[as.numeric(a)]
name_df$MW[which(name_df$Source=="NIST"&!is.na(name_df$CAS))] = datacas$MWt[as.numeric(a)]
name_df$Group[which(name_df$Source=="NIST"&!is.na(name_df$CAS))] = datacas$Group[as.numeric(a)]


#if it is matched by CAS in NIST and matched by name in Carter paper, but it doesn't have CAS in Carter paper.
Expand All @@ -86,6 +97,7 @@ ofp <- function(df, unit = "ugm", t = 25, p = 101.325, colid = 1){
name_df$MIR[as.numeric(k)] = df_null$New[1]
name_df$Source[as.numeric(k)] = "CAS is found in NIST. But it only has name in Carter paper 2010"
name_df$MW[as.numeric(k)] = df_null$MWt[1]
name_df$Group[as.numeric(k)] = df_null$Group[1]
}
}

Expand All @@ -100,23 +112,121 @@ ofp <- function(df, unit = "ugm", t = 25, p = 101.325, colid = 1){
name_df$MIR[as.numeric(k)] = df_null$New[1]
name_df$Source[as.numeric(k)] = "Carter paper 2010"
name_df$MW[as.numeric(k)] = df_null$MWt[1]
name_df$Group[as.numeric(k)] = df_null$Group[1]
}
}

#multiply df with MIR in name_df
ofp_df=df

#set GROUP to Unknown for NA group
name_df$Group[is.na(name_df$Group)] = "Unknown"

#set GROUP to BVOC for BVOC group
name_df$Group[name_df$CAS %in% c('80-56-8','127-91-3','78-79-5')] = "BVOC"

#raw_order
name_df$raw_order = seq.int(nrow(name_df))

#set order for voc species in df and name_df
if(sortd==TRUE){
#order by 3 columns: Group, MW, MIR
name_df$Group <- factor(name_df$Group, levels = c("Alkanes", "Alkenes", "BVOC", "Alkynes", "Aromatic_Hydrocarbons", "Oxygenated_Organics", "Other_Organic_Compounds", "Unknown"))
name_df = name_df[with(name_df, order(Group, MW, MIR)), ]
df[,2:ncol(df)]=df[,name_df$raw_order+1]
colnames(df)[2:ncol(df)]=colnames(df)[name_df$raw_order+1]
}

#set concentration df, multiply df with MIR in name_df
ofp_df = df
r = 22.4*(273.15+t)*101.325/(273.15*p)
if(unit=="ugm"){
ofp_df[,2:ncol(ofp_df)] = data.frame(sapply(2:ncol(df),function(x) df[,x] * as.numeric(name_df$MIR)[x-1]))
#results
results <- list(MIR_Result = name_df, OFP_Result = ofp_df)
return(results)
}else if(unit=="ppb"){
r = 22.4*(273.15+t)*101.325/(273.15*p)
ofp_df[,2:ncol(ofp_df)] = data.frame(sapply(2:ncol(df),function(x) df[,x] * as.numeric(name_df$MW*name_df$MIR/r)[x-1]))
#results
results <- list(MIR_Result = name_df, OFP_Result = ofp_df)
return(results)
Con_ugm = df
Con_ppbv = Con_ugm
Con_ppbv[,2:ncol(Con_ugm)] = data.frame(sapply(2:ncol(Con_ugm),function(x) Con_ugm[,x]*as.numeric(r/name_df$MW)[x-1]))
ofp_df[,2:ncol(ofp_df)] = data.frame(sapply(2:ncol(df),function(x) df[,x] * as.numeric(name_df$MIR)[x-1]))
}else if(unit=="ppbv"){
Con_ppbv = df
Con_ugm = Con_ppbv
Con_ugm[,2:ncol(Con_ppbv)] = data.frame(sapply(2:ncol(Con_ppbv),function(x) Con_ppbv[,x]*as.numeric(name_df$MW/r)[x-1]))
ofp_df[,2:ncol(ofp_df)] = data.frame(sapply(2:ncol(df),function(x) df[,x] * as.numeric(name_df$MIR*name_df$MW/r)[x-1]))
}else{
print("unit error")
}

#vector of group names
gn_list = c("Alkanes", "Alkenes", "BVOC", "Alkynes", "Aromatic_Hydrocarbons", "Oxygenated_Organics", "Other_Organic_Compounds", "Unknown")

#generate group df
Con_ppbv_group=data.frame(Time=df[,1], Alkanes=NA, Alkenes_exclude_BVOC=NA, BVOC=NA, Alkynes=NA, Aromatic_Hydrocarbons=NA, Oxygenated_Organics=NA, Other_Organic_Compounds=NA, Unknown=NA)
Con_ugm_group=data.frame(Time=df[,1], Alkanes=NA, Alkenes_exclude_BVOC=NA, BVOC=NA, Alkynes=NA, Aromatic_Hydrocarbons=NA, Oxygenated_Organics=NA, Other_Organic_Compounds=NA, Unknown=NA)
ofp_df_group=data.frame(Time=df[,1], Alkanes=NA, Alkenes_exclude_BVOC=NA, BVOC=NA, Alkynes=NA, Aromatic_Hydrocarbons=NA, Oxygenated_Organics=NA, Other_Organic_Compounds=NA, Unknown=NA)

#sum up columns
for(gn in 1:length(gn_list)){
gn_sub_index = which(name_df$Group == gn_list[gn])
if(length(gn_sub_index)!=0){
if(length(gn_sub_index)==1){
Con_ppbv_group[,gn+1]=Con_ppbv[,gn_sub_index+1]
Con_ugm_group[,gn+1]=Con_ugm[,gn_sub_index+1]
ofp_df_group[,gn+1]=ofp_df[,gn_sub_index+1]
}else{
Con_ppbv_group[,gn+1]=rowSums(Con_ppbv[,gn_sub_index+1],na.rm = TRUE)
Con_ugm_group[,gn+1]=rowSums(Con_ugm[,gn_sub_index+1],na.rm = TRUE)
ofp_df_group[,gn+1]=rowSums(ofp_df[,gn_sub_index+1],na.rm = TRUE)
}
}
}

#Con_ugm_mean
Con_ugm_mean=data.frame(species=row.names(statdf(Con_ugm)[-1,]),mean=as.numeric(as.character(statdf(Con_ugm,n = 6)[-1,1])))
Con_ugm_mean$Proportion=Con_ugm_mean$mean/sum(as.numeric(as.character(statdf(Con_ugm,n = 6)[-1,1])),na.rm = TRUE)
Con_ugm_mean$Proportion=round(Con_ugm_mean$Proportion,4)
Con_ugm_mean=Con_ugm_mean[with(Con_ugm_mean, order(-mean)), ]

#Con_ppbv_mean
Con_ppbv_mean=data.frame(species=row.names(statdf(Con_ppbv)[-1,]),mean=as.numeric(as.character(statdf(Con_ppbv,n = 6)[-1,1])))
Con_ppbv_mean$Proportion=Con_ppbv_mean$mean/sum(as.numeric(as.character(statdf(Con_ppbv,n = 6)[-1,1])),na.rm = TRUE)
Con_ppbv_mean$Proportion=round(Con_ppbv_mean$Proportion,4)
Con_ppbv_mean=Con_ppbv_mean[with(Con_ppbv_mean, order(-mean)), ]

#ofp_df_mean
ofp_df_mean=data.frame(species=row.names(statdf(ofp_df)[-1,]),mean=as.numeric(as.character(statdf(ofp_df,n = 6)[-1,1])))
ofp_df_mean$Proportion=ofp_df_mean$mean/sum(as.numeric(as.character(statdf(ofp_df,n = 6)[-1,1])),na.rm = TRUE)
ofp_df_mean$Proportion=round(ofp_df_mean$Proportion,4)
ofp_df_mean=ofp_df_mean[with(ofp_df_mean, order(-mean)), ]

#Con_ugm_group_mean
Con_ugm_group_mean=data.frame(species=row.names(statdf(Con_ugm_group)[-1,]),mean=as.numeric(as.character(statdf(Con_ugm_group,n = 6)[-1,1])))
Con_ugm_group_mean$Proportion=Con_ugm_group_mean$mean/sum(as.numeric(as.character(statdf(Con_ugm_group,n = 6)[-1,1])),na.rm = TRUE)
Con_ugm_group_mean$Proportion=round(Con_ugm_group_mean$Proportion,4)
Con_ugm_group_mean=Con_ugm_group_mean[with(Con_ugm_group_mean, order(-mean)), ]

#Con_ppbv_group_mean
Con_ppbv_group_mean=data.frame(species=row.names(statdf(Con_ppbv_group)[-1,]),mean=as.numeric(as.character(statdf(Con_ppbv_group,n = 6)[-1,1])))
Con_ppbv_group_mean$Proportion=Con_ppbv_group_mean$mean/sum(as.numeric(as.character(statdf(Con_ppbv_group,n = 6)[-1,1])),na.rm = TRUE)
Con_ppbv_group_mean$Proportion=round(Con_ppbv_group_mean$Proportion,4)
Con_ppbv_group_mean=Con_ppbv_group_mean[with(Con_ppbv_group_mean, order(-mean)), ]

#ofp_df_group_mean
ofp_df_group_mean=data.frame(species=row.names(statdf(ofp_df_group)[-1,]),mean=as.numeric(as.character(statdf(ofp_df_group,n = 6)[-1,1])))
ofp_df_group_mean$Proportion=ofp_df_group_mean$mean/sum(as.numeric(as.character(statdf(ofp_df_group,n = 6)[-1,1])),na.rm = TRUE)
ofp_df_group_mean$Proportion=round(ofp_df_group_mean$Proportion,4)
ofp_df_group_mean=ofp_df_group_mean[with(ofp_df_group_mean, order(-mean)), ]


#results
results <- list(
Con_ugm = Con_ugm,
Con_ugm_mean = Con_ugm_mean,
Con_ugm_group = Con_ugm_group,
Con_ugm_group_mean = Con_ugm_group_mean,
Con_ppbv = Con_ppbv,
Con_ppbv_mean = Con_ppbv_mean,
Con_ppbv_group = Con_ppbv_group,
Con_ppbv_group_mean = Con_ppbv_group_mean,
MIR_Result = name_df,
OFP_Result = ofp_df,
OFP_Result_mean = ofp_df_mean,
OFP_Result_group = ofp_df_group,
OFP_Result_group_mean = ofp_df_group_mean
)
return(results)
}
11 changes: 8 additions & 3 deletions man/ofp.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit dc94169

Please sign in to comment.