From 827822ef148bd43adb130842b067e956a6e6292b Mon Sep 17 00:00:00 2001 From: mwsohn Date: Sat, 8 Oct 2022 11:35:13 -0400 Subject: [PATCH 1/2] dfmerge updated --- .vscode/settings.json | 1 + src/DataFrame_tools.jl | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/src/DataFrame_tools.jl b/src/DataFrame_tools.jl index 6843de4..b772165 100644 --- a/src/DataFrame_tools.jl +++ b/src/DataFrame_tools.jl @@ -566,15 +566,15 @@ function dfmerge(df1::DataFrame,df2::DataFrame,linkers::Union{Symbol,Vector};kin error("`:_merge' exists in the second dataframe") end - df1[:___mergeleft___] = ones(Int8,size(df1,1)) - df2[:___mergeright___] = ones(Int8,size(df2,1)) + df1[!,:___mergeleft___] = ones(Int8,size(df1,1)) + df2[!,:___mergeright___] = ones(Int8,size(df2,1)) df_merged = join(df1,df2,on = linkers,kind=kind) - df_merged[:_merge] = zeros(Int8,size(df_merged,1)) + df_merged[!,:_merge] = zeros(Int8,size(df_merged,1)) for i = 1:size(df_merged,1) if isna(df_merged[i,:___mergeright___]) df_merged[i,:_merge] = 1 - elseif isna(df_merged[i,:___mergeleft___]) + elseif ismissing(df_merged[i,:___mergeleft___]) df_merged[i,:_merge] = 2 elseif df_merged[i,:___mergeleft___] == 1 && df_merged[i,:___mergeright___] == 1 df_merged[i,:_merge] = 3 From a53c16bc9fad8ffe323f172e7856171990cd2882 Mon Sep 17 00:00:00 2001 From: mwsohn Date: Sat, 19 Nov 2022 13:02:36 -0500 Subject: [PATCH 2/2] Update Stata_Reader.jl --- src/Stata_Reader.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Stata_Reader.jl b/src/Stata_Reader.jl index 42142c2..f783a76 100644 --- a/src/Stata_Reader.jl +++ b/src/Stata_Reader.jl @@ -277,7 +277,7 @@ function read_stata!(fn,df::DataFrame,label::Dict; categorize=true, verbose=fals println("Processing variable ",j," ", varlist[j]) end - df[varlist[j]] = alloc_array(typelist[j],fmtlist[j],nobs) + df[!,varlist[j]] = alloc_array(typelist[j],fmtlist[j],nobs) for i in 1:nobs @@ -368,6 +368,11 @@ function read_stata!(fn,df::DataFrame,label::Dict; categorize=true, verbose=fals categorical!(df,varlist[j]) gc() end + + # if there are no missing values, convert the variable to an appropriate vector + if sum(ismissing.(df[:,j])) == 0 + df[!,varlist[j]] = convert(Vector{nonmissingtype(eltype(df[!,varlist[j]]))},df[!,varlist[j]]) + end end # read value labels