From f621804c85dbf838ddff8213d80a1f3beef95226 Mon Sep 17 00:00:00 2001
From: tlorusso
Date: Thu, 19 Dec 2024 13:55:45 +0000
Subject: [PATCH] update Readme
---
README.Rmd | 3 +++
README.md | 4 ++++
docs/index.html | 3 +++
docs/pkgdown.yml | 2 +-
docs/search.json | 2 +-
5 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/README.Rmd b/README.Rmd
index d5b207f..7e40aae 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -13,6 +13,9 @@ output: github_document
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+
+
+
The Plausi package is designed for R-supported election forensics. It provides functions that enable the identification of statistical irregularities and anomalies in vote results.
Key features include:
diff --git a/README.md b/README.md
index 4205ece..100a5e8 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,10 @@
MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+
+
+
+
The Plausi package is designed for R-supported election forensics. It
provides functions that enable the identification of statistical
irregularities and anomalies in vote results.
diff --git a/docs/index.html b/docs/index.html
index 21827ed..fbb1836 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -58,6 +58,9 @@
Predict votes and detect anomalies using R.
+
+
+
The Plausi package is designed for R-supported election forensics. It provides functions that enable the identification of statistical irregularities and anomalies in vote results.
Key features include:
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index d6500e7..200055f 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -4,4 +4,4 @@ pkgdown_sha: ~
articles:
turnout: turnout.html
yes-shares: yes-shares.html
-last_built: 2024-12-18T21:15Z
+last_built: 2024-12-19T13:54Z
diff --git a/docs/search.json b/docs/search.json
index ef0e93c..8f20dbf 100644
--- a/docs/search.json
+++ b/docs/search.json
@@ -1 +1 @@
-[{"path":"/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2024 plausi authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":[]},{"path":"/articles/turnout.html","id":"option-1-retrieve-voting-results-dynamically-via-swissdd-from-opendata-swiss","dir":"Articles","previous_headings":"Data Retrieval","what":"Option 1: Retrieve voting results dynamically via swissdd from opendata.swiss","title":"Turnout Outliers","text":"can access voting information federal level using swissdd package.","code":"# Install and attach package devtools::install_github(\"politanch/swissdd\") library(swissdd) # Retrieve results from the canton of Zurich for all federal votes from 2020-09-27 results <- swissdd::get_nationalvotes(votedates = c(\"2020-09-27\")) |> filter(canton_id == 1)"},{"path":"/articles/turnout.html","id":"option-2-use-the-preloaded-result_data-included-in-the-package","dir":"Articles","previous_headings":"Data Retrieval","what":"Option 2: Use the preloaded result_data included in the package","title":"Turnout Outliers","text":"","code":"# Get historical package data results <- result_data |> filter(votedate == \"2020-09-27\")"},{"path":"/articles/turnout.html","id":"calculate-voter-turnout-differences","dir":"Articles","previous_headings":"","what":"Calculate Voter Turnout Differences","title":"Turnout Outliers","text":"","code":"# Get all possible combinations of proposals combinations <- as.data.frame(t(combn(unique(results$id), 2))) # Reduce the dataset to turnout, vote id and geographic attributes data_wide <- results |> select(id, canton_name, mun_name, mun_id, stimmbeteiligungInProzent) |> # Transpose to wide format pivot_wider(names_from = id, values_from = stimmbeteiligungInProzent) |> mutate_if(is.character, as.factor) # Calculate turnout differences diff1 <- get_differences( data_wide, combinations$V1, combinations$V2, geo_cols = c(\"canton_name\", \"mun_name\", \"mun_id\") )"},{"path":"/articles/turnout.html","id":"identify-suspicious-voter-turnout-differences","dir":"Articles","previous_headings":"","what":"Identify Suspicious Voter Turnout Differences","title":"Turnout Outliers","text":"Now calculated voter turnout differences various proposals municipality, next step identify municipalities show statistically suspicious differences proposal combination. , use is_outlier_double_mad function plausi package. plot helps visualize distribution differences identify particularly values. combinations, differences 3 percentage points certain cases.","code":"diff2 <- diff1 |> group_by(combination) |> # Threshold for outliers: 5 median deviations from median instead of 3.5 mutate(outlier = is_outlier_double_mad(difference, threshold = 5)) |> mutate(median_difference = median(difference)) ggplot(diff2, aes(combination, difference)) + geom_violin() + geom_jitter(alpha = 0.5, aes(color = outlier)) + theme_minimal() + # theme(axis.text.x = element_text(angle = 45), vjust = 0.5, hjust = 1) + theme(axis.text.x = element_text( angle = 45, hjust = 1, vjust = 1 )) + scale_colour_manual(values = c( \"TRUE\" = \"#B01657\", \"FALSE\" = \"#00797B\" )) diff2 |> arrange(desc(abs(difference))) |> head() #> # A tibble: 6 × 7 #> # Groups: combination [6] #> canton_name mun_name mun_id combination difference outlier median_difference #> #> 1 Zürich Buch am I… 24 6310_6320 2.82 TRUE 0.295 #> 2 Zürich Buch am I… 24 6320_6340 -2.82 TRUE -0.0826 #> 3 Zürich Regensberg 95 6340_6350 -2.54 TRUE -0.0836 #> 4 Zürich Buch am I… 24 6320_6330 -2.54 TRUE 0.149 #> 5 Zürich Buch am I… 24 6320_6350 -2.54 TRUE -0.174 #> 6 Zürich Regensberg 95 6310_6340 2.22 TRUE 0.196"},{"path":"/articles/turnout.html","id":"finding-municipalities-with-the-most-notable-turnout-differences","dir":"Articles","previous_headings":"","what":"Finding Municipalities with the Most Notable Turnout Differences","title":"Turnout Outliers","text":"pinpoint municipalities suspicious turnout differences may require investigation, count number flagged outlier combinations municipality. ranking municipalities based number anomalous combinations, can efficiently prioritise review efforts. Municipalities highest count flagged cases considered critical scrutiny. context electoral analysis, ballot Jagdgesetz associated vorlage_id 6320 raises questions case municipality Buch Irchel. illustrate , can examine absolute turnout numbers provided column eingelegteStimmzettel. four five voting topics, number incoming ballots either 484 486. However, one specific topic, count noticeably lower, margin 20 ballots. discrepancy necessarily imply error, situation warrants attention.","code":"anomalous_topics <- diff2 |> separate(combination, into = c(\"vorlage1\", \"vorlage2\"), sep = \"_\") |> filter(outlier == TRUE) |> pivot_longer( cols = -c(canton_name, mun_name, mun_id, difference, outlier, median_difference), names_to = \"vorlage\", values_to = \"vorlage_id\" ) |> group_by(mun_name, mun_id, vorlage_id) |> summarize(n = n()) |> arrange(desc(n)) anomalous_topics #> # A tibble: 82 × 4 #> # Groups: mun_name, mun_id [23] #> mun_name mun_id vorlage_id n #> #> 1 Buch am Irchel 24 6320 4 #> 2 Hüttikon 87 6320 4 #> 3 Regensberg 95 6340 4 #> 4 Bachs 81 6330 3 #> 5 Bäretswil 111 6330 3 #> 6 Bülach 53 6350 3 #> 7 Dänikon 85 6310 3 #> 8 Dorf 26 6310 3 #> 9 Maschwanden 8 6330 3 #> 10 Maschwanden 8 6340 3 #> # ℹ 72 more rows"},{"path":"/articles/turnout.html","id":"why-this-difference-matters","dir":"Articles","previous_headings":"","what":"Why This Difference Matters","title":"Turnout Outliers","text":"deviations noteworthy typically prompt follow-discussions municipalities involved. Possible explanations include: Misplaced Ballots: may mishap leading misplacement 20 ballots. Aggregation Error: may mishap leading erroneous aggregation different bundles ballots. Data Entry Error: discrepancy also result typographical mistake data entry process election result system. immediate evidence wrongdoing systemic issue, kinds irregularities part standard review process ensure accuracy integrity electoral data.","code":"results |> filter(mun_name == \"Buch am Irchel\") |> select( mun_name, mun_id, # name, id, eingelegteStimmzettel ) #> mun_name mun_id id eingelegteStimmzettel #> 1 Buch am Irchel 24 6310 486 #> 2 Buch am Irchel 24 6320 466 #> 3 Buch am Irchel 24 6330 484 #> 4 Buch am Irchel 24 6340 486 #> 5 Buch am Irchel 24 6350 484"},{"path":[]},{"path":"/articles/yes-shares.html","id":"option-1-retrieve-voting-results-dynamically-via-swissdd-from-opendata-swiss","dir":"Articles","previous_headings":"Data Retrieval","what":"Option 1: Retrieve voting results dynamically via swissdd from opendata.swiss","title":"Predict and verify Yes-Shares","text":"can access voting information federal level using swissdd package.","code":"# Install and attach package devtools::install_github(\"politanch/swissdd\") library(swissdd) # Retrieve results from the canton of Zurich for all federal votes from 2017-03-01 until 2020-09-27 results_raw <- swissdd::get_nationalvotes(from_date = \"2017-03-01\", to_date = \"2020-09-27\") |> filter(canton_id == 1)"},{"path":"/articles/yes-shares.html","id":"option-2-use-the-preloaded-result_data-included-in-the-package","dir":"Articles","previous_headings":"Data Retrieval","what":"Option 2: Use the preloaded result_data included in the package","title":"Predict and verify Yes-Shares","text":"","code":"# Get historical package data results_raw <- result_data"},{"path":"/articles/yes-shares.html","id":"data-wrangling","dir":"Articles","previous_headings":"","what":"Data Wrangling","title":"Predict and verify Yes-Shares","text":"municipalities challenging predict, voting patterns deviates significantly (e.g. city Zurich case). address increasing sample weight upsampling.","code":"# Introduce an artificial error results <- results_raw |> mutate(jaStimmenInProzent = ifelse(mun_id == 7 & id == 6310, jaStimmenInProzent + 15, jaStimmenInProzent)) # Transpose historical data into wide format (= one column per ballot / vote topic) testdata <- results |> filter(mun_id %in% bfs_nrs) |> mutate(id = paste0(\"v\", id)) |> select( jaStimmenInProzent, id, mun_id, mun_name ) |> pivot_wider( names_from = id, values_from = jaStimmenInProzent ) |> drop_na() # Upsampling traindata <- testdata |> mutate(ntimes = ifelse(mun_id %in% c(261, 12), 3, 1)) traindata <- as_tibble(lapply(traindata, rep, traindata$ntimes)) |> select(-ntimes)"},{"path":"/articles/yes-shares.html","id":"prediction","dir":"Articles","previous_headings":"","what":"Prediction","title":"Predict and verify Yes-Shares","text":"use svmRadial model (SVM algorithm radial kernel) predict yes-shares, demonstrated best performance benchmarks maintaining quick computation time.","code":"# Set seed for reproducibility set.seed(42) # Predict results predicted_results <- predict_votes( x = c(\"v6350\", \"v6310\"), traindata = traindata, testdata = testdata, method = \"svmRadial\", geovars = c(\"mun_id\", \"mun_name\") )"},{"path":"/articles/yes-shares.html","id":"detect-outliers","dir":"Articles","previous_headings":"","what":"Detect Outliers","title":"Predict and verify Yes-Shares","text":"","code":"# Calculate the deviation of the reported result from the prediction and flag values that deviate by more than three RMSE as anomalous gem_pred <- predicted_results |> mutate(error = real - pred) |> group_by(vorlage) |> mutate(rmse = rmse(pred, real)) |> mutate(error_rmse = error / rmse) |> mutate(outlier = error_rmse > 3) # Knonau with the anomalous result gets flagged gem_pred %>% filter(outlier == TRUE) #> # A tibble: 1 × 9 #> # Groups: vorlage [1] #> mun_id mun_name pred real vorlage error rmse error_rmse outlier #> #> 1 7 Knonau 46.2 54.2 v6310 8.04 2.23 3.60 TRUE # Plot the deviations ggplot(gem_pred, aes(vorlage,error)) + geom_point(aes(color = outlier)) + geom_violin(alpha = 0.5) + scale_colour_manual(values = c( \"TRUE\" = \"#B01657\", \"FALSE\" = \"#00797B\" )) + theme_minimal()"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Thomas Lo Russo. Author. Simon Graf. Author, maintainer.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Lo Russo T, Graf S (2024). plausi: popular vote forensics. R package version 0.2.3.","code":"@Manual{, title = {plausi: popular vote forensics}, author = {Thomas {Lo Russo} and Simon Graf}, year = {2024}, note = {R package version 0.2.3}, }"},{"path":"/index.html","id":"id_-plausi-package","dir":"","previous_headings":"","what":"popular vote forensics","title":"popular vote forensics","text":"Predict votes detect anomalies using R. Plausi package designed R-supported election forensics. provides functions enable identification statistical irregularities anomalies vote results. Key features include: Robust outlier detection small sample sizes skewed distributions Calculation differences possible combinations turnout-levels (e.g., systematic comparison voter turnout across voting districts) Prediction expected results using machine learning algorithms (e.g., yes-vote proportions, voter turnout, etc.) serves basis PlausiApp, used vote result quality control different cantons (TG / SG / ZH). moment, PlausiApp made available upon request via private Repo (mailto:wahlen@statistik.zh.ch).","code":""},{"path":"/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"popular vote forensics","text":"can install plausi package GitHub :","code":"# install.packages(\"devtools\") devtools::install_github(\"machinelearningZH/plausi\")"},{"path":"/index.html","id":"usage","dir":"","previous_headings":"","what":"Usage","title":"popular vote forensics","text":"Attach package good go. need now data. easiest way access data popular votes Switzerland via swissdd package, can easily get wide range vote results, example results national votes 2024-11-24:","code":"library(plausi) devtools::install_github(\"politanch/swissdd\") vote_data <- swissdd::get_nationalvotes(geolevel = \"municipality\", votedates = \"2024-11-24\")"},{"path":"/index.html","id":"licensing","dir":"","previous_headings":"","what":"Licensing","title":"popular vote forensics","text":"package licensed MIT license.","code":""},{"path":"/index.html","id":"project-team","dir":"","previous_headings":"","what":"Project team","title":"popular vote forensics","text":"joint project Vote & Election-Team Team Data Statistical Office Canton Zurich. Responsible: Simon Graf, Thomas Lo Russo Thomas Knecht.","code":""},{"path":"/index.html","id":"feedback-and-contributing","dir":"","previous_headings":"","what":"Feedback and contributing","title":"popular vote forensics","text":"love hear . Please share feedback let us know use code. can write email share ideas opening issue pull requests.","code":""},{"path":"/reference/cross_fun.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculation of the voter turnout difference between two votes — cross_fun","title":"Calculation of the voter turnout difference between two votes — cross_fun","text":"function creates table differences turnout two votes every counting circle original data.","code":""},{"path":"/reference/cross_fun.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculation of the voter turnout difference between two votes — cross_fun","text":"","code":"cross_fun(df, issue1, issue2, geo_cols)"},{"path":"/reference/cross_fun.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculation of the voter turnout difference between two votes — cross_fun","text":"df table containing municipality ID voter turnout various issues. column represents specific issue, column names correspond issue IDs (e.g., 'eidg1', 'kant2'). issue1, issue2 character vector specifying name columns containing voter turnout issues interest (e.g., \"eidg1\", \"kant2\"). geo_cols name geo-column containing identifier counting circle.","code":""},{"path":"/reference/cross_fun.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculation of the voter turnout difference between two votes — cross_fun","text":"dataframe turnout difference two vote topics.","code":""},{"path":"/reference/cross_fun.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculation of the voter turnout difference between two votes — cross_fun","text":"","code":"testdata <- data.frame( gemwkid = c(13,49,41,43,44), eidg1 = c(60.90,61.18,65.27,55.36,57.68), eidg2 = c(62.16,62.54,66.95,56.65,58.68), kant1 = c(57.73,60.27,63.31,51.93,54.49) ) cross_fun(testdata, \"eidg1\", \"eidg2\", \"gemwkid\") #> gemwkid combination difference #> 1 13 eidg1_eidg2 -1.26 #> 2 49 eidg1_eidg2 -1.36 #> 3 41 eidg1_eidg2 -1.68 #> 4 43 eidg1_eidg2 -1.29 #> 5 44 eidg1_eidg2 -1.00 # generate combinations combinations <- as.data.frame(t(combn(c(\"eidg1\", \"eidg2\", \"kant1\"), 2))) # difference between columns named as the first combination cross_fun(testdata, combinations$V1[1], combinations$V2[1], \"gemwkid\") #> gemwkid combination difference #> 1 13 eidg1_eidg2 -1.26 #> 2 49 eidg1_eidg2 -1.36 #> 3 41 eidg1_eidg2 -1.68 #> 4 43 eidg1_eidg2 -1.29 #> 5 44 eidg1_eidg2 -1.00"},{"path":"/reference/double_mad.html","id":null,"dir":"Reference","previous_headings":"","what":"Get the left and right Median Absolute Deviations (MAD) from the median for asymmetric distributions — double_mad","title":"Get the left and right Median Absolute Deviations (MAD) from the median for asymmetric distributions — double_mad","text":"Suited find outliers asymetric distributions (contrast standard mad() function works symmetric distributions ) function splits values along median returns separate MADs left right side distribution. https://eurekastatistics.com/using--median-absolute-deviation--find-outliers/","code":""},{"path":"/reference/double_mad.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get the left and right Median Absolute Deviations (MAD) from the median for asymmetric distributions — double_mad","text":"","code":"double_mad(x, zero_mad_action = NULL)"},{"path":"/reference/double_mad.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get the left and right Median Absolute Deviations (MAD) from the median for asymmetric distributions — double_mad","text":"x vector numeric values. zero_mad_action Determines action event MAD zero. Defaults NULL. options : NULL: process runs warning \"warn\": warning displayed \"stop\": process stopped","code":""},{"path":"/reference/double_mad.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get the left and right Median Absolute Deviations (MAD) from the median for asymmetric distributions — double_mad","text":"numeric vector length 2.","code":""},{"path":"/reference/double_mad.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get the left and right Median Absolute Deviations (MAD) from the median for asymmetric distributions — double_mad","text":"","code":"x <- c(1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90) double_mad(x) #> [1] 2.0 1.5"},{"path":"/reference/double_mad_from_median.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate the distance of a value from the median of a distribution in relation to its Median Absolute Deviation (MAD) — double_mad_from_median","title":"Calculate the distance of a value from the median of a distribution in relation to its Median Absolute Deviation (MAD) — double_mad_from_median","text":"function suited find outliers asymetric distributions (contrast standard mad() function works symetric distributions ). function splits values along median returns distance every value median, relative left right side MAD. https://eurekastatistics.com/using--median-absolute-deviation--find-outliers/","code":""},{"path":"/reference/double_mad_from_median.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate the distance of a value from the median of a distribution in relation to its Median Absolute Deviation (MAD) — double_mad_from_median","text":"","code":"double_mad_from_median(x, zero_mad_action = NULL)"},{"path":"/reference/double_mad_from_median.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate the distance of a value from the median of a distribution in relation to its Median Absolute Deviation (MAD) — double_mad_from_median","text":"x vector numeric values. zero_mad_action Determines action event MAD zero. Defaults NULL. options : NULL: process runs warning \"warn\": warning displayed \"stop\": process stopped","code":""},{"path":"/reference/double_mad_from_median.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate the distance of a value from the median of a distribution in relation to its Median Absolute Deviation (MAD) — double_mad_from_median","text":"numeric vector length length(x).","code":""},{"path":"/reference/double_mad_from_median.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate the distance of a value from the median of a distribution in relation to its Median Absolute Deviation (MAD) — double_mad_from_median","text":"","code":"x <- c(1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90) double_mad_from_median(x) #> [1] 2.5000000 2.0000000 1.5000000 1.5000000 1.0000000 1.0000000 #> [7] 1.0000000 0.5000000 0.2500000 0.0000000 0.0000000 0.3333333 #> [13] 0.6666667 0.6666667 1.0000000 1.3333333 2.0000000 4.0000000 #> [19] 30.6666667 56.0000000"},{"path":"/reference/get_differences.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculation of the voter turnout difference between multiple votes — get_differences","title":"Calculation of the voter turnout difference between multiple votes — get_differences","text":"function creates table differences turnout multiple votes every counting circle original data.","code":""},{"path":"/reference/get_differences.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculation of the voter turnout difference between multiple votes — get_differences","text":"","code":"get_differences(df, comb1, comb2, geo_cols = c(\"gemwkid\", \"gemeinde\"))"},{"path":"/reference/get_differences.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculation of the voter turnout difference between multiple votes — get_differences","text":"df table containing counting circle ID voter turnout various issues. column represents specific issue, column names correspond issue IDs (e.g., 'eidg1', 'kant2'). comb1, comb2 character vector specifying first second set column compared. column names represent columns df contain voter turnout data issues interest (e.g., \"eidg1\", \"kant2\"). geo_cols name geo-column containing identifier counting circle.","code":""},{"path":"/reference/get_differences.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculation of the voter turnout difference between multiple votes — get_differences","text":"dataframe containing voter turnout differences combinations vote issues defined.","code":""},{"path":"/reference/get_differences.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculation of the voter turnout difference between multiple votes — get_differences","text":"","code":"testdata <- data.frame( gemwkid = c(13,49,41,43,44), eidg1 = c(60.90,61.18,65.27,55.36,57.68), eidg2 = c(62.16,62.54,66.95,56.65,58.68), kant1 = c(57.73,60.27,63.31,51.93,54.49) ) # generate combinations combinations <- as.data.frame(t(combn(c(\"eidg1\", \"eidg2\", \"kant1\"), 2))) # calculate all possible differences between columns get_differences(testdata, combinations$V1, combinations$V2, \"gemwkid\") #> gemwkid combination difference #> 1 13 eidg1_eidg2 -1.26 #> 2 49 eidg1_eidg2 -1.36 #> 3 41 eidg1_eidg2 -1.68 #> 4 43 eidg1_eidg2 -1.29 #> 5 44 eidg1_eidg2 -1.00 #> 6 13 eidg1_kant1 3.17 #> 7 49 eidg1_kant1 0.91 #> 8 41 eidg1_kant1 1.96 #> 9 43 eidg1_kant1 3.43 #> 10 44 eidg1_kant1 3.19 #> 11 13 eidg2_kant1 4.43 #> 12 49 eidg2_kant1 2.27 #> 13 41 eidg2_kant1 3.64 #> 14 43 eidg2_kant1 4.72 #> 15 44 eidg2_kant1 4.19"},{"path":"/reference/is_outlier_double_mad.html","id":null,"dir":"Reference","previous_headings":"","what":"Detect outliers using MAD from the median for asymmetric distributions — is_outlier_double_mad","title":"Detect outliers using MAD from the median for asymmetric distributions — is_outlier_double_mad","text":"Outlier detection based Median Absolute Deviation (MAD) asymmetric distributions. function calculates distance median every value distribution relative left right side MAD. compares value threshold labels outliers.","code":""},{"path":"/reference/is_outlier_double_mad.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Detect outliers using MAD from the median for asymmetric distributions — is_outlier_double_mad","text":"","code":"is_outlier_double_mad(x, zero_mad_action = NULL, threshold = 3.5)"},{"path":"/reference/is_outlier_double_mad.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Detect outliers using MAD from the median for asymmetric distributions — is_outlier_double_mad","text":"x vector numeric values. zero_mad_action Determines action event MAD zero. Defaults NULL. options : NULL: process runs warning \"warn\": warning displayed \"stop\": process stopped threshold Z-score threshold (defaults 3.5).","code":""},{"path":"/reference/is_outlier_double_mad.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Detect outliers using MAD from the median for asymmetric distributions — is_outlier_double_mad","text":"logical vector.","code":""},{"path":"/reference/is_outlier_double_mad.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Detect outliers using MAD from the median for asymmetric distributions — is_outlier_double_mad","text":"","code":"x <- c(1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90) is_outlier_double_mad(x) #> [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE"},{"path":"/reference/is_outlier_single_mad.html","id":null,"dir":"Reference","previous_headings":"","what":"Detect outliers using Z-score with MAD for symmetric distributions — is_outlier_single_mad","title":"Detect outliers using Z-score with MAD for symmetric distributions — is_outlier_single_mad","text":"Outlier detection based Median Absolute Deviation (MAD) symmetric distributions. function calculates distance median every value distribution relative MAD. compares value threshold labels outliers.","code":""},{"path":"/reference/is_outlier_single_mad.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Detect outliers using Z-score with MAD for symmetric distributions — is_outlier_single_mad","text":"","code":"is_outlier_single_mad(x, threshold = 3, na.rm = TRUE)"},{"path":"/reference/is_outlier_single_mad.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Detect outliers using Z-score with MAD for symmetric distributions — is_outlier_single_mad","text":"x vector numeric values. threshold Z-score threshold (defaults 3). na.rm Remove NAs, defaults TRUE.","code":""},{"path":"/reference/is_outlier_single_mad.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Detect outliers using Z-score with MAD for symmetric distributions — is_outlier_single_mad","text":"logical vector.","code":""},{"path":"/reference/is_outlier_single_mad.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Detect outliers using Z-score with MAD for symmetric distributions — is_outlier_single_mad","text":"","code":"x <- c(1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90) is_outlier_single_mad(x) #> [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE"},{"path":"/reference/is_outlier_turkey.html","id":null,"dir":"Reference","previous_headings":"","what":"Detect outliers using turkey's fences — is_outlier_turkey","title":"Detect outliers using turkey's fences — is_outlier_turkey","text":"Outlier detection based turkey's fences. Tukey’s fences technique used box plots. non-outlier range defined Q1−k(Q3−Q1), Q3+k(Q3−Q1), Q1 Q3 lower upper quartiles respectively k - non-negative constant (popular choice 1.5).","code":""},{"path":"/reference/is_outlier_turkey.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Detect outliers using turkey's fences — is_outlier_turkey","text":"","code":"is_outlier_turkey(x, threshold = 1.5, na.rm = TRUE)"},{"path":"/reference/is_outlier_turkey.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Detect outliers using turkey's fences — is_outlier_turkey","text":"x vector numeric values. threshold Multiplier IQR set outlier boundaries. Higher values widen range; default 1.5. na.rm TRUE, removes NA values calculations. Default TRUE.","code":""},{"path":"/reference/is_outlier_turkey.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Detect outliers using turkey's fences — is_outlier_turkey","text":"logical vector.","code":""},{"path":"/reference/is_outlier_turkey.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Detect outliers using turkey's fences — is_outlier_turkey","text":"","code":"x <- c(1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90) is_outlier_turkey(x) #> [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE"},{"path":"/reference/is_outlier_z.html","id":null,"dir":"Reference","previous_headings":"","what":"Detect outliers using classic Z-scores for symmetric distributions — is_outlier_z","title":"Detect outliers using classic Z-scores for symmetric distributions — is_outlier_z","text":"Outlier detection based Z-scores symetric distributions. function calculates Z-score, . e. distance value mean number standard deviations.","code":""},{"path":"/reference/is_outlier_z.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Detect outliers using classic Z-scores for symmetric distributions — is_outlier_z","text":"","code":"is_outlier_z(x, threshold = 3, na.rm = TRUE)"},{"path":"/reference/is_outlier_z.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Detect outliers using classic Z-scores for symmetric distributions — is_outlier_z","text":"x vector numeric values. threshold Z-score threshold (defaults 3). na.rm Remove NAs, defaults TRUE.","code":""},{"path":"/reference/is_outlier_z.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Detect outliers using classic Z-scores for symmetric distributions — is_outlier_z","text":"logical vector.","code":""},{"path":"/reference/is_outlier_z.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Detect outliers using classic Z-scores for symmetric distributions — is_outlier_z","text":"","code":"x <- c(1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90) is_outlier_z(x) #> [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE"},{"path":"/reference/outlier_range.html","id":null,"dir":"Reference","previous_headings":"","what":"Get boundaries beyond which a value is an outlier via MAD from the median for asymmetric distributions and IQR — outlier_range","title":"Get boundaries beyond which a value is an outlier via MAD from the median for asymmetric distributions and IQR — outlier_range","text":"Outlier detection based Median Absolute Deviation (MAD) asymetric distributions interquartile range. function calculates distance median every value distribution relative left right side MAD. compares value threshold labels outliers.","code":""},{"path":"/reference/outlier_range.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get boundaries beyond which a value is an outlier via MAD from the median for asymmetric distributions and IQR — outlier_range","text":"","code":"outlier_range(x, zero_mad_action = NULL, threshold = 3.5, percent = TRUE)"},{"path":"/reference/outlier_range.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get boundaries beyond which a value is an outlier via MAD from the median for asymmetric distributions and IQR — outlier_range","text":"x vector numeric values. zero_mad_action Determines action event MAD zero. Defaults NULL. options : NULL: process runs warning \"warn\": warning displayed \"stop\": process stopped threshold Z-score threshold (defaults 3.5). percent Indicator scale data. function run percantage data, lower limit negative upper limit exceed 100 percent. Defaults TRUE.","code":""},{"path":"/reference/outlier_range.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get boundaries beyond which a value is an outlier via MAD from the median for asymmetric distributions and IQR — outlier_range","text":"data.frame numeric range.","code":""},{"path":"/reference/outlier_range.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get boundaries beyond which a value is an outlier via MAD from the median for asymmetric distributions and IQR — outlier_range","text":"","code":"x <- c(1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90) outlier_range(x) #> median iqr lower upper label #> 1 6 3.625 0 13 0 - 13"},{"path":"/reference/predict_single_vote.html","id":null,"dir":"Reference","previous_headings":"","what":"Run prediction for one vote — predict_single_vote","title":"Run prediction for one vote — predict_single_vote","text":"function can used predict outcome one vote based trained model, generated using plausi::train_prediction_model(). create replicable examples, use function together set.seed().","code":""},{"path":"/reference/predict_single_vote.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Run prediction for one vote — predict_single_vote","text":"","code":"predict_single_vote(model, testdata)"},{"path":"/reference/predict_single_vote.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Run prediction for one vote — predict_single_vote","text":"model trained model, generated using plausi::train_prediction_model(). testdata Dataset prediction run. data must contain columns training data model model$trainingData.","code":""},{"path":"/reference/predict_single_vote.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Run prediction for one vote — predict_single_vote","text":"data.frame.","code":""},{"path":"/reference/predict_single_vote.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Run prediction for one vote — predict_single_vote","text":"","code":"# Set seed for reproducibility set.seed(42) test_model <- train_prediction_model(\"Eidg1\", vote_data, to_exclude_vars = \"Kant1\") #> Loading required package: ggplot2 #> Loading required package: lattice predict_single_vote(test_model, vote_data) #> # A tibble: 171 × 5 #> gemeinde v_gemwkid pred real vorlage #> #> 1 Adlikon 21 23.4 21.5 Eidg1 #> 2 Adliswil 131 47.4 48.3 Eidg1 #> 3 Aesch 241 30.0 30.9 Eidg1 #> 4 Aeugst am Albis 1 33.2 31.5 Eidg1 #> 5 Affoltern am Albis 2 40.7 39.8 Eidg1 #> 6 Altikon 211 28.9 29.8 Eidg1 #> 7 Andelfingen 30 33.0 32.1 Eidg1 #> 8 Bachenbülach 51 38.3 39.9 Eidg1 #> 9 Bachs 81 31.4 30.5 Eidg1 #> 10 Bäretswil 111 32.6 33.3 Eidg1 #> # ℹ 161 more rows"},{"path":"/reference/predict_votes.html","id":null,"dir":"Reference","previous_headings":"","what":"Run predictions for multiple votes — predict_votes","title":"Run predictions for multiple votes — predict_votes","text":"function can used predict outcome multiple votes based number past vote results. uses machine learning models available caret package. create replicable examples, use function together set.seed().","code":""},{"path":"/reference/predict_votes.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Run predictions for multiple votes — predict_votes","text":"","code":"predict_votes( x, traindata, testdata = traindata, method = \"svmRadial\", trControl = NULL, exclude_votes = TRUE, geovars = c(\"gemeinde\", \"v_gemwkid\"), training_prop = NA, ... )"},{"path":"/reference/predict_votes.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Run predictions for multiple votes — predict_votes","text":"x Column names dependent variables. traindata Data used train model containing dependent variable predictor columns. testdata Dataset prediction run. data must contain columns training data model model$trainingData. method string specifying classification regression model use. Possible values found using names(getModelInfo()). See http://topepo.github.io/caret/train-models--tag.html. list functions can also passed custom model function. See http://topepo.github.io/caret/using---model--train.html details. trControl list values define function acts. See trainControl http://topepo.github.io/caret/using---model--train.html. (NOTE: given, argument must named.) exclude_votes set TRUE, variables predicted excluded others models. makes sense vote Sunday due differences counting processes. means, lot votes data can contain NAs therefore excluded. Defaults TRUE. geovars Variables containing labels IDs spatial units. training_prop Optional argument define share observations randomly kept training data. generates training dataset excluding inverse proportion training data. ... Optional parameters can passed caret::train() function.","code":""},{"path":"/reference/predict_votes.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Run predictions for multiple votes — predict_votes","text":"data.frame.","code":""},{"path":"/reference/predict_votes.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Run predictions for multiple votes — predict_votes","text":"","code":"# Set seed for reproducibility set.seed(42) predict_votes(c(\"Eidg1\", \"Kant1\"), vote_data) #> # A tibble: 342 × 5 #> gemeinde v_gemwkid pred real vorlage #> #> 1 Adlikon 21 23.4 21.5 Eidg1 #> 2 Adliswil 131 47.4 48.3 Eidg1 #> 3 Aesch 241 30.0 30.9 Eidg1 #> 4 Aeugst am Albis 1 33.2 31.5 Eidg1 #> 5 Affoltern am Albis 2 40.7 39.8 Eidg1 #> 6 Altikon 211 28.9 29.8 Eidg1 #> 7 Andelfingen 30 33.0 32.1 Eidg1 #> 8 Bachenbülach 51 38.3 39.9 Eidg1 #> 9 Bachs 81 31.4 30.5 Eidg1 #> 10 Bäretswil 111 32.6 33.3 Eidg1 #> # ℹ 332 more rows"},{"path":"/reference/result_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Test data containing results for national votes from the canton of Zurich for all Sundays from 2017-03-01 to 2020-09-27. — result_data","title":"Test data containing results for national votes from the canton of Zurich for all Sundays from 2017-03-01 to 2020-09-27. — result_data","text":"Test data containing results national votes canton Zurich Sundays 2017-03-01 2020-09-27.","code":""},{"path":"/reference/result_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Test data containing results for national votes from the canton of Zurich for all Sundays from 2017-03-01 to 2020-09-27. — result_data","text":"","code":"result_data"},{"path":"/reference/result_data.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Test data containing results for national votes from the canton of Zurich for all Sundays from 2017-03-01 to 2020-09-27. — result_data","text":"dataframe 3864 rows 16 columns. name Name referendum. id ID referendum. canton_id ID canton. canton_name Name canton. mun_name Name municipality. geoLevelParentnummer Geo level number parent geo unit, case municipalities means district number. gebietAusgezaehlt Indicator finalised counting status. jaStimmenInProzent Percentage yes-votes. jaStimmenAbsolut Absolut number yes-votes. neinStimmenAbsolut Absolut number -votes. stimmbeteiligungInProzent Turnout. eingelegteStimmzettel Total number submitted ballots. anzahlStimmberechtigte Number elligable voters. gueltigeStimmen Total number valid yes- -votes. votedate Date vote.","code":""},{"path":"/reference/rmse.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate RMSE — rmse","title":"Calculate RMSE — rmse","text":"Calculate Root Mean Square Error (RMSE). RMSE standard deviation residuals (prediction errors) therefore indicator precise prediction specific vote actually .","code":""},{"path":"/reference/rmse.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate RMSE — rmse","text":"","code":"rmse(prediction, observation, na.rm = TRUE)"},{"path":"/reference/rmse.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate RMSE — rmse","text":"prediction Predicted value. observation Oserved value. na.rm Remove NA values, defaults TRUE","code":""},{"path":"/reference/rmse.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate RMSE — rmse","text":"vector numeric values.","code":""},{"path":"/reference/rmse.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate RMSE — rmse","text":"","code":"# Set seed for reproducibility set.seed(42) pred_data <- predict_votes(c(\"Eidg1\", \"Kant1\"), vote_data, exclude_votes = TRUE) pred_data$rmse <- rmse(pred_data$pred, pred_data$real)"},{"path":"/reference/train_prediction_model.html","id":null,"dir":"Reference","previous_headings":"","what":"Train model for prediction of one vote — train_prediction_model","title":"Train model for prediction of one vote — train_prediction_model","text":"function can used train model prediction one vote based number past vote results. uses machine learning models available caret package. create replicable examples, use function together set.seed().","code":""},{"path":"/reference/train_prediction_model.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Train model for prediction of one vote — train_prediction_model","text":"","code":"train_prediction_model( x, traindata, method = \"svmRadial\", trControl = NULL, to_exclude_vars = NULL, geovars = c(\"gemeinde\", \"v_gemwkid\"), training_prop = NA, ... )"},{"path":"/reference/train_prediction_model.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Train model for prediction of one vote — train_prediction_model","text":"x Column name dependent variable. traindata Data used train model containing dependent variable predictor columns. method string specifying classification regression model use. Possible values found using names(getModelInfo()). See http://topepo.github.io/caret/train-models--tag.html. list functions can also passed custom model function. See http://topepo.github.io/caret/using---model--train.html details. trControl list values define function acts. See trainControl http://topepo.github.io/caret/using---model--train.html. (NOTE: given, argument must named.) to_exclude_vars Variables excluded model. makes sense exclude votes current Sunday since can contain lot NAs negatively impact quality model (since rows containing NAs dropped training data). geovars Variables containing labels IDs spatial units. training_prop Optional argument define share observations randomly kept training data. generates training dataset excluding inverse proportion training data. ... Optional parameters can passed caret::train() function.","code":""},{"path":"/reference/train_prediction_model.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Train model for prediction of one vote — train_prediction_model","text":"train object.","code":""},{"path":"/reference/train_prediction_model.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Train model for prediction of one vote — train_prediction_model","text":"","code":"# Set seed for reproducibility set.seed(42) train_prediction_model(\"Eidg1\", vote_data, to_exclude_vars = \"Kant1\") #> Support Vector Machines with Radial Basis Function Kernel #> #> 170 samples #> 75 predictor #> #> No pre-processing #> Resampling: Cross-Validated (10 fold) #> Summary of sample sizes: 153, 152, 154, 152, 152, 153, ... #> Resampling results across tuning parameters: #> #> C RMSE Rsquared MAE #> 0.25 5.111902 0.7522515 3.165523 #> 0.50 4.109593 0.8299735 2.578325 #> 1.00 3.491039 0.8702774 2.256367 #> #> Tuning parameter 'sigma' was held constant at a value of 0.01242253 #> RMSE was used to select the optimal model using the smallest value. #> The final values used for the model were sigma = 0.01242253 and C = 1."},{"path":"/reference/vote_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Test data containing past results and incomplete results for two votes. — vote_data","title":"Test data containing past results and incomplete results for two votes. — vote_data","text":"Test data containing past results covering 5 years federal cantonal votes canton Zurich well incomplete results two current issues. units presented counting circles. Counting circles usually correspond municipalities, exception cities Zürich Winterthur, divided published sub units.","code":""},{"path":"/reference/vote_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Test data containing past results and incomplete results for two votes. — vote_data","text":"","code":"vote_data"},{"path":"/reference/vote_data.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Test data containing past results and incomplete results for two votes. — vote_data","text":"dataframe 171 rows 79 columns. gemeinde name counting circle v_gemwkid FSO number municipality (counting circles Zürich Winterthur, consists numeric, zero FSO number municipality) Eidg1 yes-share current incomplete federal vote Kant1 yes-share current incomplete cantonal vote v_.... yes-shares past votes","code":""},{"path":[]},{"path":"/news/index.html","id":"plausi-022","dir":"Changelog","previous_headings":"","what":"plausi 0.2.2","title":"plausi 0.2.2","text":"Removed cli package dependencies.","code":""},{"path":"/news/index.html","id":"plausi-021","dir":"Changelog","previous_headings":"","what":"plausi 0.2.1","title":"plausi 0.2.1","text":"Changed documentation include set.seed() reproducibility.","code":""},{"path":"/news/index.html","id":"plausi-020","dir":"Changelog","previous_headings":"","what":"plausi 0.2.0","title":"plausi 0.2.0","text":"Initial publication refactored functions.","code":""}]
+[{"path":"/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2024 plausi authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":[]},{"path":"/articles/turnout.html","id":"option-1-retrieve-voting-results-dynamically-via-swissdd-from-opendata-swiss","dir":"Articles","previous_headings":"Data Retrieval","what":"Option 1: Retrieve voting results dynamically via swissdd from opendata.swiss","title":"Turnout Outliers","text":"can access voting information federal level using swissdd package.","code":"# Install and attach package devtools::install_github(\"politanch/swissdd\") library(swissdd) # Retrieve results from the canton of Zurich for all federal votes from 2020-09-27 results <- swissdd::get_nationalvotes(votedates = c(\"2020-09-27\")) |> filter(canton_id == 1)"},{"path":"/articles/turnout.html","id":"option-2-use-the-preloaded-result_data-included-in-the-package","dir":"Articles","previous_headings":"Data Retrieval","what":"Option 2: Use the preloaded result_data included in the package","title":"Turnout Outliers","text":"","code":"# Get historical package data results <- result_data |> filter(votedate == \"2020-09-27\")"},{"path":"/articles/turnout.html","id":"calculate-voter-turnout-differences","dir":"Articles","previous_headings":"","what":"Calculate Voter Turnout Differences","title":"Turnout Outliers","text":"","code":"# Get all possible combinations of proposals combinations <- as.data.frame(t(combn(unique(results$id), 2))) # Reduce the dataset to turnout, vote id and geographic attributes data_wide <- results |> select(id, canton_name, mun_name, mun_id, stimmbeteiligungInProzent) |> # Transpose to wide format pivot_wider(names_from = id, values_from = stimmbeteiligungInProzent) |> mutate_if(is.character, as.factor) # Calculate turnout differences diff1 <- get_differences( data_wide, combinations$V1, combinations$V2, geo_cols = c(\"canton_name\", \"mun_name\", \"mun_id\") )"},{"path":"/articles/turnout.html","id":"identify-suspicious-voter-turnout-differences","dir":"Articles","previous_headings":"","what":"Identify Suspicious Voter Turnout Differences","title":"Turnout Outliers","text":"Now calculated voter turnout differences various proposals municipality, next step identify municipalities show statistically suspicious differences proposal combination. , use is_outlier_double_mad function plausi package. plot helps visualize distribution differences identify particularly values. combinations, differences 3 percentage points certain cases.","code":"diff2 <- diff1 |> group_by(combination) |> # Threshold for outliers: 5 median deviations from median instead of 3.5 mutate(outlier = is_outlier_double_mad(difference, threshold = 5)) |> mutate(median_difference = median(difference)) ggplot(diff2, aes(combination, difference)) + geom_violin() + geom_jitter(alpha = 0.5, aes(color = outlier)) + theme_minimal() + # theme(axis.text.x = element_text(angle = 45), vjust = 0.5, hjust = 1) + theme(axis.text.x = element_text( angle = 45, hjust = 1, vjust = 1 )) + scale_colour_manual(values = c( \"TRUE\" = \"#B01657\", \"FALSE\" = \"#00797B\" )) diff2 |> arrange(desc(abs(difference))) |> head() #> # A tibble: 6 × 7 #> # Groups: combination [6] #> canton_name mun_name mun_id combination difference outlier median_difference #> #> 1 Zürich Buch am I… 24 6310_6320 2.82 TRUE 0.295 #> 2 Zürich Buch am I… 24 6320_6340 -2.82 TRUE -0.0826 #> 3 Zürich Regensberg 95 6340_6350 -2.54 TRUE -0.0836 #> 4 Zürich Buch am I… 24 6320_6330 -2.54 TRUE 0.149 #> 5 Zürich Buch am I… 24 6320_6350 -2.54 TRUE -0.174 #> 6 Zürich Regensberg 95 6310_6340 2.22 TRUE 0.196"},{"path":"/articles/turnout.html","id":"finding-municipalities-with-the-most-notable-turnout-differences","dir":"Articles","previous_headings":"","what":"Finding Municipalities with the Most Notable Turnout Differences","title":"Turnout Outliers","text":"pinpoint municipalities suspicious turnout differences may require investigation, count number flagged outlier combinations municipality. ranking municipalities based number anomalous combinations, can efficiently prioritise review efforts. Municipalities highest count flagged cases considered critical scrutiny. context electoral analysis, ballot Jagdgesetz associated vorlage_id 6320 raises questions case municipality Buch Irchel. illustrate , can examine absolute turnout numbers provided column eingelegteStimmzettel. four five voting topics, number incoming ballots either 484 486. However, one specific topic, count noticeably lower, margin 20 ballots. discrepancy necessarily imply error, situation warrants attention.","code":"anomalous_topics <- diff2 |> separate(combination, into = c(\"vorlage1\", \"vorlage2\"), sep = \"_\") |> filter(outlier == TRUE) |> pivot_longer( cols = -c(canton_name, mun_name, mun_id, difference, outlier, median_difference), names_to = \"vorlage\", values_to = \"vorlage_id\" ) |> group_by(mun_name, mun_id, vorlage_id) |> summarize(n = n()) |> arrange(desc(n)) anomalous_topics #> # A tibble: 82 × 4 #> # Groups: mun_name, mun_id [23] #> mun_name mun_id vorlage_id n #> #> 1 Buch am Irchel 24 6320 4 #> 2 Hüttikon 87 6320 4 #> 3 Regensberg 95 6340 4 #> 4 Bachs 81 6330 3 #> 5 Bäretswil 111 6330 3 #> 6 Bülach 53 6350 3 #> 7 Dänikon 85 6310 3 #> 8 Dorf 26 6310 3 #> 9 Maschwanden 8 6330 3 #> 10 Maschwanden 8 6340 3 #> # ℹ 72 more rows"},{"path":"/articles/turnout.html","id":"why-this-difference-matters","dir":"Articles","previous_headings":"","what":"Why This Difference Matters","title":"Turnout Outliers","text":"deviations noteworthy typically prompt follow-discussions municipalities involved. Possible explanations include: Misplaced Ballots: may mishap leading misplacement 20 ballots. Aggregation Error: may mishap leading erroneous aggregation different bundles ballots. Data Entry Error: discrepancy also result typographical mistake data entry process election result system. immediate evidence wrongdoing systemic issue, kinds irregularities part standard review process ensure accuracy integrity electoral data.","code":"results |> filter(mun_name == \"Buch am Irchel\") |> select( mun_name, mun_id, # name, id, eingelegteStimmzettel ) #> mun_name mun_id id eingelegteStimmzettel #> 1 Buch am Irchel 24 6310 486 #> 2 Buch am Irchel 24 6320 466 #> 3 Buch am Irchel 24 6330 484 #> 4 Buch am Irchel 24 6340 486 #> 5 Buch am Irchel 24 6350 484"},{"path":[]},{"path":"/articles/yes-shares.html","id":"option-1-retrieve-voting-results-dynamically-via-swissdd-from-opendata-swiss","dir":"Articles","previous_headings":"Data Retrieval","what":"Option 1: Retrieve voting results dynamically via swissdd from opendata.swiss","title":"Predict and verify Yes-Shares","text":"can access voting information federal level using swissdd package.","code":"# Install and attach package devtools::install_github(\"politanch/swissdd\") library(swissdd) # Retrieve results from the canton of Zurich for all federal votes from 2017-03-01 until 2020-09-27 results_raw <- swissdd::get_nationalvotes(from_date = \"2017-03-01\", to_date = \"2020-09-27\") |> filter(canton_id == 1)"},{"path":"/articles/yes-shares.html","id":"option-2-use-the-preloaded-result_data-included-in-the-package","dir":"Articles","previous_headings":"Data Retrieval","what":"Option 2: Use the preloaded result_data included in the package","title":"Predict and verify Yes-Shares","text":"","code":"# Get historical package data results_raw <- result_data"},{"path":"/articles/yes-shares.html","id":"data-wrangling","dir":"Articles","previous_headings":"","what":"Data Wrangling","title":"Predict and verify Yes-Shares","text":"municipalities challenging predict, voting patterns deviates significantly (e.g. city Zurich case). address increasing sample weight upsampling.","code":"# Introduce an artificial error results <- results_raw |> mutate(jaStimmenInProzent = ifelse(mun_id == 7 & id == 6310, jaStimmenInProzent + 15, jaStimmenInProzent)) # Transpose historical data into wide format (= one column per ballot / vote topic) testdata <- results |> filter(mun_id %in% bfs_nrs) |> mutate(id = paste0(\"v\", id)) |> select( jaStimmenInProzent, id, mun_id, mun_name ) |> pivot_wider( names_from = id, values_from = jaStimmenInProzent ) |> drop_na() # Upsampling traindata <- testdata |> mutate(ntimes = ifelse(mun_id %in% c(261, 12), 3, 1)) traindata <- as_tibble(lapply(traindata, rep, traindata$ntimes)) |> select(-ntimes)"},{"path":"/articles/yes-shares.html","id":"prediction","dir":"Articles","previous_headings":"","what":"Prediction","title":"Predict and verify Yes-Shares","text":"use svmRadial model (SVM algorithm radial kernel) predict yes-shares, demonstrated best performance benchmarks maintaining quick computation time.","code":"# Set seed for reproducibility set.seed(42) # Predict results predicted_results <- predict_votes( x = c(\"v6350\", \"v6310\"), traindata = traindata, testdata = testdata, method = \"svmRadial\", geovars = c(\"mun_id\", \"mun_name\") )"},{"path":"/articles/yes-shares.html","id":"detect-outliers","dir":"Articles","previous_headings":"","what":"Detect Outliers","title":"Predict and verify Yes-Shares","text":"","code":"# Calculate the deviation of the reported result from the prediction and flag values that deviate by more than three RMSE as anomalous gem_pred <- predicted_results |> mutate(error = real - pred) |> group_by(vorlage) |> mutate(rmse = rmse(pred, real)) |> mutate(error_rmse = error / rmse) |> mutate(outlier = error_rmse > 3) # Knonau with the anomalous result gets flagged gem_pred %>% filter(outlier == TRUE) #> # A tibble: 1 × 9 #> # Groups: vorlage [1] #> mun_id mun_name pred real vorlage error rmse error_rmse outlier #> #> 1 7 Knonau 46.2 54.2 v6310 8.04 2.23 3.60 TRUE # Plot the deviations ggplot(gem_pred, aes(vorlage,error)) + geom_point(aes(color = outlier)) + geom_violin(alpha = 0.5) + scale_colour_manual(values = c( \"TRUE\" = \"#B01657\", \"FALSE\" = \"#00797B\" )) + theme_minimal()"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Thomas Lo Russo. Author. Simon Graf. Author, maintainer.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Lo Russo T, Graf S (2024). plausi: popular vote forensics. R package version 0.2.3.","code":"@Manual{, title = {plausi: popular vote forensics}, author = {Thomas {Lo Russo} and Simon Graf}, year = {2024}, note = {R package version 0.2.3}, }"},{"path":"/index.html","id":"id_-plausi-package","dir":"","previous_headings":"","what":"popular vote forensics","title":"popular vote forensics","text":"Predict votes detect anomalies using R. Plausi package designed R-supported election forensics. provides functions enable identification statistical irregularities anomalies vote results. Key features include: Robust outlier detection small sample sizes skewed distributions Calculation differences possible combinations turnout-levels (e.g., systematic comparison voter turnout across voting districts) Prediction expected results using machine learning algorithms (e.g., yes-vote proportions, voter turnout, etc.) serves basis PlausiApp, used vote result quality control different cantons (TG / SG / ZH). moment, PlausiApp made available upon request via private Repo (mailto:wahlen@statistik.zh.ch).","code":""},{"path":"/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"popular vote forensics","text":"can install plausi package GitHub :","code":"# install.packages(\"devtools\") devtools::install_github(\"machinelearningZH/plausi\")"},{"path":"/index.html","id":"usage","dir":"","previous_headings":"","what":"Usage","title":"popular vote forensics","text":"Attach package good go. need now data. easiest way access data popular votes Switzerland via swissdd package, can easily get wide range vote results, example results national votes 2024-11-24:","code":"library(plausi) devtools::install_github(\"politanch/swissdd\") vote_data <- swissdd::get_nationalvotes(geolevel = \"municipality\", votedates = \"2024-11-24\")"},{"path":"/index.html","id":"licensing","dir":"","previous_headings":"","what":"Licensing","title":"popular vote forensics","text":"package licensed MIT license.","code":""},{"path":"/index.html","id":"project-team","dir":"","previous_headings":"","what":"Project team","title":"popular vote forensics","text":"joint project Vote & Election-Team Team Data Statistical Office Canton Zurich. Responsible: Simon Graf, Thomas Lo Russo Thomas Knecht.","code":""},{"path":"/index.html","id":"feedback-and-contributing","dir":"","previous_headings":"","what":"Feedback and contributing","title":"popular vote forensics","text":"love hear . Please share feedback let us know use code. can write email share ideas opening issue pull requests.","code":""},{"path":"/reference/cross_fun.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculation of the voter turnout difference between two votes — cross_fun","title":"Calculation of the voter turnout difference between two votes — cross_fun","text":"function creates table differences turnout two votes every counting circle original data.","code":""},{"path":"/reference/cross_fun.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculation of the voter turnout difference between two votes — cross_fun","text":"","code":"cross_fun(df, issue1, issue2, geo_cols)"},{"path":"/reference/cross_fun.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculation of the voter turnout difference between two votes — cross_fun","text":"df table containing municipality ID voter turnout various issues. column represents specific issue, column names correspond issue IDs (e.g., 'eidg1', 'kant2'). issue1, issue2 character vector specifying name columns containing voter turnout issues interest (e.g., \"eidg1\", \"kant2\"). geo_cols name geo-column containing identifier counting circle.","code":""},{"path":"/reference/cross_fun.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculation of the voter turnout difference between two votes — cross_fun","text":"dataframe turnout difference two vote topics.","code":""},{"path":"/reference/cross_fun.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculation of the voter turnout difference between two votes — cross_fun","text":"","code":"testdata <- data.frame( gemwkid = c(13,49,41,43,44), eidg1 = c(60.90,61.18,65.27,55.36,57.68), eidg2 = c(62.16,62.54,66.95,56.65,58.68), kant1 = c(57.73,60.27,63.31,51.93,54.49) ) cross_fun(testdata, \"eidg1\", \"eidg2\", \"gemwkid\") #> gemwkid combination difference #> 1 13 eidg1_eidg2 -1.26 #> 2 49 eidg1_eidg2 -1.36 #> 3 41 eidg1_eidg2 -1.68 #> 4 43 eidg1_eidg2 -1.29 #> 5 44 eidg1_eidg2 -1.00 # generate combinations combinations <- as.data.frame(t(combn(c(\"eidg1\", \"eidg2\", \"kant1\"), 2))) # difference between columns named as the first combination cross_fun(testdata, combinations$V1[1], combinations$V2[1], \"gemwkid\") #> gemwkid combination difference #> 1 13 eidg1_eidg2 -1.26 #> 2 49 eidg1_eidg2 -1.36 #> 3 41 eidg1_eidg2 -1.68 #> 4 43 eidg1_eidg2 -1.29 #> 5 44 eidg1_eidg2 -1.00"},{"path":"/reference/double_mad.html","id":null,"dir":"Reference","previous_headings":"","what":"Get the left and right Median Absolute Deviations (MAD) from the median for asymmetric distributions — double_mad","title":"Get the left and right Median Absolute Deviations (MAD) from the median for asymmetric distributions — double_mad","text":"Suited find outliers asymetric distributions (contrast standard mad() function works symmetric distributions ) function splits values along median returns separate MADs left right side distribution. https://eurekastatistics.com/using--median-absolute-deviation--find-outliers/","code":""},{"path":"/reference/double_mad.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get the left and right Median Absolute Deviations (MAD) from the median for asymmetric distributions — double_mad","text":"","code":"double_mad(x, zero_mad_action = NULL)"},{"path":"/reference/double_mad.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get the left and right Median Absolute Deviations (MAD) from the median for asymmetric distributions — double_mad","text":"x vector numeric values. zero_mad_action Determines action event MAD zero. Defaults NULL. options : NULL: process runs warning \"warn\": warning displayed \"stop\": process stopped","code":""},{"path":"/reference/double_mad.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get the left and right Median Absolute Deviations (MAD) from the median for asymmetric distributions — double_mad","text":"numeric vector length 2.","code":""},{"path":"/reference/double_mad.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get the left and right Median Absolute Deviations (MAD) from the median for asymmetric distributions — double_mad","text":"","code":"x <- c(1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90) double_mad(x) #> [1] 2.0 1.5"},{"path":"/reference/double_mad_from_median.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate the distance of a value from the median of a distribution in relation to its Median Absolute Deviation (MAD) — double_mad_from_median","title":"Calculate the distance of a value from the median of a distribution in relation to its Median Absolute Deviation (MAD) — double_mad_from_median","text":"function suited find outliers asymetric distributions (contrast standard mad() function works symetric distributions ). function splits values along median returns distance every value median, relative left right side MAD. https://eurekastatistics.com/using--median-absolute-deviation--find-outliers/","code":""},{"path":"/reference/double_mad_from_median.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate the distance of a value from the median of a distribution in relation to its Median Absolute Deviation (MAD) — double_mad_from_median","text":"","code":"double_mad_from_median(x, zero_mad_action = NULL)"},{"path":"/reference/double_mad_from_median.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate the distance of a value from the median of a distribution in relation to its Median Absolute Deviation (MAD) — double_mad_from_median","text":"x vector numeric values. zero_mad_action Determines action event MAD zero. Defaults NULL. options : NULL: process runs warning \"warn\": warning displayed \"stop\": process stopped","code":""},{"path":"/reference/double_mad_from_median.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate the distance of a value from the median of a distribution in relation to its Median Absolute Deviation (MAD) — double_mad_from_median","text":"numeric vector length length(x).","code":""},{"path":"/reference/double_mad_from_median.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate the distance of a value from the median of a distribution in relation to its Median Absolute Deviation (MAD) — double_mad_from_median","text":"","code":"x <- c(1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90) double_mad_from_median(x) #> [1] 2.5000000 2.0000000 1.5000000 1.5000000 1.0000000 1.0000000 #> [7] 1.0000000 0.5000000 0.2500000 0.0000000 0.0000000 0.3333333 #> [13] 0.6666667 0.6666667 1.0000000 1.3333333 2.0000000 4.0000000 #> [19] 30.6666667 56.0000000"},{"path":"/reference/get_differences.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculation of the voter turnout difference between multiple votes — get_differences","title":"Calculation of the voter turnout difference between multiple votes — get_differences","text":"function creates table differences turnout multiple votes every counting circle original data.","code":""},{"path":"/reference/get_differences.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculation of the voter turnout difference between multiple votes — get_differences","text":"","code":"get_differences(df, comb1, comb2, geo_cols = c(\"gemwkid\", \"gemeinde\"))"},{"path":"/reference/get_differences.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculation of the voter turnout difference between multiple votes — get_differences","text":"df table containing counting circle ID voter turnout various issues. column represents specific issue, column names correspond issue IDs (e.g., 'eidg1', 'kant2'). comb1, comb2 character vector specifying first second set column compared. column names represent columns df contain voter turnout data issues interest (e.g., \"eidg1\", \"kant2\"). geo_cols name geo-column containing identifier counting circle.","code":""},{"path":"/reference/get_differences.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculation of the voter turnout difference between multiple votes — get_differences","text":"dataframe containing voter turnout differences combinations vote issues defined.","code":""},{"path":"/reference/get_differences.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculation of the voter turnout difference between multiple votes — get_differences","text":"","code":"testdata <- data.frame( gemwkid = c(13,49,41,43,44), eidg1 = c(60.90,61.18,65.27,55.36,57.68), eidg2 = c(62.16,62.54,66.95,56.65,58.68), kant1 = c(57.73,60.27,63.31,51.93,54.49) ) # generate combinations combinations <- as.data.frame(t(combn(c(\"eidg1\", \"eidg2\", \"kant1\"), 2))) # calculate all possible differences between columns get_differences(testdata, combinations$V1, combinations$V2, \"gemwkid\") #> gemwkid combination difference #> 1 13 eidg1_eidg2 -1.26 #> 2 49 eidg1_eidg2 -1.36 #> 3 41 eidg1_eidg2 -1.68 #> 4 43 eidg1_eidg2 -1.29 #> 5 44 eidg1_eidg2 -1.00 #> 6 13 eidg1_kant1 3.17 #> 7 49 eidg1_kant1 0.91 #> 8 41 eidg1_kant1 1.96 #> 9 43 eidg1_kant1 3.43 #> 10 44 eidg1_kant1 3.19 #> 11 13 eidg2_kant1 4.43 #> 12 49 eidg2_kant1 2.27 #> 13 41 eidg2_kant1 3.64 #> 14 43 eidg2_kant1 4.72 #> 15 44 eidg2_kant1 4.19"},{"path":"/reference/is_outlier_double_mad.html","id":null,"dir":"Reference","previous_headings":"","what":"Detect outliers using MAD from the median for asymmetric distributions — is_outlier_double_mad","title":"Detect outliers using MAD from the median for asymmetric distributions — is_outlier_double_mad","text":"Outlier detection based Median Absolute Deviation (MAD) asymmetric distributions. function calculates distance median every value distribution relative left right side MAD. compares value threshold labels outliers.","code":""},{"path":"/reference/is_outlier_double_mad.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Detect outliers using MAD from the median for asymmetric distributions — is_outlier_double_mad","text":"","code":"is_outlier_double_mad(x, zero_mad_action = NULL, threshold = 3.5)"},{"path":"/reference/is_outlier_double_mad.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Detect outliers using MAD from the median for asymmetric distributions — is_outlier_double_mad","text":"x vector numeric values. zero_mad_action Determines action event MAD zero. Defaults NULL. options : NULL: process runs warning \"warn\": warning displayed \"stop\": process stopped threshold Z-score threshold (defaults 3.5).","code":""},{"path":"/reference/is_outlier_double_mad.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Detect outliers using MAD from the median for asymmetric distributions — is_outlier_double_mad","text":"logical vector.","code":""},{"path":"/reference/is_outlier_double_mad.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Detect outliers using MAD from the median for asymmetric distributions — is_outlier_double_mad","text":"","code":"x <- c(1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90) is_outlier_double_mad(x) #> [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE"},{"path":"/reference/is_outlier_single_mad.html","id":null,"dir":"Reference","previous_headings":"","what":"Detect outliers using Z-score with MAD for symmetric distributions — is_outlier_single_mad","title":"Detect outliers using Z-score with MAD for symmetric distributions — is_outlier_single_mad","text":"Outlier detection based Median Absolute Deviation (MAD) symmetric distributions. function calculates distance median every value distribution relative MAD. compares value threshold labels outliers.","code":""},{"path":"/reference/is_outlier_single_mad.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Detect outliers using Z-score with MAD for symmetric distributions — is_outlier_single_mad","text":"","code":"is_outlier_single_mad(x, threshold = 3, na.rm = TRUE)"},{"path":"/reference/is_outlier_single_mad.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Detect outliers using Z-score with MAD for symmetric distributions — is_outlier_single_mad","text":"x vector numeric values. threshold Z-score threshold (defaults 3). na.rm Remove NAs, defaults TRUE.","code":""},{"path":"/reference/is_outlier_single_mad.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Detect outliers using Z-score with MAD for symmetric distributions — is_outlier_single_mad","text":"logical vector.","code":""},{"path":"/reference/is_outlier_single_mad.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Detect outliers using Z-score with MAD for symmetric distributions — is_outlier_single_mad","text":"","code":"x <- c(1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90) is_outlier_single_mad(x) #> [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE"},{"path":"/reference/is_outlier_turkey.html","id":null,"dir":"Reference","previous_headings":"","what":"Detect outliers using turkey's fences — is_outlier_turkey","title":"Detect outliers using turkey's fences — is_outlier_turkey","text":"Outlier detection based turkey's fences. Tukey’s fences technique used box plots. non-outlier range defined Q1−k(Q3−Q1), Q3+k(Q3−Q1), Q1 Q3 lower upper quartiles respectively k - non-negative constant (popular choice 1.5).","code":""},{"path":"/reference/is_outlier_turkey.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Detect outliers using turkey's fences — is_outlier_turkey","text":"","code":"is_outlier_turkey(x, threshold = 1.5, na.rm = TRUE)"},{"path":"/reference/is_outlier_turkey.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Detect outliers using turkey's fences — is_outlier_turkey","text":"x vector numeric values. threshold Multiplier IQR set outlier boundaries. Higher values widen range; default 1.5. na.rm TRUE, removes NA values calculations. Default TRUE.","code":""},{"path":"/reference/is_outlier_turkey.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Detect outliers using turkey's fences — is_outlier_turkey","text":"logical vector.","code":""},{"path":"/reference/is_outlier_turkey.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Detect outliers using turkey's fences — is_outlier_turkey","text":"","code":"x <- c(1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90) is_outlier_turkey(x) #> [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE"},{"path":"/reference/is_outlier_z.html","id":null,"dir":"Reference","previous_headings":"","what":"Detect outliers using classic Z-scores for symmetric distributions — is_outlier_z","title":"Detect outliers using classic Z-scores for symmetric distributions — is_outlier_z","text":"Outlier detection based Z-scores symetric distributions. function calculates Z-score, . e. distance value mean number standard deviations.","code":""},{"path":"/reference/is_outlier_z.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Detect outliers using classic Z-scores for symmetric distributions — is_outlier_z","text":"","code":"is_outlier_z(x, threshold = 3, na.rm = TRUE)"},{"path":"/reference/is_outlier_z.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Detect outliers using classic Z-scores for symmetric distributions — is_outlier_z","text":"x vector numeric values. threshold Z-score threshold (defaults 3). na.rm Remove NAs, defaults TRUE.","code":""},{"path":"/reference/is_outlier_z.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Detect outliers using classic Z-scores for symmetric distributions — is_outlier_z","text":"logical vector.","code":""},{"path":"/reference/is_outlier_z.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Detect outliers using classic Z-scores for symmetric distributions — is_outlier_z","text":"","code":"x <- c(1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90) is_outlier_z(x) #> [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE"},{"path":"/reference/outlier_range.html","id":null,"dir":"Reference","previous_headings":"","what":"Get boundaries beyond which a value is an outlier via MAD from the median for asymmetric distributions and IQR — outlier_range","title":"Get boundaries beyond which a value is an outlier via MAD from the median for asymmetric distributions and IQR — outlier_range","text":"Outlier detection based Median Absolute Deviation (MAD) asymetric distributions interquartile range. function calculates distance median every value distribution relative left right side MAD. compares value threshold labels outliers.","code":""},{"path":"/reference/outlier_range.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get boundaries beyond which a value is an outlier via MAD from the median for asymmetric distributions and IQR — outlier_range","text":"","code":"outlier_range(x, zero_mad_action = NULL, threshold = 3.5, percent = TRUE)"},{"path":"/reference/outlier_range.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get boundaries beyond which a value is an outlier via MAD from the median for asymmetric distributions and IQR — outlier_range","text":"x vector numeric values. zero_mad_action Determines action event MAD zero. Defaults NULL. options : NULL: process runs warning \"warn\": warning displayed \"stop\": process stopped threshold Z-score threshold (defaults 3.5). percent Indicator scale data. function run percantage data, lower limit negative upper limit exceed 100 percent. Defaults TRUE.","code":""},{"path":"/reference/outlier_range.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get boundaries beyond which a value is an outlier via MAD from the median for asymmetric distributions and IQR — outlier_range","text":"data.frame numeric range.","code":""},{"path":"/reference/outlier_range.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get boundaries beyond which a value is an outlier via MAD from the median for asymmetric distributions and IQR — outlier_range","text":"","code":"x <- c(1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90) outlier_range(x) #> median iqr lower upper label #> 1 6 3.625 0 13 0 - 13"},{"path":"/reference/predict_single_vote.html","id":null,"dir":"Reference","previous_headings":"","what":"Run prediction for one vote — predict_single_vote","title":"Run prediction for one vote — predict_single_vote","text":"function can used predict outcome one vote based trained model, generated using plausi::train_prediction_model(). create replicable examples, use function together set.seed().","code":""},{"path":"/reference/predict_single_vote.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Run prediction for one vote — predict_single_vote","text":"","code":"predict_single_vote(model, testdata)"},{"path":"/reference/predict_single_vote.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Run prediction for one vote — predict_single_vote","text":"model trained model, generated using plausi::train_prediction_model(). testdata Dataset prediction run. data must contain columns training data model model$trainingData.","code":""},{"path":"/reference/predict_single_vote.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Run prediction for one vote — predict_single_vote","text":"data.frame.","code":""},{"path":"/reference/predict_single_vote.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Run prediction for one vote — predict_single_vote","text":"","code":"# Set seed for reproducibility set.seed(42) test_model <- train_prediction_model(\"Eidg1\", vote_data, to_exclude_vars = \"Kant1\") #> Loading required package: ggplot2 #> Loading required package: lattice predict_single_vote(test_model, vote_data) #> # A tibble: 171 × 5 #> gemeinde v_gemwkid pred real vorlage #> #> 1 Adlikon 21 23.4 21.5 Eidg1 #> 2 Adliswil 131 47.4 48.3 Eidg1 #> 3 Aesch 241 30.0 30.9 Eidg1 #> 4 Aeugst am Albis 1 33.2 31.5 Eidg1 #> 5 Affoltern am Albis 2 40.7 39.8 Eidg1 #> 6 Altikon 211 28.9 29.8 Eidg1 #> 7 Andelfingen 30 33.0 32.1 Eidg1 #> 8 Bachenbülach 51 38.3 39.9 Eidg1 #> 9 Bachs 81 31.4 30.5 Eidg1 #> 10 Bäretswil 111 32.6 33.3 Eidg1 #> # ℹ 161 more rows"},{"path":"/reference/predict_votes.html","id":null,"dir":"Reference","previous_headings":"","what":"Run predictions for multiple votes — predict_votes","title":"Run predictions for multiple votes — predict_votes","text":"function can used predict outcome multiple votes based number past vote results. uses machine learning models available caret package. create replicable examples, use function together set.seed().","code":""},{"path":"/reference/predict_votes.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Run predictions for multiple votes — predict_votes","text":"","code":"predict_votes( x, traindata, testdata = traindata, method = \"svmRadial\", trControl = NULL, exclude_votes = TRUE, geovars = c(\"gemeinde\", \"v_gemwkid\"), training_prop = NA, ... )"},{"path":"/reference/predict_votes.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Run predictions for multiple votes — predict_votes","text":"x Column names dependent variables. traindata Data used train model containing dependent variable predictor columns. testdata Dataset prediction run. data must contain columns training data model model$trainingData. method string specifying classification regression model use. Possible values found using names(getModelInfo()). See http://topepo.github.io/caret/train-models--tag.html. list functions can also passed custom model function. See http://topepo.github.io/caret/using---model--train.html details. trControl list values define function acts. See trainControl http://topepo.github.io/caret/using---model--train.html. (NOTE: given, argument must named.) exclude_votes set TRUE, variables predicted excluded others models. makes sense vote Sunday due differences counting processes. means, lot votes data can contain NAs therefore excluded. Defaults TRUE. geovars Variables containing labels IDs spatial units. training_prop Optional argument define share observations randomly kept training data. generates training dataset excluding inverse proportion training data. ... Optional parameters can passed caret::train() function.","code":""},{"path":"/reference/predict_votes.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Run predictions for multiple votes — predict_votes","text":"data.frame.","code":""},{"path":"/reference/predict_votes.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Run predictions for multiple votes — predict_votes","text":"","code":"# Set seed for reproducibility set.seed(42) predict_votes(c(\"Eidg1\", \"Kant1\"), vote_data) #> # A tibble: 342 × 5 #> gemeinde v_gemwkid pred real vorlage #> #> 1 Adlikon 21 23.4 21.5 Eidg1 #> 2 Adliswil 131 47.4 48.3 Eidg1 #> 3 Aesch 241 30.0 30.9 Eidg1 #> 4 Aeugst am Albis 1 33.2 31.5 Eidg1 #> 5 Affoltern am Albis 2 40.7 39.8 Eidg1 #> 6 Altikon 211 28.9 29.8 Eidg1 #> 7 Andelfingen 30 33.0 32.1 Eidg1 #> 8 Bachenbülach 51 38.3 39.9 Eidg1 #> 9 Bachs 81 31.4 30.5 Eidg1 #> 10 Bäretswil 111 32.6 33.3 Eidg1 #> # ℹ 332 more rows"},{"path":"/reference/result_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Test data containing results for national votes from the canton of Zurich for all Sundays from 2017-03-01 to 2020-09-27. — result_data","title":"Test data containing results for national votes from the canton of Zurich for all Sundays from 2017-03-01 to 2020-09-27. — result_data","text":"Test data containing results national votes canton Zurich Sundays 2017-03-01 2020-09-27.","code":""},{"path":"/reference/result_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Test data containing results for national votes from the canton of Zurich for all Sundays from 2017-03-01 to 2020-09-27. — result_data","text":"","code":"result_data"},{"path":"/reference/result_data.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Test data containing results for national votes from the canton of Zurich for all Sundays from 2017-03-01 to 2020-09-27. — result_data","text":"dataframe 3864 rows 16 columns. name Name referendum. id ID referendum. canton_id ID canton. canton_name Name canton. mun_name Name municipality. geoLevelParentnummer Geo level number parent geo unit, case municipalities means district number. gebietAusgezaehlt Indicator finalised counting status. jaStimmenInProzent Percentage yes-votes. jaStimmenAbsolut Absolut number yes-votes. neinStimmenAbsolut Absolut number -votes. stimmbeteiligungInProzent Turnout. eingelegteStimmzettel Total number submitted ballots. anzahlStimmberechtigte Number elligable voters. gueltigeStimmen Total number valid yes- -votes. votedate Date vote.","code":""},{"path":"/reference/rmse.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate RMSE — rmse","title":"Calculate RMSE — rmse","text":"Calculate Root Mean Square Error (RMSE). RMSE standard deviation residuals (prediction errors) therefore indicator precise prediction specific vote actually .","code":""},{"path":"/reference/rmse.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate RMSE — rmse","text":"","code":"rmse(prediction, observation, na.rm = TRUE)"},{"path":"/reference/rmse.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate RMSE — rmse","text":"prediction Predicted value. observation Oserved value. na.rm Remove NA values, defaults TRUE","code":""},{"path":"/reference/rmse.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate RMSE — rmse","text":"vector numeric values.","code":""},{"path":"/reference/rmse.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate RMSE — rmse","text":"","code":"# Set seed for reproducibility set.seed(42) pred_data <- predict_votes(c(\"Eidg1\", \"Kant1\"), vote_data, exclude_votes = TRUE) pred_data$rmse <- rmse(pred_data$pred, pred_data$real)"},{"path":"/reference/train_prediction_model.html","id":null,"dir":"Reference","previous_headings":"","what":"Train model for prediction of one vote — train_prediction_model","title":"Train model for prediction of one vote — train_prediction_model","text":"function can used train model prediction one vote based number past vote results. uses machine learning models available caret package. create replicable examples, use function together set.seed().","code":""},{"path":"/reference/train_prediction_model.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Train model for prediction of one vote — train_prediction_model","text":"","code":"train_prediction_model( x, traindata, method = \"svmRadial\", trControl = NULL, to_exclude_vars = NULL, geovars = c(\"gemeinde\", \"v_gemwkid\"), training_prop = NA, ... )"},{"path":"/reference/train_prediction_model.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Train model for prediction of one vote — train_prediction_model","text":"x Column name dependent variable. traindata Data used train model containing dependent variable predictor columns. method string specifying classification regression model use. Possible values found using names(getModelInfo()). See http://topepo.github.io/caret/train-models--tag.html. list functions can also passed custom model function. See http://topepo.github.io/caret/using---model--train.html details. trControl list values define function acts. See trainControl http://topepo.github.io/caret/using---model--train.html. (NOTE: given, argument must named.) to_exclude_vars Variables excluded model. makes sense exclude votes current Sunday since can contain lot NAs negatively impact quality model (since rows containing NAs dropped training data). geovars Variables containing labels IDs spatial units. training_prop Optional argument define share observations randomly kept training data. generates training dataset excluding inverse proportion training data. ... Optional parameters can passed caret::train() function.","code":""},{"path":"/reference/train_prediction_model.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Train model for prediction of one vote — train_prediction_model","text":"train object.","code":""},{"path":"/reference/train_prediction_model.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Train model for prediction of one vote — train_prediction_model","text":"","code":"# Set seed for reproducibility set.seed(42) train_prediction_model(\"Eidg1\", vote_data, to_exclude_vars = \"Kant1\") #> Support Vector Machines with Radial Basis Function Kernel #> #> 170 samples #> 75 predictor #> #> No pre-processing #> Resampling: Cross-Validated (10 fold) #> Summary of sample sizes: 153, 152, 154, 152, 152, 153, ... #> Resampling results across tuning parameters: #> #> C RMSE Rsquared MAE #> 0.25 5.111902 0.7522515 3.165523 #> 0.50 4.109593 0.8299735 2.578325 #> 1.00 3.491039 0.8702774 2.256367 #> #> Tuning parameter 'sigma' was held constant at a value of 0.01242253 #> RMSE was used to select the optimal model using the smallest value. #> The final values used for the model were sigma = 0.01242253 and C = 1."},{"path":"/reference/vote_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Test data containing past results and incomplete results for two votes. — vote_data","title":"Test data containing past results and incomplete results for two votes. — vote_data","text":"Test data containing past results covering 5 years federal cantonal votes canton Zurich well incomplete results two current issues. units presented counting circles. Counting circles usually correspond municipalities, exception cities Zürich Winterthur, divided published sub units.","code":""},{"path":"/reference/vote_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Test data containing past results and incomplete results for two votes. — vote_data","text":"","code":"vote_data"},{"path":"/reference/vote_data.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Test data containing past results and incomplete results for two votes. — vote_data","text":"dataframe 171 rows 79 columns. gemeinde name counting circle v_gemwkid FSO number municipality (counting circles Zürich Winterthur, consists numeric, zero FSO number municipality) Eidg1 yes-share current incomplete federal vote Kant1 yes-share current incomplete cantonal vote v_.... yes-shares past votes","code":""},{"path":[]},{"path":"/news/index.html","id":"plausi-022","dir":"Changelog","previous_headings":"","what":"plausi 0.2.2","title":"plausi 0.2.2","text":"Removed cli package dependencies.","code":""},{"path":"/news/index.html","id":"plausi-021","dir":"Changelog","previous_headings":"","what":"plausi 0.2.1","title":"plausi 0.2.1","text":"Changed documentation include set.seed() reproducibility.","code":""},{"path":"/news/index.html","id":"plausi-020","dir":"Changelog","previous_headings":"","what":"plausi 0.2.0","title":"plausi 0.2.0","text":"Initial publication refactored functions.","code":""}]